1//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the RegisterBankInfo class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64RegisterBankInfo.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64RegisterInfo.h"
17#include "AArch64Subtarget.h"
18#include "MCTargetDesc/AArch64AddressingModes.h"
19#include "MCTargetDesc/AArch64MCTargetDesc.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
24#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25#include "llvm/CodeGen/GlobalISel/Utils.h"
26#include "llvm/CodeGen/LowLevelTypeUtils.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/MachineSizeOpts.h"
32#include "llvm/CodeGen/RegisterBank.h"
33#include "llvm/CodeGen/RegisterBankInfo.h"
34#include "llvm/CodeGen/TargetOpcodes.h"
35#include "llvm/CodeGen/TargetRegisterInfo.h"
36#include "llvm/CodeGen/TargetSubtargetInfo.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/IntrinsicsAArch64.h"
39#include "llvm/Support/ErrorHandling.h"
40#include "llvm/Support/Threading.h"
41#include <cassert>
42
43#define GET_TARGET_REGBANK_IMPL
44#include "AArch64GenRegisterBank.inc"
45
46// This file will be TableGen'ed at some point.
47#include "AArch64GenRegisterBankInfo.def"
48
49using namespace llvm;
/// Mapping identifier with the value 1.
/// NOTE(review): presumably marks mappings that need target-specific handling
/// when applied; its uses are not visible in this part of the file - confirm.
static constexpr unsigned CustomMappingID = 1;
51
52AArch64RegisterBankInfo::AArch64RegisterBankInfo(
53 const TargetRegisterInfo &TRI) {
54 static llvm::once_flag InitializeRegisterBankFlag;
55
56 static auto InitializeRegisterBankOnce = [&]() {
57 // We have only one set of register banks, whatever the subtarget
58 // is. Therefore, the initialization of the RegBanks table should be
59 // done only once. Indeed the table of all register banks
60 // (AArch64::RegBanks) is unique in the compiler. At some point, it
61 // will get tablegen'ed and the whole constructor becomes empty.
62
63 const RegisterBank &RBGPR = getRegBank(ID: AArch64::GPRRegBankID);
64 (void)RBGPR;
65 assert(&AArch64::GPRRegBank == &RBGPR &&
66 "The order in RegBanks is messed up");
67
68 const RegisterBank &RBFPR = getRegBank(ID: AArch64::FPRRegBankID);
69 (void)RBFPR;
70 assert(&AArch64::FPRRegBank == &RBFPR &&
71 "The order in RegBanks is messed up");
72
73 const RegisterBank &RBCCR = getRegBank(ID: AArch64::CCRegBankID);
74 (void)RBCCR;
75 assert(&AArch64::CCRegBank == &RBCCR &&
76 "The order in RegBanks is messed up");
77
78 // The GPR register bank is fully defined by all the registers in
79 // GR64all + its subclasses.
80 assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
81 "Subclass not added?");
82 assert(getMaximumSize(RBGPR.getID()) == 128 &&
83 "GPRs should hold up to 128-bit");
84
85 // The FPR register bank is fully defined by all the registers in
86 // GR64all + its subclasses.
87 assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
88 "Subclass not added?");
89 assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
90 "Subclass not added?");
91 assert(getMaximumSize(RBFPR.getID()) == 512 &&
92 "FPRs should hold up to 512-bit via QQQQ sequence");
93
94 assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
95 "Class not added?");
96 assert(getMaximumSize(RBCCR.getID()) == 32 &&
97 "CCR should hold up to 32-bit");
98
99 // Check that the TableGen'ed like file is in sync we our expectations.
100 // First, the Idx.
101 assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
102 {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
103 "PartialMappingIdx's are incorrectly ordered");
104 assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
105 {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
106 PMI_FPR256, PMI_FPR512}) &&
107 "PartialMappingIdx's are incorrectly ordered");
108// Now, the content.
109// Check partial mapping.
110#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
111 do { \
112 assert( \
113 checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
114 #Idx " is incorrectly initialized"); \
115 } while (false)
116
117 CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
118 CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
119 CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
120 CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
121 CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
122 CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
123 CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
124 CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
125 CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
126
127// Check value mapping.
128#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
129 do { \
130 assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
131 PartialMappingIdx::PMI_First##RBName, Size, \
132 Offset) && \
133 #RBName #Size " " #Offset " is incorrectly initialized"); \
134 } while (false)
135
136#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
137
138 CHECK_VALUEMAP(GPR, 32);
139 CHECK_VALUEMAP(GPR, 64);
140 CHECK_VALUEMAP(GPR, 128);
141 CHECK_VALUEMAP(FPR, 16);
142 CHECK_VALUEMAP(FPR, 32);
143 CHECK_VALUEMAP(FPR, 64);
144 CHECK_VALUEMAP(FPR, 128);
145 CHECK_VALUEMAP(FPR, 256);
146 CHECK_VALUEMAP(FPR, 512);
147
148// Check the value mapping for 3-operands instructions where all the operands
149// map to the same value mapping.
150#define CHECK_VALUEMAP_3OPS(RBName, Size) \
151 do { \
152 CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
153 CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
154 CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
155 } while (false)
156
157 CHECK_VALUEMAP_3OPS(GPR, 32);
158 CHECK_VALUEMAP_3OPS(GPR, 64);
159 CHECK_VALUEMAP_3OPS(GPR, 128);
160 CHECK_VALUEMAP_3OPS(FPR, 32);
161 CHECK_VALUEMAP_3OPS(FPR, 64);
162 CHECK_VALUEMAP_3OPS(FPR, 128);
163 CHECK_VALUEMAP_3OPS(FPR, 256);
164 CHECK_VALUEMAP_3OPS(FPR, 512);
165
166#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
167 do { \
168 unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
169 unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
170 (void)PartialMapDstIdx; \
171 (void)PartialMapSrcIdx; \
172 const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID, \
173 AArch64::RBNameSrc##RegBankID, \
174 TypeSize::getFixed(Size)); \
175 (void)Map; \
176 assert(Map[0].BreakDown == \
177 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
178 Map[0].NumBreakDowns == 1 && \
179 #RBNameDst #Size " Dst is incorrectly initialized"); \
180 assert(Map[1].BreakDown == \
181 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
182 Map[1].NumBreakDowns == 1 && \
183 #RBNameSrc #Size " Src is incorrectly initialized"); \
184 \
185 } while (false)
186
187 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
188 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
189 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
190 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
191 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
192 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
193 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
194 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
195
196#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
197 do { \
198 unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
199 unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
200 (void)PartialMapDstIdx; \
201 (void)PartialMapSrcIdx; \
202 const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
203 (void)Map; \
204 assert(Map[0].BreakDown == \
205 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
206 Map[0].NumBreakDowns == 1 && "FPR" #DstSize \
207 " Dst is incorrectly initialized"); \
208 assert(Map[1].BreakDown == \
209 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
210 Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \
211 " Src is incorrectly initialized"); \
212 \
213 } while (false)
214
215 CHECK_VALUEMAP_FPEXT(32, 16);
216 CHECK_VALUEMAP_FPEXT(64, 16);
217 CHECK_VALUEMAP_FPEXT(64, 32);
218 CHECK_VALUEMAP_FPEXT(128, 64);
219
220 assert(verify(TRI) && "Invalid register bank information");
221 };
222
223 llvm::call_once(flag&: InitializeRegisterBankFlag, F&: InitializeRegisterBankOnce);
224}
225
226unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
227 const RegisterBank &B,
228 const TypeSize Size) const {
229 // What do we do with different size?
230 // copy are same size.
231 // Will introduce other hooks for different size:
232 // * extract cost.
233 // * build_sequence cost.
234
235 // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
236 // FIXME: This should be deduced from the scheduling model.
237 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
238 // FMOVXDr or FMOVWSr.
239 return 5;
240 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
241 // FMOVDXr or FMOVSWr.
242 return 4;
243
244 return RegisterBankInfo::copyCost(A, B, Size);
245}
246
247const RegisterBank &
248AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
249 LLT Ty) const {
250 switch (RC.getID()) {
251 case AArch64::GPR64sponlyRegClassID:
252 return getRegBank(ID: AArch64::GPRRegBankID);
253 default:
254 return AArch64GenRegisterBankInfo::getRegBankFromRegClass(RC, Ty);
255 }
256}
257
258RegisterBankInfo::InstructionMappings
259AArch64RegisterBankInfo::getInstrAlternativeMappings(
260 const MachineInstr &MI) const {
261 const MachineFunction &MF = *MI.getParent()->getParent();
262 const TargetSubtargetInfo &STI = MF.getSubtarget();
263 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
264 const MachineRegisterInfo &MRI = MF.getRegInfo();
265
266 switch (MI.getOpcode()) {
267 case TargetOpcode::G_OR: {
268 // 32 and 64-bit or can be mapped on either FPR or
269 // GPR for the same cost.
270 TypeSize Size = getSizeInBits(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
271 if (Size != 32 && Size != 64)
272 break;
273
274 // If the instruction has any implicit-defs or uses,
275 // do not mess with it.
276 if (MI.getNumOperands() != 3)
277 break;
278 InstructionMappings AltMappings;
279 const InstructionMapping &GPRMapping = getInstructionMapping(
280 /*ID*/ 1, /*Cost*/ 1, OperandsMapping: getValueMapping(RBIdx: PMI_FirstGPR, Size),
281 /*NumOperands*/ 3);
282 const InstructionMapping &FPRMapping = getInstructionMapping(
283 /*ID*/ 2, /*Cost*/ 1, OperandsMapping: getValueMapping(RBIdx: PMI_FirstFPR, Size),
284 /*NumOperands*/ 3);
285
286 AltMappings.push_back(Elt: &GPRMapping);
287 AltMappings.push_back(Elt: &FPRMapping);
288 return AltMappings;
289 }
290 case TargetOpcode::G_BITCAST: {
291 TypeSize Size = getSizeInBits(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
292 if (Size != 32 && Size != 64)
293 break;
294
295 // If the instruction has any implicit-defs or uses,
296 // do not mess with it.
297 if (MI.getNumOperands() != 2)
298 break;
299
300 InstructionMappings AltMappings;
301 const InstructionMapping &GPRMapping = getInstructionMapping(
302 /*ID*/ 1, /*Cost*/ 1,
303 OperandsMapping: getCopyMapping(DstBankID: AArch64::GPRRegBankID, SrcBankID: AArch64::GPRRegBankID, Size),
304 /*NumOperands*/ 2);
305 const InstructionMapping &FPRMapping = getInstructionMapping(
306 /*ID*/ 2, /*Cost*/ 1,
307 OperandsMapping: getCopyMapping(DstBankID: AArch64::FPRRegBankID, SrcBankID: AArch64::FPRRegBankID, Size),
308 /*NumOperands*/ 2);
309 const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
310 /*ID*/ 3,
311 /*Cost*/
312 copyCost(A: AArch64::GPRRegBank, B: AArch64::FPRRegBank,
313 Size: TypeSize::getFixed(ExactSize: Size)),
314 OperandsMapping: getCopyMapping(DstBankID: AArch64::FPRRegBankID, SrcBankID: AArch64::GPRRegBankID, Size),
315 /*NumOperands*/ 2);
316 const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
317 /*ID*/ 3,
318 /*Cost*/
319 copyCost(A: AArch64::GPRRegBank, B: AArch64::FPRRegBank,
320 Size: TypeSize::getFixed(ExactSize: Size)),
321 OperandsMapping: getCopyMapping(DstBankID: AArch64::GPRRegBankID, SrcBankID: AArch64::FPRRegBankID, Size),
322 /*NumOperands*/ 2);
323
324 AltMappings.push_back(Elt: &GPRMapping);
325 AltMappings.push_back(Elt: &FPRMapping);
326 AltMappings.push_back(Elt: &GPRToFPRMapping);
327 AltMappings.push_back(Elt: &FPRToGPRMapping);
328 return AltMappings;
329 }
330 case TargetOpcode::G_LOAD: {
331 TypeSize Size = getSizeInBits(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
332 if (Size != 64)
333 break;
334
335 // If the instruction has any implicit-defs or uses,
336 // do not mess with it.
337 if (MI.getNumOperands() != 2)
338 break;
339
340 InstructionMappings AltMappings;
341 const InstructionMapping &GPRMapping = getInstructionMapping(
342 /*ID*/ 1, /*Cost*/ 1,
343 OperandsMapping: getOperandsMapping(
344 OpdsMapping: {getValueMapping(RBIdx: PMI_FirstGPR, Size),
345 // Addresses are GPR 64-bit.
346 getValueMapping(RBIdx: PMI_FirstGPR, Size: TypeSize::getFixed(ExactSize: 64))}),
347 /*NumOperands*/ 2);
348 const InstructionMapping &FPRMapping = getInstructionMapping(
349 /*ID*/ 2, /*Cost*/ 1,
350 OperandsMapping: getOperandsMapping(
351 OpdsMapping: {getValueMapping(RBIdx: PMI_FirstFPR, Size),
352 // Addresses are GPR 64-bit.
353 getValueMapping(RBIdx: PMI_FirstGPR, Size: TypeSize::getFixed(ExactSize: 64))}),
354 /*NumOperands*/ 2);
355
356 AltMappings.push_back(Elt: &GPRMapping);
357 AltMappings.push_back(Elt: &FPRMapping);
358 return AltMappings;
359 }
360 default:
361 break;
362 }
363 return RegisterBankInfo::getInstrAlternativeMappings(MI);
364}
365
366static bool preferGPRForFPImm(const MachineInstr &MI,
367 const MachineRegisterInfo &MRI,
368 const AArch64Subtarget &STI) {
369 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
370 Register Dst = MI.getOperand(i: 0).getReg();
371 LLT Ty = MRI.getType(Reg: Dst);
372
373 unsigned Size = Ty.getSizeInBits();
374 if (Size != 16 && Size != 32 && Size != 64)
375 return false;
376
377 EVT VT = EVT::getFloatingPointVT(BitWidth: Size);
378 const AArch64TargetLowering *TLI = STI.getTargetLowering();
379
380 const APFloat Imm = MI.getOperand(i: 1).getFPImm()->getValueAPF();
381 const APInt ImmBits = Imm.bitcastToAPInt();
382
383 // If all the uses are stores use a gpr constant
384 if (all_of(Range: MRI.use_nodbg_instructions(Reg: Dst), P: [&](const MachineInstr &UseMI) {
385 return UseMI.getOpcode() == TargetOpcode::G_STORE &&
386 UseMI.getOperand(i: 0).getReg() == Dst;
387 }))
388 return true;
389
390 // Check if we can encode this as a movi. Note, we only have one pattern so
391 // far for movis, hence the one check.
392 if (Size == 32) {
393 uint64_t Val = APInt::getSplat(NewLen: 64, V: ImmBits).getZExtValue();
394 if (AArch64_AM::isAdvSIMDModImmType4(Imm: Val))
395 return false;
396 }
397
398 // We want to use GPR when the value cannot be encoded as the immediate value
399 // of a fmov and when it will not result in a constant pool load. As
400 // AArch64TargetLowering::isFPImmLegal is used by the instruction selector
401 // to choose whether to emit a constant pool load, negating this check will
402 // ensure it would not have become a constant pool load.
403 bool OptForSize =
404 shouldOptimizeForSize(F: &MI.getMF()->getFunction(), PSI: nullptr, BFI: nullptr);
405 bool IsLegal = TLI->isFPImmLegal(Imm, VT, ForCodeSize: OptForSize);
406 bool IsFMov = TLI->isFPImmLegalAsFMov(Imm, VT);
407 return !IsFMov && IsLegal;
408}
409
410// Some of the instructions in applyMappingImpl attempt to anyext small values.
411// It may be that these values come from a G_CONSTANT that has been expanded to
412// 32 bits and then truncated. If this is the case, we shouldn't insert an
413// anyext and should instead make use of the G_CONSTANT directly, deleting the
414// trunc if possible.
415static bool foldTruncOfI32Constant(MachineInstr &MI, unsigned OpIdx,
416 MachineRegisterInfo &MRI,
417 const AArch64RegisterBankInfo &RBI) {
418 MachineOperand &Op = MI.getOperand(i: OpIdx);
419
420 Register ScalarReg = Op.getReg();
421 MachineInstr *TruncMI = MRI.getVRegDef(Reg: ScalarReg);
422 if (!TruncMI || TruncMI->getOpcode() != TargetOpcode::G_TRUNC)
423 return false;
424
425 Register TruncSrc = TruncMI->getOperand(i: 1).getReg();
426 MachineInstr *SrcDef = MRI.getVRegDef(Reg: TruncSrc);
427 if (!SrcDef || SrcDef->getOpcode() != TargetOpcode::G_CONSTANT)
428 return false;
429
430 LLT TruncSrcTy = MRI.getType(Reg: TruncSrc);
431 if (!TruncSrcTy.isScalar() || TruncSrcTy.getSizeInBits() != 32)
432 return false;
433
434 // Avoid truncating and extending a constant, this helps with selection.
435 Op.setReg(TruncSrc);
436 MRI.setRegBank(Reg: TruncSrc, RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
437
438 if (MRI.use_empty(RegNo: ScalarReg))
439 TruncMI->eraseFromParent();
440
441 return true;
442}
443
444void AArch64RegisterBankInfo::applyMappingImpl(
445 MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
446 MachineInstr &MI = OpdMapper.getMI();
447 MachineRegisterInfo &MRI = OpdMapper.getMRI();
448
449 switch (MI.getOpcode()) {
450 case TargetOpcode::G_CONSTANT: {
451 Register Dst = MI.getOperand(i: 0).getReg();
452 [[maybe_unused]] LLT DstTy = MRI.getType(Reg: Dst);
453 assert(MRI.getRegBank(Dst) == &AArch64::GPRRegBank && DstTy.isScalar() &&
454 DstTy.getSizeInBits() < 32 &&
455 "Expected a scalar smaller than 32 bits on a GPR.");
456 Builder.setInsertPt(MBB&: *MI.getParent(), II: std::next(x: MI.getIterator()));
457 Register ExtReg = MRI.createGenericVirtualRegister(Ty: LLT::integer(SizeInBits: 32));
458 Builder.buildTrunc(Res: Dst, Op: ExtReg);
459
460 APInt Val = MI.getOperand(i: 1).getCImm()->getValue().zext(width: 32);
461 LLVMContext &Ctx = Builder.getMF().getFunction().getContext();
462 MI.getOperand(i: 1).setCImm(ConstantInt::get(Context&: Ctx, V: Val));
463 MI.getOperand(i: 0).setReg(ExtReg);
464 MRI.setRegBank(Reg: ExtReg, RegBank: AArch64::GPRRegBank);
465
466 return applyDefaultMapping(OpdMapper);
467 }
468 case TargetOpcode::G_FCONSTANT: {
469 Register Dst = MI.getOperand(i: 0).getReg();
470 assert(MRI.getRegBank(Dst) == &AArch64::GPRRegBank &&
471 "Expected Dst to be on a GPR.");
472 const APFloat &Imm = MI.getOperand(i: 1).getFPImm()->getValueAPF();
473 APInt Bits = Imm.bitcastToAPInt();
474 Builder.setInsertPt(MBB&: *MI.getParent(), II: MI.getIterator());
475 if (Bits.getBitWidth() < 32) {
476 Register ExtReg = MRI.createGenericVirtualRegister(Ty: LLT::integer(SizeInBits: 32));
477 Builder.buildConstant(Res: ExtReg, Val: Bits.zext(width: 32));
478 Builder.buildTrunc(Res: Dst, Op: ExtReg);
479 MRI.setRegBank(Reg: ExtReg, RegBank: AArch64::GPRRegBank);
480 } else {
481 Builder.buildConstant(Res: Dst, Val: Bits);
482 }
483 MI.eraseFromParent();
484 return;
485 }
486 case TargetOpcode::G_STORE: {
487 Register Dst = MI.getOperand(i: 0).getReg();
488 LLT Ty = MRI.getType(Reg: Dst);
489
490 if (MRI.getRegBank(Reg: Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
491 Ty.getSizeInBits() < 32) {
492
493 if (foldTruncOfI32Constant(MI, OpIdx: 0, MRI, RBI: *this))
494 return applyDefaultMapping(OpdMapper);
495
496 Builder.setInsertPt(MBB&: *MI.getParent(), II: MI.getIterator());
497 auto Ext = Builder.buildAnyExt(Res: LLT::integer(SizeInBits: 32), Op: Dst);
498 MI.getOperand(i: 0).setReg(Ext.getReg(Idx: 0));
499 MRI.setRegBank(Reg: Ext.getReg(Idx: 0), RegBank: AArch64::GPRRegBank);
500 }
501 return applyDefaultMapping(OpdMapper);
502 }
503 case TargetOpcode::G_LOAD: {
504 Register Dst = MI.getOperand(i: 0).getReg();
505 LLT Ty = MRI.getType(Reg: Dst);
506 if (MRI.getRegBank(Reg: Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
507 Ty.getSizeInBits() < 32) {
508 Builder.setInsertPt(MBB&: *MI.getParent(), II: std::next(x: MI.getIterator()));
509 Register ExtReg = MRI.createGenericVirtualRegister(Ty: LLT::integer(SizeInBits: 32));
510 Builder.buildTrunc(Res: Dst, Op: ExtReg);
511 MI.getOperand(i: 0).setReg(ExtReg);
512 MRI.setRegBank(Reg: ExtReg, RegBank: AArch64::GPRRegBank);
513 }
514 [[fallthrough]];
515 }
516 case TargetOpcode::G_OR:
517 case TargetOpcode::G_BITCAST:
518 // Those ID must match getInstrAlternativeMappings.
519 assert((OpdMapper.getInstrMapping().getID() >= 1 &&
520 OpdMapper.getInstrMapping().getID() <= 4) &&
521 "Don't know how to handle that ID");
522 return applyDefaultMapping(OpdMapper);
523 case AArch64::G_DUP: {
524 if (foldTruncOfI32Constant(MI, OpIdx: 1, MRI, RBI: *this))
525 return applyDefaultMapping(OpdMapper);
526
527 // Extend smaller gpr to 32-bits
528 assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
529 "Expected sources smaller than 32-bits");
530 Builder.setInsertPt(MBB&: *MI.getParent(), II: MI.getIterator());
531
532 Register ConstReg =
533 Builder.buildAnyExt(Res: LLT::integer(SizeInBits: 32), Op: MI.getOperand(i: 1).getReg())
534 .getReg(Idx: 0);
535 MRI.setRegBank(Reg: ConstReg, RegBank: getRegBank(ID: AArch64::GPRRegBankID));
536 MI.getOperand(i: 1).setReg(ConstReg);
537
538 return applyDefaultMapping(OpdMapper);
539 }
540 default:
541 llvm_unreachable("Don't know how to handle that operation");
542 }
543}
544
545const RegisterBankInfo::InstructionMapping &
546AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
547 const MachineInstr &MI) const {
548 const unsigned Opc = MI.getOpcode();
549 const MachineFunction &MF = *MI.getParent()->getParent();
550 const MachineRegisterInfo &MRI = MF.getRegInfo();
551
552 unsigned NumOperands = MI.getNumOperands();
553 assert(NumOperands <= 3 &&
554 "This code is for instructions with 3 or less operands");
555
556 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
557 TypeSize Size = Ty.getSizeInBits();
558 bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
559
560 PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
561
562#ifndef NDEBUG
563 // Make sure all the operands are using similar size and type.
564 // Should probably be checked by the machine verifier.
565 // This code won't catch cases where the number of lanes is
566 // different between the operands.
567 // If we want to go to that level of details, it is probably
568 // best to check that the types are the same, period.
569 // Currently, we just check that the register banks are the same
570 // for each types.
571 for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
572 LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
573 assert(
574 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
575 RBIdx, OpTy.getSizeInBits()) ==
576 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
577 "Operand has incompatible size");
578 bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
579 (void)OpIsFPR;
580 assert(IsFPR == OpIsFPR && "Operand has incompatible type");
581 }
582#endif // End NDEBUG.
583
584 return getInstructionMapping(ID: DefaultMappingID, Cost: 1,
585 OperandsMapping: getValueMapping(RBIdx, Size), NumOperands);
586}
587
588/// \returns true if a given intrinsic only uses and defines FPRs.
589static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
590 const MachineInstr &MI) {
591 // TODO: Add more intrinsics.
592 switch (cast<GIntrinsic>(Val: MI).getIntrinsicID()) {
593 default:
594 return false;
595 case Intrinsic::aarch64_neon_uaddlv:
596 case Intrinsic::aarch64_neon_uaddv:
597 case Intrinsic::aarch64_neon_saddv:
598 case Intrinsic::aarch64_neon_umaxv:
599 case Intrinsic::aarch64_neon_smaxv:
600 case Intrinsic::aarch64_neon_uminv:
601 case Intrinsic::aarch64_neon_sminv:
602 case Intrinsic::aarch64_neon_faddv:
603 case Intrinsic::aarch64_neon_fmaxv:
604 case Intrinsic::aarch64_neon_fminv:
605 case Intrinsic::aarch64_neon_fmaxnmv:
606 case Intrinsic::aarch64_neon_fminnmv:
607 case Intrinsic::aarch64_neon_fmulx:
608 case Intrinsic::aarch64_neon_frecpe:
609 case Intrinsic::aarch64_neon_frecps:
610 case Intrinsic::aarch64_neon_frecpx:
611 case Intrinsic::aarch64_neon_frsqrte:
612 case Intrinsic::aarch64_neon_frsqrts:
613 case Intrinsic::aarch64_neon_facge:
614 case Intrinsic::aarch64_neon_facgt:
615 case Intrinsic::aarch64_neon_fabd:
616 case Intrinsic::aarch64_neon_sqrdmlah:
617 case Intrinsic::aarch64_neon_sqrdmlsh:
618 case Intrinsic::aarch64_neon_sqrdmulh:
619 case Intrinsic::aarch64_neon_suqadd:
620 case Intrinsic::aarch64_neon_usqadd:
621 case Intrinsic::aarch64_neon_uqadd:
622 case Intrinsic::aarch64_neon_sqadd:
623 case Intrinsic::aarch64_neon_uqsub:
624 case Intrinsic::aarch64_neon_sqsub:
625 case Intrinsic::aarch64_neon_sqdmulh:
626 case Intrinsic::aarch64_neon_sqdmulls_scalar:
627 case Intrinsic::aarch64_neon_srshl:
628 case Intrinsic::aarch64_neon_urshl:
629 case Intrinsic::aarch64_neon_sqshl:
630 case Intrinsic::aarch64_neon_uqshl:
631 case Intrinsic::aarch64_neon_sqrshl:
632 case Intrinsic::aarch64_neon_uqrshl:
633 case Intrinsic::aarch64_neon_ushl:
634 case Intrinsic::aarch64_neon_sshl:
635 case Intrinsic::aarch64_neon_sqshrn:
636 case Intrinsic::aarch64_neon_sqshrun:
637 case Intrinsic::aarch64_neon_sqrshrn:
638 case Intrinsic::aarch64_neon_sqrshrun:
639 case Intrinsic::aarch64_neon_uqshrn:
640 case Intrinsic::aarch64_neon_uqrshrn:
641 case Intrinsic::aarch64_neon_sqneg:
642 case Intrinsic::aarch64_neon_sqabs:
643 case Intrinsic::aarch64_neon_scalar_uqxtn:
644 case Intrinsic::aarch64_neon_scalar_sqxtn:
645 case Intrinsic::aarch64_neon_scalar_sqxtun:
646 case Intrinsic::aarch64_crypto_sha1h:
647 case Intrinsic::aarch64_crypto_sha1c:
648 case Intrinsic::aarch64_crypto_sha1p:
649 case Intrinsic::aarch64_crypto_sha1m:
650 case Intrinsic::aarch64_sisd_fcvtxn:
651 case Intrinsic::aarch64_sisd_fabd:
652 return true;
653 case Intrinsic::aarch64_neon_saddlv: {
654 const LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg());
655 return SrcTy.getElementType().getSizeInBits() >= 16 &&
656 SrcTy.getElementCount().getFixedValue() >= 4;
657 }
658 }
659}
660
661bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
662 const MachineInstr &MI, const MachineRegisterInfo &MRI,
663 const AArch64RegisterInfo &TRI, const unsigned Depth) const {
664 if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
665 return false;
666
667 return any_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
668 P: [&](const MachineInstr &UseMI) {
669 if (onlyUsesFP(MI: UseMI, MRI, TRI, Depth: Depth + 1))
670 return true;
671 return isPHIWithFPConstraints(MI: UseMI, MRI, TRI, Depth: Depth + 1);
672 });
673}
674
675bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
676 const MachineRegisterInfo &MRI,
677 const AArch64RegisterInfo &TRI,
678 unsigned Depth) const {
679 unsigned Op = MI.getOpcode();
680 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
681 return true;
682
683 // Do we have an explicit floating point instruction?
684 if (isPreISelGenericFloatingPointOpcode(Opc: Op))
685 return true;
686
687 // No. Check if we have a copy-like instruction. If we do, then we could
688 // still be fed by floating point instructions.
689 if (Op != TargetOpcode::COPY && !MI.isPHI() &&
690 !isPreISelGenericOptimizationHint(Opcode: Op))
691 return false;
692
693 // Check if we already know the register bank.
694 auto *RB = getRegBank(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
695 if (RB == &AArch64::FPRRegBank)
696 return true;
697 if (RB == &AArch64::GPRRegBank)
698 return false;
699
700 // We don't know anything.
701 //
702 // If we have a phi, we may be able to infer that it will be assigned a FPR
703 // based off of its inputs.
704 if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
705 return false;
706
707 return any_of(Range: MI.explicit_uses(), P: [&](const MachineOperand &Op) {
708 return Op.isReg() &&
709 onlyDefinesFP(MI: *MRI.getVRegDef(Reg: Op.getReg()), MRI, TRI, Depth: Depth + 1);
710 });
711}
712
713bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
714 const MachineRegisterInfo &MRI,
715 const AArch64RegisterInfo &TRI,
716 unsigned Depth) const {
717 switch (MI.getOpcode()) {
718 case TargetOpcode::G_BITCAST: {
719 Register DstReg = MI.getOperand(i: 0).getReg();
720 return all_of(Range: MRI.use_nodbg_instructions(Reg: DstReg),
721 P: [&](const MachineInstr &UseMI) {
722 return onlyUsesFP(MI: UseMI, MRI, TRI, Depth: Depth + 1) ||
723 prefersFPUse(MI: UseMI, MRI, TRI);
724 });
725 }
726
727 case TargetOpcode::G_FPTOSI:
728 case TargetOpcode::G_FPTOUI:
729 case TargetOpcode::G_FPTOSI_SAT:
730 case TargetOpcode::G_FPTOUI_SAT:
731 case TargetOpcode::G_FCMP:
732 case TargetOpcode::G_LROUND:
733 case TargetOpcode::G_LLROUND:
734 case TargetOpcode::G_CLMUL:
735 case AArch64::G_PMULL:
736 case AArch64::G_SLI:
737 case AArch64::G_SRI:
738 case AArch64::G_FPTRUNC_ODD:
739 return true;
740 case TargetOpcode::G_INTRINSIC:
741 switch (cast<GIntrinsic>(Val: MI).getIntrinsicID()) {
742 case Intrinsic::aarch64_neon_fcvtas:
743 case Intrinsic::aarch64_neon_fcvtau:
744 case Intrinsic::aarch64_neon_fcvtzs:
745 case Intrinsic::aarch64_neon_fcvtzu:
746 case Intrinsic::aarch64_neon_fcvtms:
747 case Intrinsic::aarch64_neon_fcvtmu:
748 case Intrinsic::aarch64_neon_fcvtns:
749 case Intrinsic::aarch64_neon_fcvtnu:
750 case Intrinsic::aarch64_neon_fcvtps:
751 case Intrinsic::aarch64_neon_fcvtpu:
752 return true;
753 default:
754 break;
755 }
756 break;
757 default:
758 break;
759 }
760 return hasFPConstraints(MI, MRI, TRI, Depth);
761}
762
763bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
764 const MachineRegisterInfo &MRI,
765 const AArch64RegisterInfo &TRI,
766 unsigned Depth) const {
767 switch (MI.getOpcode()) {
768 case AArch64::G_DUP:
769 case AArch64::G_SADDLP:
770 case AArch64::G_UADDLP:
771 case TargetOpcode::G_SITOFP:
772 case TargetOpcode::G_UITOFP:
773 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
774 case TargetOpcode::G_INSERT_VECTOR_ELT:
775 case TargetOpcode::G_BUILD_VECTOR:
776 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
777 case AArch64::G_SLI:
778 case AArch64::G_SRI:
779 case AArch64::G_FPTRUNC_ODD:
780 return true;
781 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
782 switch (cast<GIntrinsic>(Val: MI).getIntrinsicID()) {
783 case Intrinsic::aarch64_neon_ld1x2:
784 case Intrinsic::aarch64_neon_ld1x3:
785 case Intrinsic::aarch64_neon_ld1x4:
786 case Intrinsic::aarch64_neon_ld2:
787 case Intrinsic::aarch64_neon_ld2lane:
788 case Intrinsic::aarch64_neon_ld2r:
789 case Intrinsic::aarch64_neon_ld3:
790 case Intrinsic::aarch64_neon_ld3lane:
791 case Intrinsic::aarch64_neon_ld3r:
792 case Intrinsic::aarch64_neon_ld4:
793 case Intrinsic::aarch64_neon_ld4lane:
794 case Intrinsic::aarch64_neon_ld4r:
795 return true;
796 default:
797 break;
798 }
799 break;
800 default:
801 break;
802 }
803 return hasFPConstraints(MI, MRI, TRI, Depth);
804}
805
806bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
807 const MachineRegisterInfo &MRI,
808 const AArch64RegisterInfo &TRI,
809 unsigned Depth) const {
810 switch (MI.getOpcode()) {
811 case TargetOpcode::G_SITOFP:
812 case TargetOpcode::G_UITOFP:
813 return MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() ==
814 MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
815 }
816 return onlyDefinesFP(MI, MRI, TRI, Depth);
817}
818
819bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
820 // GMemOperation because we also want to match indexed loads.
821 auto *MemOp = cast<GMemOperation>(Val: &MI);
822 const Value *LdVal = MemOp->getMMO().getValue();
823 if (!LdVal)
824 return false;
825
826 Type *EltTy = nullptr;
827 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: LdVal)) {
828 EltTy = GV->getValueType();
829 // Look at the first element of the struct to determine the type we are
830 // loading
831 while (StructType *StructEltTy = dyn_cast<StructType>(Val: EltTy)) {
832 if (StructEltTy->getNumElements() == 0)
833 break;
834 EltTy = StructEltTy->getTypeAtIndex(N: 0U);
835 }
836 // Look at the first element of the array to determine its type
837 if (isa<ArrayType>(Val: EltTy))
838 EltTy = EltTy->getArrayElementType();
839 } else if (!isa<Constant>(Val: LdVal)) {
840 // FIXME: grubbing around uses is pretty ugly, but with no more
841 // `getPointerElementType` there's not much else we can do.
842 for (const auto *LdUser : LdVal->users()) {
843 if (isa<LoadInst>(Val: LdUser)) {
844 EltTy = LdUser->getType();
845 break;
846 }
847 if (isa<StoreInst>(Val: LdUser) && LdUser->getOperand(i: 1) == LdVal) {
848 EltTy = LdUser->getOperand(i: 0)->getType();
849 break;
850 }
851 }
852 }
853 return EltTy && EltTy->isFPOrFPVectorTy();
854}
855
856const RegisterBankInfo::InstructionMapping &
857AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
858 const unsigned Opc = MI.getOpcode();
859
860 // Try the default logic for non-generic instructions that are either copies
861 // or already have some operands assigned to banks.
862 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opcode: Opc)) ||
863 Opc == TargetOpcode::G_PHI) {
864 const RegisterBankInfo::InstructionMapping &Mapping =
865 getInstrMappingImpl(MI);
866 if (Mapping.isValid())
867 return Mapping;
868 }
869
870 const MachineFunction &MF = *MI.getParent()->getParent();
871 const MachineRegisterInfo &MRI = MF.getRegInfo();
872 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
873 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
874
875 switch (Opc) {
876 // G_{F|S|U}REM are not listed because they are not legal.
877 // Arithmetic ops.
878 case TargetOpcode::G_ADD:
879 case TargetOpcode::G_SUB:
880 case TargetOpcode::G_PTR_ADD:
881 case TargetOpcode::G_MUL:
882 case TargetOpcode::G_SDIV:
883 case TargetOpcode::G_UDIV:
884 // Bitwise ops.
885 case TargetOpcode::G_AND:
886 case TargetOpcode::G_OR:
887 case TargetOpcode::G_XOR:
888 // Floating point ops.
889 case TargetOpcode::G_FADD:
890 case TargetOpcode::G_FSUB:
891 case TargetOpcode::G_FMUL:
892 case TargetOpcode::G_FDIV:
893 case TargetOpcode::G_FMAXIMUM:
894 case TargetOpcode::G_FMINIMUM:
895 return getSameKindOfOperandsMapping(MI);
896 case TargetOpcode::G_FPEXT: {
897 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
898 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
899 return getInstructionMapping(
900 ID: DefaultMappingID, /*Cost*/ 1,
901 OperandsMapping: getFPExtMapping(DstSize: DstTy.getSizeInBits(), SrcSize: SrcTy.getSizeInBits()),
902 /*NumOperands*/ 2);
903 }
904 // Shifts.
905 case TargetOpcode::G_SHL:
906 case TargetOpcode::G_LSHR:
907 case TargetOpcode::G_ASHR: {
908 LLT ShiftAmtTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg());
909 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
910 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
911 return getInstructionMapping(ID: DefaultMappingID, Cost: 1,
912 OperandsMapping: &ValMappings[Shift64Imm], NumOperands: 3);
913 return getSameKindOfOperandsMapping(MI);
914 }
915 case TargetOpcode::G_BITCAST: {
916 Register SrcReg = MI.getOperand(i: 1).getReg();
917 const RegisterBank *SrcRB = getRegBank(Reg: SrcReg, MRI, TRI);
918 if (SrcRB) {
919 TypeSize Size = getSizeInBits(Reg: SrcReg, MRI, TRI);
920 return getInstructionMapping(
921 ID: DefaultMappingID, Cost: 0,
922 OperandsMapping: getCopyMapping(DstBankID: SrcRB->getID(), SrcBankID: SrcRB->getID(), Size),
923 // We only care about the mapping of the destination.
924 /*NumOperands=*/2);
925 }
926 [[fallthrough]];
927 }
928 case TargetOpcode::COPY: {
929 Register DstReg = MI.getOperand(i: 0).getReg();
930 Register SrcReg = MI.getOperand(i: 1).getReg();
931 // Check if one of the register is not a generic register.
932 if ((DstReg.isPhysical() || !MRI.getType(Reg: DstReg).isValid()) ||
933 (SrcReg.isPhysical() || !MRI.getType(Reg: SrcReg).isValid())) {
934 const RegisterBank *DstRB = getRegBank(Reg: DstReg, MRI, TRI);
935 const RegisterBank *SrcRB = getRegBank(Reg: SrcReg, MRI, TRI);
936 if (!DstRB)
937 DstRB = SrcRB;
938 else if (!SrcRB)
939 SrcRB = DstRB;
940 // If both RB are null that means both registers are generic.
941 // We shouldn't be here.
942 assert(DstRB && SrcRB && "Both RegBank were nullptr");
943 TypeSize Size = getSizeInBits(Reg: DstReg, MRI, TRI);
944 return getInstructionMapping(
945 ID: DefaultMappingID, Cost: copyCost(A: *DstRB, B: *SrcRB, Size),
946 OperandsMapping: getCopyMapping(DstBankID: DstRB->getID(), SrcBankID: SrcRB->getID(), Size),
947 // We only care about the mapping of the destination.
948 /*NumOperands*/ 1);
949 }
950 // Both registers are generic
951 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
952 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
953 TypeSize Size = DstTy.getSizeInBits();
954 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
955 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
956 const RegisterBank &DstRB =
957 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
958 const RegisterBank &SrcRB =
959 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
960 return getInstructionMapping(
961 ID: DefaultMappingID, Cost: copyCost(A: DstRB, B: SrcRB, Size),
962 OperandsMapping: getCopyMapping(DstBankID: DstRB.getID(), SrcBankID: SrcRB.getID(), Size),
963 // We only care about the mapping of the destination for COPY.
964 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
965 }
966 case TargetOpcode::G_CONSTANT: {
967 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
968 TypeSize Size = DstTy.getSizeInBits();
969 if (!DstTy.isPointer() && (!DstTy.isScalar() || Size < 32 || Size > 64))
970 break;
971 // Scalar constants materialize in GPRs.
972 [[fallthrough]];
973 }
974 case TargetOpcode::G_BRCOND:
975 case TargetOpcode::G_FRAME_INDEX: {
976 // Operand 0 is the only banked operand and is mapped to GPR.
977 return getInstructionMapping(
978 ID: DefaultMappingID, /*Cost=*/1,
979 OperandsMapping: getOperandsMapping(
980 OpdsMapping: {getValueMapping(
981 RBIdx: PMI_FirstGPR,
982 Size: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits()),
983 nullptr}),
984 /*NumOperands=*/2);
985 }
986 default:
987 break;
988 }
989
990 unsigned NumOperands = MI.getNumOperands();
991 unsigned MappingID = DefaultMappingID;
992
993 // Track the size and bank of each register. We don't do partial mappings.
994 SmallVector<unsigned, 4> OpSize(NumOperands);
995 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
996 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
997 auto &MO = MI.getOperand(i: Idx);
998 if (!MO.isReg() || !MO.getReg())
999 continue;
1000
1001 LLT Ty = MRI.getType(Reg: MO.getReg());
1002 if (!Ty.isValid())
1003 continue;
1004 OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();
1005
1006 // As a top-level guess, vectors including both scalable and non-scalable
1007 // ones go in FPRs, scalars and pointers in GPRs.
1008 // For floating-point instructions, scalars go in FPRs.
1009 if (Ty.isVector())
1010 OpRegBankIdx[Idx] = PMI_FirstFPR;
1011 else if (isPreISelGenericFloatingPointOpcode(Opc) ||
1012 (MO.isDef() && onlyDefinesFP(MI, MRI, TRI)) ||
1013 (MO.isUse() && onlyUsesFP(MI, MRI, TRI)) ||
1014 Ty.getSizeInBits() > 64)
1015 OpRegBankIdx[Idx] = PMI_FirstFPR;
1016 else
1017 OpRegBankIdx[Idx] = PMI_FirstGPR;
1018 }
1019
1020 unsigned Cost = 1;
1021 // Some of the floating-point instructions have mixed GPR and FPR operands:
1022 // fine-tune the computed mapping.
1023 switch (Opc) {
1024 case TargetOpcode::G_CONSTANT: {
1025 Register Dst = MI.getOperand(i: 0).getReg();
1026 LLT DstTy = MRI.getType(Reg: Dst);
1027 if (DstTy.isScalar() && DstTy.getSizeInBits() < 32)
1028 MappingID = CustomMappingID;
1029 break;
1030 }
1031 case TargetOpcode::G_FCONSTANT: {
1032 if (preferGPRForFPImm(MI, MRI, STI)) {
1033 // Materialize in GPR and rely on later bank copies for FP uses.
1034 MappingID = CustomMappingID;
1035 OpRegBankIdx = {PMI_FirstGPR};
1036 }
1037 break;
1038 }
1039 case AArch64::G_DUP: {
1040 Register ScalarReg = MI.getOperand(i: 1).getReg();
1041 LLT ScalarTy = MRI.getType(Reg: ScalarReg);
1042 auto ScalarDef = MRI.getVRegDef(Reg: ScalarReg);
1043 // We want to select dup(load) into LD1R.
1044 if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
1045 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1046 // s8 is an exception for G_DUP, which we always want on gpr.
1047 else if (ScalarTy.getSizeInBits() != 8 &&
1048 (getRegBank(Reg: ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
1049 onlyDefinesFP(MI: *ScalarDef, MRI, TRI)))
1050 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1051 else {
1052 if (ScalarTy.getSizeInBits() < 32 &&
1053 getRegBank(Reg: ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
1054 // Calls applyMappingImpl()
1055 MappingID = CustomMappingID;
1056 }
1057 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
1058 }
1059 break;
1060 }
1061 case TargetOpcode::G_TRUNC: {
1062 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
1063 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
1064 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1065 break;
1066 }
1067 case TargetOpcode::G_SITOFP:
1068 case TargetOpcode::G_UITOFP: {
1069 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1070 break;
1071 // Integer to FP conversions don't necessarily happen between GPR -> FPR
1072 // regbanks. They can also be done within an FPR register.
1073 Register SrcReg = MI.getOperand(i: 1).getReg();
1074 if (getRegBank(Reg: SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
1075 MRI.getType(Reg: SrcReg).getSizeInBits() ==
1076 MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits())
1077 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1078 else
1079 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
1080 break;
1081 }
1082 case TargetOpcode::G_FPTOSI_SAT:
1083 case TargetOpcode::G_FPTOUI_SAT:
1084 case TargetOpcode::G_FPTOSI:
1085 case TargetOpcode::G_FPTOUI:
1086 case TargetOpcode::G_INTRINSIC_LRINT:
1087 case TargetOpcode::G_INTRINSIC_LLRINT:
1088 case TargetOpcode::G_LROUND:
1089 case TargetOpcode::G_LLROUND: {
1090 LLT DstType = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1091 if (DstType.isVector())
1092 break;
1093 if (DstType == LLT::scalar(SizeInBits: 16)) {
1094 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1095 break;
1096 }
1097 TypeSize DstSize = getSizeInBits(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
1098 TypeSize SrcSize = getSizeInBits(Reg: MI.getOperand(i: 1).getReg(), MRI, TRI);
1099 if (((DstSize == SrcSize) || STI.hasFeature(Feature: AArch64::FeatureFPRCVT)) &&
1100 all_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
1101 P: [&](const MachineInstr &UseMI) {
1102 return onlyUsesFP(MI: UseMI, MRI, TRI) ||
1103 prefersFPUse(MI: UseMI, MRI, TRI);
1104 }))
1105 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1106 else
1107 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
1108 break;
1109 }
1110 case TargetOpcode::G_FCMP: {
1111 // If the result is a vector, it must use a FPR.
1112 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
1113 MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector() ? PMI_FirstFPR
1114 : PMI_FirstGPR;
1115 OpRegBankIdx = {Idx0,
1116 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
1117 break;
1118 }
1119 case TargetOpcode::G_BITCAST:
1120 // This is going to be a cross register bank copy and this is expensive.
1121 if (OpRegBankIdx[0] != OpRegBankIdx[1])
1122 Cost = copyCost(
1123 A: *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
1124 B: *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
1125 Size: TypeSize::getFixed(ExactSize: OpSize[0]));
1126 break;
1127 case TargetOpcode::G_LOAD: {
1128 // Loading in vector unit is slightly more expensive.
1129 // This is actually only true for the LD1R and co instructions,
1130 // but anyway for the fast mode this number does not matter and
1131 // for the greedy mode the cost of the cross bank copy will
1132 // offset this number.
1133 // FIXME: Should be derived from the scheduling model.
1134 if (OpRegBankIdx[0] != PMI_FirstGPR) {
1135 Cost = 2;
1136 break;
1137 }
1138
1139 if (cast<GLoad>(Val: MI).isAtomic()) {
1140 // Atomics always use GPR destinations. Don't refine any further.
1141 OpRegBankIdx[0] = PMI_FirstGPR;
1142 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() < 32)
1143 MappingID = CustomMappingID;
1144 break;
1145 }
1146
1147 // Try to guess the type of the load from the MMO.
1148 if (isLoadFromFPType(MI)) {
1149 OpRegBankIdx[0] = PMI_FirstFPR;
1150 break;
1151 }
1152
1153 // Check if that load feeds fp instructions.
1154 // In that case, we want the default mapping to be on FPR
1155 // instead of blind map every scalar to GPR.
1156 if (any_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
1157 P: [&](const MachineInstr &UseMI) {
1158 // If we have at least one direct or indirect use
1159 // in a FP instruction,
1160 // assume this was a floating point load in the IR. If it was
1161 // not, we would have had a bitcast before reaching that
1162 // instruction.
1163 //
1164 // Int->FP conversion operations are also captured in
1165 // prefersFPUse().
1166
1167 if (isPHIWithFPConstraints(MI: UseMI, MRI, TRI))
1168 return true;
1169
1170 return onlyUsesFP(MI: UseMI, MRI, TRI) ||
1171 prefersFPUse(MI: UseMI, MRI, TRI);
1172 }))
1173 OpRegBankIdx[0] = PMI_FirstFPR;
1174
1175 // On GPR, extend any load < 32bits to 32bit.
1176 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1177 if (Ty.isScalar() && Ty.getSizeInBits() < 32)
1178 MappingID = CustomMappingID;
1179 break;
1180 }
1181 case TargetOpcode::G_STORE:
1182 // Check if that store is fed by fp instructions.
1183 if (OpRegBankIdx[0] == PMI_FirstGPR) {
1184 Register VReg = MI.getOperand(i: 0).getReg();
1185 if (VReg) {
1186 MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
1187 if (onlyDefinesFP(MI: *DefMI, MRI, TRI)) {
1188 OpRegBankIdx[0] = PMI_FirstFPR;
1189 break;
1190 }
1191 }
1192
1193 // On GPR, extend any store < 32bits to 32bit.
1194 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1195 if (Ty.isScalar() && Ty.getSizeInBits() < 32)
1196 MappingID = CustomMappingID;
1197 }
1198 break;
1199 case TargetOpcode::G_INDEXED_STORE:
1200 if (OpRegBankIdx[1] == PMI_FirstGPR) {
1201 Register VReg = MI.getOperand(i: 1).getReg();
1202 if (!VReg)
1203 break;
1204 MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
1205 if (onlyDefinesFP(MI: *DefMI, MRI, TRI))
1206 OpRegBankIdx[1] = PMI_FirstFPR;
1207 break;
1208 }
1209 break;
1210 case TargetOpcode::G_INDEXED_SEXTLOAD:
1211 case TargetOpcode::G_INDEXED_ZEXTLOAD:
1212 // These should always be GPR.
1213 OpRegBankIdx[0] = PMI_FirstGPR;
1214 break;
1215 case TargetOpcode::G_INDEXED_LOAD: {
1216 if (isLoadFromFPType(MI))
1217 OpRegBankIdx[0] = PMI_FirstFPR;
1218 break;
1219 }
1220 case TargetOpcode::G_SELECT: {
1221 // If the destination is FPR, preserve that.
1222 if (OpRegBankIdx[0] != PMI_FirstGPR)
1223 break;
1224
1225 // If we're taking in vectors, we have no choice but to put everything on
1226 // FPRs, except for the condition. The condition must always be on a GPR.
1227 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg());
1228 if (SrcTy.isVector()) {
1229 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
1230 break;
1231 }
1232
1233 // Try to minimize the number of copies. If we have more floating point
1234 // constrained values than not, then we'll put everything on FPR. Otherwise,
1235 // everything has to be on GPR.
1236 unsigned NumFP = 0;
1237
1238 // Check if the uses of the result always produce floating point values.
1239 //
1240 // For example:
1241 //
1242 // %z = G_SELECT %cond %x %y
1243 // fpr = G_FOO %z ...
1244 if (any_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
1245 P: [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
1246 ++NumFP;
1247
1248 // Check if the defs of the source values always produce floating point
1249 // values.
1250 //
1251 // For example:
1252 //
1253 // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
1254 // %z = G_SELECT %cond %x %y
1255 //
1256 // Also check whether or not the sources have already been decided to be
1257 // FPR. Keep track of this.
1258 //
1259 // This doesn't check the condition, since it's just whatever is in NZCV.
1260 // This isn't passed explicitly in a register to fcsel/csel.
1261 for (unsigned Idx = 2; Idx < 4; ++Idx) {
1262 Register VReg = MI.getOperand(i: Idx).getReg();
1263 MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
1264 if (getRegBank(Reg: VReg, MRI, TRI) == &AArch64::FPRRegBank ||
1265 onlyDefinesFP(MI: *DefMI, MRI, TRI))
1266 ++NumFP;
1267 }
1268
1269 // If we have more FP constraints than not, then move everything over to
1270 // FPR.
1271 if (NumFP >= 2)
1272 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
1273
1274 break;
1275 }
1276 case TargetOpcode::G_UNMERGE_VALUES: {
1277 // If the first operand belongs to a FPR register bank, then make sure that
1278 // we preserve that.
1279 if (OpRegBankIdx[0] != PMI_FirstGPR)
1280 break;
1281
1282 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: MI.getNumOperands()-1).getReg());
1283 // UNMERGE into scalars from a vector should always use FPR.
1284 // Likewise if any of the uses are FP instructions.
1285 if (SrcTy.isVector() || SrcTy == LLT::scalar(SizeInBits: 128) ||
1286 any_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
1287 P: [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
1288 // Set the register bank of every operand to FPR.
1289 for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
1290 Idx < NumOperands; ++Idx)
1291 OpRegBankIdx[Idx] = PMI_FirstFPR;
1292 }
1293 break;
1294 }
1295 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1296 // Destination and source need to be FPRs.
1297 OpRegBankIdx[0] = PMI_FirstFPR;
1298 OpRegBankIdx[1] = PMI_FirstFPR;
1299
1300 // Index needs to be a GPR.
1301 OpRegBankIdx[2] = PMI_FirstGPR;
1302 break;
1303 case AArch64::G_SQSHLU_I:
1304 // Destination and source need to be FPRs.
1305 OpRegBankIdx[0] = PMI_FirstFPR;
1306 OpRegBankIdx[1] = PMI_FirstFPR;
1307
1308 // Shift Index needs to be a GPR.
1309 OpRegBankIdx[2] = PMI_FirstGPR;
1310 break;
1311
1312 case TargetOpcode::G_INSERT_VECTOR_ELT:
1313 OpRegBankIdx[0] = PMI_FirstFPR;
1314 OpRegBankIdx[1] = PMI_FirstFPR;
1315
1316 // The element may be either a GPR or FPR. Preserve that behaviour.
1317 if (getRegBank(Reg: MI.getOperand(i: 2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
1318 OpRegBankIdx[2] = PMI_FirstFPR;
1319 else {
1320 OpRegBankIdx[2] = PMI_FirstGPR;
1321 }
1322
1323 // Index needs to be a GPR.
1324 OpRegBankIdx[3] = PMI_FirstGPR;
1325 break;
1326 case TargetOpcode::G_EXTRACT: {
1327 // For s128 sources we have to use fpr unless we know otherwise.
1328 auto Src = MI.getOperand(i: 1).getReg();
1329 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
1330 if (SrcTy.getSizeInBits() != 128)
1331 break;
1332 auto Idx = MRI.getRegClassOrNull(Reg: Src) == &AArch64::XSeqPairsClassRegClass
1333 ? PMI_FirstGPR
1334 : PMI_FirstFPR;
1335 OpRegBankIdx[0] = Idx;
1336 OpRegBankIdx[1] = Idx;
1337 break;
1338 }
1339 case TargetOpcode::G_BUILD_VECTOR: {
1340 // If the first source operand belongs to a FPR register bank, then make
1341 // sure that we preserve that.
1342 if (OpRegBankIdx[1] != PMI_FirstGPR)
1343 break;
1344 Register VReg = MI.getOperand(i: 1).getReg();
1345 if (!VReg)
1346 break;
1347
1348 // Get the instruction that defined the source operand reg, and check if
1349 // it's a floating point operation. Or, if it's a type like s16 which
1350 // doesn't have a exact size gpr register class. The exception is if the
1351 // build_vector has all constant operands, which may be better to leave as
1352 // gpr without copies, so it can be matched in imported patterns.
1353 MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
1354 unsigned DefOpc = DefMI->getOpcode();
1355 const LLT SrcTy = MRI.getType(Reg: VReg);
1356 if (all_of(Range: MI.operands(), P: [&](const MachineOperand &Op) {
1357 return Op.isDef() || MRI.getVRegDef(Reg: Op.getReg())->getOpcode() ==
1358 TargetOpcode::G_CONSTANT;
1359 }))
1360 break;
1361 if (isPreISelGenericFloatingPointOpcode(Opc: DefOpc) ||
1362 SrcTy.getSizeInBits() < 32 ||
1363 getRegBank(Reg: VReg, MRI, TRI) == &AArch64::FPRRegBank) {
1364 // Have a floating point op.
1365 // Make sure every operand gets mapped to a FPR register class.
1366 unsigned NumOperands = MI.getNumOperands();
1367 for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
1368 OpRegBankIdx[Idx] = PMI_FirstFPR;
1369 }
1370 break;
1371 }
1372 case TargetOpcode::G_VECREDUCE_FADD:
1373 case TargetOpcode::G_VECREDUCE_FMUL:
1374 case TargetOpcode::G_VECREDUCE_FMAX:
1375 case TargetOpcode::G_VECREDUCE_FMIN:
1376 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1377 case TargetOpcode::G_VECREDUCE_FMINIMUM:
1378 case TargetOpcode::G_VECREDUCE_ADD:
1379 case TargetOpcode::G_VECREDUCE_MUL:
1380 case TargetOpcode::G_VECREDUCE_AND:
1381 case TargetOpcode::G_VECREDUCE_OR:
1382 case TargetOpcode::G_VECREDUCE_XOR:
1383 case TargetOpcode::G_VECREDUCE_SMAX:
1384 case TargetOpcode::G_VECREDUCE_SMIN:
1385 case TargetOpcode::G_VECREDUCE_UMAX:
1386 case TargetOpcode::G_VECREDUCE_UMIN:
1387 // Reductions produce a scalar value from a vector, the scalar should be on
1388 // FPR bank.
1389 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1390 break;
1391 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
1392 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
1393 // These reductions also take a scalar accumulator input.
1394 // Assign them FPR for now.
1395 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
1396 break;
1397 case TargetOpcode::G_INTRINSIC:
1398 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
1399 switch (cast<GIntrinsic>(Val: MI).getIntrinsicID()) {
1400 case Intrinsic::aarch64_neon_fcvtas:
1401 case Intrinsic::aarch64_neon_fcvtau:
1402 case Intrinsic::aarch64_neon_fcvtzs:
1403 case Intrinsic::aarch64_neon_fcvtzu:
1404 case Intrinsic::aarch64_neon_fcvtms:
1405 case Intrinsic::aarch64_neon_fcvtmu:
1406 case Intrinsic::aarch64_neon_fcvtns:
1407 case Intrinsic::aarch64_neon_fcvtnu:
1408 case Intrinsic::aarch64_neon_fcvtps:
1409 case Intrinsic::aarch64_neon_fcvtpu: {
1410 OpRegBankIdx[2] = PMI_FirstFPR;
1411 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector()) {
1412 OpRegBankIdx[0] = PMI_FirstFPR;
1413 break;
1414 }
1415 TypeSize DstSize = getSizeInBits(Reg: MI.getOperand(i: 0).getReg(), MRI, TRI);
1416 TypeSize SrcSize = getSizeInBits(Reg: MI.getOperand(i: 2).getReg(), MRI, TRI);
1417 // Fp conversions to i16 must be kept on fp register banks to ensure
1418 // proper saturation, as there are no 16-bit gprs.
1419 // In addition, conversion intrinsics have fpr output when the input
1420 // size matches the output size, or FPRCVT is present.
1421 if (DstSize == 16 ||
1422 ((DstSize == SrcSize || STI.hasFeature(Feature: AArch64::FeatureFPRCVT)) &&
1423 all_of(Range: MRI.use_nodbg_instructions(Reg: MI.getOperand(i: 0).getReg()),
1424 P: [&](const MachineInstr &UseMI) {
1425 return onlyUsesFP(MI: UseMI, MRI, TRI) ||
1426 prefersFPUse(MI: UseMI, MRI, TRI);
1427 })))
1428 OpRegBankIdx[0] = PMI_FirstFPR;
1429 else
1430 OpRegBankIdx[0] = PMI_FirstGPR;
1431 break;
1432 }
1433 case Intrinsic::aarch64_neon_vcvtfxs2fp:
1434 case Intrinsic::aarch64_neon_vcvtfxu2fp:
1435 case Intrinsic::aarch64_neon_vcvtfp2fxs:
1436 case Intrinsic::aarch64_neon_vcvtfp2fxu:
1437 // Override these intrinsics, because they would have a partial
1438 // mapping. This is needed for 'half' types, which otherwise don't
1439 // get legalised correctly.
1440 OpRegBankIdx[0] = PMI_FirstFPR;
1441 OpRegBankIdx[2] = PMI_FirstFPR;
1442 // OpRegBankIdx[1] is the intrinsic ID.
1443 // OpRegBankIdx[3] is an integer immediate.
1444 break;
1445 default: {
1446 // Check if we know that the intrinsic has any constraints on its register
1447 // banks. If it does, then update the mapping accordingly.
1448 unsigned Idx = 0;
1449 if (onlyDefinesFP(MI, MRI, TRI))
1450 for (const auto &Op : MI.defs()) {
1451 if (Op.isReg())
1452 OpRegBankIdx[Idx] = PMI_FirstFPR;
1453 ++Idx;
1454 }
1455 else
1456 Idx += MI.getNumExplicitDefs();
1457
1458 if (onlyUsesFP(MI, MRI, TRI))
1459 for (const auto &Op : MI.explicit_uses()) {
1460 if (Op.isReg())
1461 OpRegBankIdx[Idx] = PMI_FirstFPR;
1462 ++Idx;
1463 }
1464 break;
1465 }
1466 }
1467 break;
1468 }
1469 }
1470
1471 // Finally construct the computed mapping.
1472 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
1473 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
1474 if (MI.getOperand(i: Idx).isReg() && MI.getOperand(i: Idx).getReg()) {
1475 LLT Ty = MRI.getType(Reg: MI.getOperand(i: Idx).getReg());
1476 if (!Ty.isValid())
1477 continue;
1478 auto Mapping =
1479 getValueMapping(RBIdx: OpRegBankIdx[Idx], Size: TypeSize::getFixed(ExactSize: OpSize[Idx]));
1480 if (!Mapping->isValid())
1481 return getInvalidInstructionMapping();
1482
1483 OpdsMapping[Idx] = Mapping;
1484 }
1485 }
1486
1487 return getInstructionMapping(ID: MappingID, Cost, OperandsMapping: getOperandsMapping(OpdsMapping),
1488 NumOperands);
1489}
1490