1 | //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements the targeting of the InstructionSelector class for |
10 | /// AArch64. |
11 | /// \todo This should be generated by TableGen. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AArch64GlobalISelUtils.h" |
15 | #include "AArch64InstrInfo.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64RegisterBankInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "AArch64TargetMachine.h" |
21 | #include "MCTargetDesc/AArch64AddressingModes.h" |
22 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
23 | #include "llvm/BinaryFormat/Dwarf.h" |
24 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
25 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
27 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
28 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
29 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineConstantPool.h" |
32 | #include "llvm/CodeGen/MachineFrameInfo.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/TargetOpcodes.h" |
40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | #include "llvm/IR/Constants.h" |
42 | #include "llvm/IR/DerivedTypes.h" |
43 | #include "llvm/IR/Instructions.h" |
44 | #include "llvm/IR/IntrinsicsAArch64.h" |
45 | #include "llvm/IR/PatternMatch.h" |
46 | #include "llvm/IR/Type.h" |
47 | #include "llvm/Pass.h" |
48 | #include "llvm/Support/Debug.h" |
49 | #include "llvm/Support/raw_ostream.h" |
50 | #include <optional> |
51 | |
52 | #define DEBUG_TYPE "aarch64-isel" |
53 | |
54 | using namespace llvm; |
55 | using namespace MIPatternMatch; |
56 | using namespace AArch64GISelUtils; |
57 | |
58 | namespace llvm { |
59 | class BlockFrequencyInfo; |
60 | class ProfileSummaryInfo; |
61 | } |
62 | |
63 | namespace { |
64 | |
65 | #define GET_GLOBALISEL_PREDICATE_BITSET |
66 | #include "AArch64GenGlobalISel.inc" |
67 | #undef GET_GLOBALISEL_PREDICATE_BITSET |
68 | |
69 | |
70 | class AArch64InstructionSelector : public InstructionSelector { |
71 | public: |
72 | AArch64InstructionSelector(const AArch64TargetMachine &TM, |
73 | const AArch64Subtarget &STI, |
74 | const AArch64RegisterBankInfo &RBI); |
75 | |
76 | bool select(MachineInstr &I) override; |
77 | static const char *getName() { return DEBUG_TYPE; } |
78 | |
79 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, |
80 | CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, |
81 | BlockFrequencyInfo *BFI) override { |
82 | InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); |
83 | MIB.setMF(MF); |
84 | |
85 | // hasFnAttribute() is expensive to call on every BRCOND selection, so |
86 | // cache it here for each run of the selector. |
87 | ProduceNonFlagSettingCondBr = |
88 | !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); |
89 | MFReturnAddr = Register(); |
90 | |
91 | processPHIs(MF); |
92 | } |
93 | |
94 | private: |
95 | /// tblgen-erated 'select' implementation, used as the initial selector for |
96 | /// the patterns that don't require complex C++. |
97 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
98 | |
99 | // A lowering phase that runs before any selection attempts. |
100 | // Returns true if the instruction was modified. |
101 | bool preISelLower(MachineInstr &I); |
102 | |
103 | // An early selection function that runs before the selectImpl() call. |
104 | bool earlySelect(MachineInstr &I); |
105 | |
106 | /// Save state that is shared between select calls, call select on \p I and |
107 | /// then restore the saved state. This can be used to recursively call select |
108 | /// within a select call. |
109 | bool selectAndRestoreState(MachineInstr &I); |
110 | |
111 | // Do some preprocessing of G_PHIs before we begin selection. |
112 | void processPHIs(MachineFunction &MF); |
113 | |
114 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
115 | |
116 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). |
117 | bool contractCrossBankCopyIntoStore(MachineInstr &I, |
118 | MachineRegisterInfo &MRI); |
119 | |
120 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); |
121 | |
122 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, |
123 | MachineRegisterInfo &MRI) const; |
124 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, |
125 | MachineRegisterInfo &MRI) const; |
126 | |
127 | ///@{ |
128 | /// Helper functions for selectCompareBranch. |
129 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, |
130 | MachineIRBuilder &MIB) const; |
131 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
132 | MachineIRBuilder &MIB) const; |
133 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
134 | MachineIRBuilder &MIB) const; |
135 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, |
136 | MachineBasicBlock *DstMBB, |
137 | MachineIRBuilder &MIB) const; |
138 | ///@} |
139 | |
140 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, |
141 | MachineRegisterInfo &MRI); |
142 | |
143 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); |
144 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
145 | |
146 | // Helper to generate an equivalent of scalar_to_vector into a new register, |
147 | // returned via 'Dst'. |
148 | MachineInstr *emitScalarToVector(unsigned EltSize, |
149 | const TargetRegisterClass *DstRC, |
150 | Register Scalar, |
151 | MachineIRBuilder &MIRBuilder) const; |
152 | /// Helper to narrow a vector that was widened by emitScalarToVector. |
153 | /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or |
154 | /// 32-bit vector, respectively. |
155 | MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg, |
156 | MachineIRBuilder &MIRBuilder, |
157 | MachineRegisterInfo &MRI) const; |
158 | |
159 | /// Emit a lane insert into \p DstReg, or a new vector register if |
160 | /// std::nullopt is provided. |
161 | /// |
162 | /// The lane inserted into is defined by \p LaneIdx. The vector source |
163 | /// register is given by \p SrcReg. The register containing the element is |
164 | /// given by \p EltReg. |
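/// For illustration only (an assumption, not part of the original comment):
/// inserting a 32-bit element typically ends up as an INSvi32lane when
/// \p EltReg is on the FPR bank, or an INSvi32gpr when it comes from the GPR
/// bank.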
165 | MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg, |
166 | Register EltReg, unsigned LaneIdx, |
167 | const RegisterBank &RB, |
168 | MachineIRBuilder &MIRBuilder) const; |
169 | |
170 | /// Emit a sequence of instructions representing a constant \p CV for a |
171 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) |
172 | /// |
173 | /// \returns the last instruction in the sequence on success, and nullptr |
174 | /// otherwise. |
175 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, |
176 | MachineIRBuilder &MIRBuilder, |
177 | MachineRegisterInfo &MRI); |
178 | |
179 | MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits, |
180 | MachineIRBuilder &MIRBuilder); |
181 | |
182 | MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits, |
183 | MachineIRBuilder &MIRBuilder, bool Inv); |
184 | |
185 | MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits, |
186 | MachineIRBuilder &MIRBuilder, bool Inv); |
187 | MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits, |
188 | MachineIRBuilder &MIRBuilder); |
189 | MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits, |
190 | MachineIRBuilder &MIRBuilder, bool Inv); |
191 | MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits, |
192 | MachineIRBuilder &MIRBuilder); |
193 | |
194 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, |
195 | MachineRegisterInfo &MRI); |
196 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a |
197 | /// SUBREG_TO_REG. |
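/// For illustration only (a sketch, not taken from the original comments),
/// the shape this targets is a build vector whose only defined element is
/// element 0, with the remaining elements undef:
/// \code
///   %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %elt:fpr(s64), %undef:fpr(s64)
///   ; can be selected as
///   %vec:fpr128 = SUBREG_TO_REG 0, %elt:fpr64, %subreg.dsub
/// \endcode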
198 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); |
199 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); |
200 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
201 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
202 | |
203 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); |
204 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); |
205 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); |
206 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); |
207 | |
208 | /// Helper function to select vector load intrinsics like |
209 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. |
210 | /// \p Opc is the opcode that the selected instruction should use. |
211 | /// \p NumVecs is the number of vector destinations for the instruction. |
212 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. |
213 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, |
214 | MachineInstr &I); |
215 | bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs, |
216 | MachineInstr &I); |
217 | void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs, |
218 | unsigned Opc); |
219 | bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs, |
220 | unsigned Opc); |
221 | bool selectIntrinsicWithSideEffects(MachineInstr &I, |
222 | MachineRegisterInfo &MRI); |
223 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); |
224 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); |
225 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); |
226 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); |
227 | bool selectPtrAuthGlobalValue(MachineInstr &I, |
228 | MachineRegisterInfo &MRI) const; |
229 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); |
230 | bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); |
231 | bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); |
232 | void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs, |
233 | unsigned Opc1, unsigned Opc2, bool isExt); |
234 | |
235 | bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
236 | bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
237 | bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI); |
238 | |
239 | unsigned emitConstantPoolEntry(const Constant *CPVal, |
240 | MachineFunction &MF) const; |
241 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, |
242 | MachineIRBuilder &MIRBuilder) const; |
243 | |
244 | // Emit a vector concat operation. |
245 | MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1, |
246 | Register Op2, |
247 | MachineIRBuilder &MIRBuilder) const; |
248 | |
249 | // Emit an integer compare between LHS and RHS, which checks for Predicate. |
250 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
251 | MachineOperand &Predicate, |
252 | MachineIRBuilder &MIRBuilder) const; |
253 | |
254 | /// Emit a floating point comparison between \p LHS and \p RHS. |
255 | /// \p Pred if given is the intended predicate to use. |
256 | MachineInstr * |
257 | emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
258 | std::optional<CmpInst::Predicate> = std::nullopt) const; |
259 | |
260 | MachineInstr * |
261 | emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
262 | std::initializer_list<llvm::SrcOp> SrcOps, |
263 | MachineIRBuilder &MIRBuilder, |
264 | const ComplexRendererFns &RenderFns = std::nullopt) const; |
265 | /// Helper function to emit an add or sub instruction. |
266 | /// |
267 | /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above |
268 | /// in a specific order. |
269 | /// |
270 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. |
271 | /// |
272 | /// \code |
273 | /// const std::array<std::array<unsigned, 2>, 4> Table { |
274 | /// {{AArch64::ADDXri, AArch64::ADDWri}, |
275 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, |
276 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, |
277 | /// {AArch64::SUBXri, AArch64::SUBWri}, |
278 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
279 | /// \endcode |
280 | /// |
281 | /// Each row in the table corresponds to a different addressing mode. Each |
282 | /// column corresponds to a different register size. |
283 | /// |
284 | /// \attention Rows must be structured as follows: |
285 | /// - Row 0: The ri opcode variants |
286 | /// - Row 1: The rs opcode variants |
287 | /// - Row 2: The rr opcode variants |
288 | /// - Row 3: The ri opcode variants for negative immediates |
289 | /// - Row 4: The rx opcode variants |
290 | /// |
291 | /// \attention Columns must be structured as follows: |
292 | /// - Column 0: The 64-bit opcode variants |
293 | /// - Column 1: The 32-bit opcode variants |
294 | /// |
295 | /// \p Dst is the destination register of the binop to emit. |
296 | /// \p LHS is the left-hand operand of the binop to emit. |
297 | /// \p RHS is the right-hand operand of the binop to emit. |
298 | MachineInstr *emitAddSub( |
299 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
300 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
301 | MachineIRBuilder &MIRBuilder) const; |
302 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, |
303 | MachineOperand &RHS, |
304 | MachineIRBuilder &MIRBuilder) const; |
305 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
306 | MachineIRBuilder &MIRBuilder) const; |
307 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
308 | MachineIRBuilder &MIRBuilder) const; |
309 | MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
310 | MachineIRBuilder &MIRBuilder) const; |
311 | MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
312 | MachineIRBuilder &MIRBuilder) const; |
313 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
314 | MachineIRBuilder &MIRBuilder) const; |
315 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, |
316 | MachineIRBuilder &MIRBuilder) const; |
317 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, |
318 | AArch64CC::CondCode CC, |
319 | MachineIRBuilder &MIRBuilder) const; |
320 | MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg, |
321 | const RegisterBank &DstRB, LLT ScalarTy, |
322 | Register VecReg, unsigned LaneIdx, |
323 | MachineIRBuilder &MIRBuilder) const; |
324 | MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, |
325 | AArch64CC::CondCode Pred, |
326 | MachineIRBuilder &MIRBuilder) const; |
327 | /// Emit a CSet for an FP compare. |
328 | /// |
329 | /// \p Dst is expected to be a 32-bit scalar register. |
330 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, |
331 | MachineIRBuilder &MIRBuilder) const; |
332 | |
333 | /// Emit an instruction that sets NZCV to the carry-in expected by \p I. |
334 | /// Might elide the instruction if the previous instruction already sets NZCV |
335 | /// correctly. |
336 | MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg); |
337 | |
338 | /// Emit the overflow op for \p Opcode. |
339 | /// |
340 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, |
341 | /// G_USUBO, etc. |
342 | std::pair<MachineInstr *, AArch64CC::CondCode> |
343 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, |
344 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; |
345 | |
346 | bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); |
347 | |
348 | /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). |
349 | /// In some cases this is even possible with OR operations in the expression. |
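/// Illustrative sketch (an assumption, not part of the original comment): for
///   %c = G_AND (G_ICMP slt, %a, %b), (G_ICMP sgt, %x, %y)
/// the intent is roughly
///   SUBSWrr %a, %b             ; set flags for the first compare
///   CCMPWr  %x, %y, #nzcv, lt  ; second compare only counts if LT held
/// so that a single condition code (GT here) tests the whole conjunction.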
350 | MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, |
351 | MachineIRBuilder &MIB) const; |
352 | MachineInstr *emitConditionalComparison(Register LHS, Register RHS, |
353 | CmpInst::Predicate CC, |
354 | AArch64CC::CondCode Predicate, |
355 | AArch64CC::CondCode OutCC, |
356 | MachineIRBuilder &MIB) const; |
357 | MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, |
358 | bool Negate, Register CCOp, |
359 | AArch64CC::CondCode Predicate, |
360 | MachineIRBuilder &MIB) const; |
361 | |
362 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. |
363 | /// \p IsNegative is true if the test should be "not zero". |
364 | /// This will also optimize the test bit instruction when possible. |
365 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, |
366 | MachineBasicBlock *DstMBB, |
367 | MachineIRBuilder &MIB) const; |
368 | |
369 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. |
370 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, |
371 | MachineBasicBlock *DestMBB, |
372 | MachineIRBuilder &MIB) const; |
373 | |
374 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. |
375 | // We use these manually instead of using the importer since it doesn't |
376 | // support SDNodeXForm. |
377 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; |
378 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; |
379 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; |
380 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; |
381 | |
382 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; |
383 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; |
384 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; |
385 | |
386 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, |
387 | unsigned Size) const; |
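// Illustrative note (an assumption, not from the original source): the
// "unscaled" matcher above and the size-specific wrappers below correspond to
// the LDUR/STUR family, which takes a base register plus a signed 9-bit byte
// offset, e.g. "ldur w0, [x1, #-3]", whereas selectAddrModeIndexed() matches
// the scaled unsigned-immediate form, e.g. "ldr w0, [x1, #8]".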
388 | |
389 | ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { |
390 | return selectAddrModeUnscaled(Root, 1); |
391 | } |
392 | ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { |
393 | return selectAddrModeUnscaled(Root, 2); |
394 | } |
395 | ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { |
396 | return selectAddrModeUnscaled(Root, 4); |
397 | } |
398 | ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { |
399 | return selectAddrModeUnscaled(Root, 8); |
400 | } |
401 | ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { |
402 | return selectAddrModeUnscaled(Root, 16); |
403 | } |
404 | |
405 | /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used |
406 | /// from complex pattern matchers like selectAddrModeIndexed(). |
407 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, |
408 | MachineRegisterInfo &MRI) const; |
409 | |
410 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, |
411 | unsigned Size) const; |
412 | template <int Width> |
413 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { |
414 | return selectAddrModeIndexed(Root, Width / 8); |
415 | } |
416 | |
417 | std::optional<bool> |
418 | isWorthFoldingIntoAddrMode(MachineInstr &MI, |
419 | const MachineRegisterInfo &MRI) const; |
420 | |
421 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, |
422 | const MachineRegisterInfo &MRI, |
423 | bool IsAddrOperand) const; |
424 | ComplexRendererFns |
425 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, |
426 | unsigned SizeInBytes) const; |
427 | |
428 | /// Returns a \p ComplexRendererFns which contains a base, offset, and whether |
429 | /// or not a shift + extend should be folded into an addressing mode. Returns |
430 | /// None when this is not profitable or possible. |
431 | ComplexRendererFns |
432 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, |
433 | MachineOperand &Offset, unsigned SizeInBytes, |
434 | bool WantsExt) const; |
435 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; |
436 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, |
437 | unsigned SizeInBytes) const; |
438 | template <int Width> |
439 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { |
440 | return selectAddrModeXRO(Root, Width / 8); |
441 | } |
442 | |
443 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, |
444 | unsigned SizeInBytes) const; |
445 | template <int Width> |
446 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { |
447 | return selectAddrModeWRO(Root, Width / 8); |
448 | } |
449 | |
450 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root, |
451 | bool AllowROR = false) const; |
452 | |
453 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { |
454 | return selectShiftedRegister(Root); |
455 | } |
456 | |
457 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { |
458 | return selectShiftedRegister(Root, true); |
459 | } |
460 | |
461 | /// Given an extend instruction, determine the correct shift-extend type for |
462 | /// that instruction. |
463 | /// |
464 | /// If the instruction is going to be used in a load or store, pass |
465 | /// \p IsLoadStore = true. |
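/// For illustration (an assumption, not part of the original comment): a
/// G_ZEXT from s8/s16/s32 maps to UXTB/UXTH/UXTW, a G_SEXT to SXTB/SXTH/SXTW,
/// and a G_AND with mask 0xff/0xffff can be treated like the matching
/// zero-extend.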
466 | AArch64_AM::ShiftExtendType |
467 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, |
468 | bool IsLoadStore = false) const; |
469 | |
470 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. |
471 | /// |
472 | /// \returns Either \p Reg if no change was necessary, or the new register |
473 | /// created by moving \p Reg. |
474 | /// |
475 | /// Note: This uses emitCopy right now. |
476 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, |
477 | MachineIRBuilder &MIB) const; |
478 | |
479 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; |
480 | |
481 | ComplexRendererFns selectExtractHigh(MachineOperand &Root) const; |
482 | |
483 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
484 | int OpIdx = -1) const; |
485 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, |
486 | int OpIdx = -1) const; |
487 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, |
488 | int OpIdx = -1) const; |
489 | void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI, |
490 | int OpIdx) const; |
491 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, |
492 | int OpIdx = -1) const; |
493 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, |
494 | int OpIdx = -1) const; |
495 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, |
496 | int OpIdx = -1) const; |
497 | void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, |
498 | const MachineInstr &MI, |
499 | int OpIdx = -1) const; |
500 | |
501 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. |
502 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); |
503 | |
504 | // Optimization methods. |
505 | bool tryOptSelect(GSelect &Sel); |
506 | bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); |
507 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
508 | MachineOperand &Predicate, |
509 | MachineIRBuilder &MIRBuilder) const; |
510 | |
511 | /// Return true if \p MI is a load or store of \p NumBytes bytes. |
512 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; |
513 | |
514 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit |
515 | /// register zeroed out. In other words, the result of MI has been explicitly |
516 | /// zero extended. |
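/// Illustrative example (relying on standard AArch64 semantics, not on the
/// original comment): a value produced by a 32-bit instruction such as ADDWrr
/// is written to a W register, which implicitly zeroes bits [63:32] of the
/// corresponding X register.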
517 | bool isDef32(const MachineInstr &MI) const; |
518 | |
519 | const AArch64TargetMachine &TM; |
520 | const AArch64Subtarget &STI; |
521 | const AArch64InstrInfo &TII; |
522 | const AArch64RegisterInfo &TRI; |
523 | const AArch64RegisterBankInfo &RBI; |
524 | |
525 | bool ProduceNonFlagSettingCondBr = false; |
526 | |
527 | // Some cached values used during selection. |
528 | // We use LR as a live-in register, and we keep track of it here as it can be |
529 | // clobbered by calls. |
530 | Register MFReturnAddr; |
531 | |
532 | MachineIRBuilder MIB; |
533 | |
534 | #define GET_GLOBALISEL_PREDICATES_DECL |
535 | #include "AArch64GenGlobalISel.inc" |
536 | #undef GET_GLOBALISEL_PREDICATES_DECL |
537 | |
538 | // We declare the temporaries used by selectImpl() in the class to minimize the |
539 | // cost of constructing placeholder values. |
540 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
541 | #include "AArch64GenGlobalISel.inc" |
542 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
543 | }; |
544 | |
545 | } // end anonymous namespace |
546 | |
547 | #define GET_GLOBALISEL_IMPL |
548 | #include "AArch64GenGlobalISel.inc" |
549 | #undef GET_GLOBALISEL_IMPL |
550 | |
551 | AArch64InstructionSelector::AArch64InstructionSelector( |
552 | const AArch64TargetMachine &TM, const AArch64Subtarget &STI, |
553 | const AArch64RegisterBankInfo &RBI) |
554 | : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), |
555 | RBI(RBI), |
556 | #define GET_GLOBALISEL_PREDICATES_INIT |
557 | #include "AArch64GenGlobalISel.inc" |
558 | #undef GET_GLOBALISEL_PREDICATES_INIT |
559 | #define GET_GLOBALISEL_TEMPORARIES_INIT |
560 | #include "AArch64GenGlobalISel.inc" |
561 | #undef GET_GLOBALISEL_TEMPORARIES_INIT |
562 | { |
563 | } |
564 | |
565 | // FIXME: This should be target-independent, inferred from the types declared |
566 | // for each class in the bank. |
567 | // |
568 | /// Given a register bank, and a type, return the smallest register class that |
569 | /// can represent that combination. |
570 | static const TargetRegisterClass * |
571 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, |
572 | bool GetAllRegSet = false) { |
573 | if (RB.getID() == AArch64::GPRRegBankID) { |
574 | if (Ty.getSizeInBits() <= 32) |
575 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
576 | : &AArch64::GPR32RegClass; |
577 | if (Ty.getSizeInBits() == 64) |
578 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
579 | : &AArch64::GPR64RegClass; |
580 | if (Ty.getSizeInBits() == 128) |
581 | return &AArch64::XSeqPairsClassRegClass; |
582 | return nullptr; |
583 | } |
584 | |
585 | if (RB.getID() == AArch64::FPRRegBankID) { |
586 | switch (Ty.getSizeInBits()) { |
587 | case 8: |
588 | return &AArch64::FPR8RegClass; |
589 | case 16: |
590 | return &AArch64::FPR16RegClass; |
591 | case 32: |
592 | return &AArch64::FPR32RegClass; |
593 | case 64: |
594 | return &AArch64::FPR64RegClass; |
595 | case 128: |
596 | return &AArch64::FPR128RegClass; |
597 | } |
598 | return nullptr; |
599 | } |
600 | |
601 | return nullptr; |
602 | } |
603 | |
604 | /// Given a register bank, and size in bits, return the smallest register class |
605 | /// that can represent that combination. |
606 | static const TargetRegisterClass * |
607 | getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, |
608 | bool GetAllRegSet = false) { |
609 | if (SizeInBits.isScalable()) { |
610 | assert(RB.getID() == AArch64::FPRRegBankID && |
611 | "Expected FPR regbank for scalable type size" ); |
612 | return &AArch64::ZPRRegClass; |
613 | } |
614 | |
615 | unsigned RegBankID = RB.getID(); |
616 | |
617 | if (RegBankID == AArch64::GPRRegBankID) { |
618 | if (SizeInBits <= 32) |
619 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
620 | : &AArch64::GPR32RegClass; |
621 | if (SizeInBits == 64) |
622 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
623 | : &AArch64::GPR64RegClass; |
624 | if (SizeInBits == 128) |
625 | return &AArch64::XSeqPairsClassRegClass; |
626 | } |
627 | |
628 | if (RegBankID == AArch64::FPRRegBankID) { |
629 | switch (SizeInBits) { |
630 | default: |
631 | return nullptr; |
632 | case 8: |
633 | return &AArch64::FPR8RegClass; |
634 | case 16: |
635 | return &AArch64::FPR16RegClass; |
636 | case 32: |
637 | return &AArch64::FPR32RegClass; |
638 | case 64: |
639 | return &AArch64::FPR64RegClass; |
640 | case 128: |
641 | return &AArch64::FPR128RegClass; |
642 | } |
643 | } |
644 | |
645 | return nullptr; |
646 | } |
647 | |
648 | /// Returns the correct subregister to use for a given register class. |
649 | static bool getSubRegForClass(const TargetRegisterClass *RC, |
650 | const TargetRegisterInfo &TRI, unsigned &SubReg) { |
651 | switch (TRI.getRegSizeInBits(*RC)) { |
652 | case 8: |
653 | SubReg = AArch64::bsub; |
654 | break; |
655 | case 16: |
656 | SubReg = AArch64::hsub; |
657 | break; |
658 | case 32: |
659 | if (RC != &AArch64::FPR32RegClass) |
660 | SubReg = AArch64::sub_32; |
661 | else |
662 | SubReg = AArch64::ssub; |
663 | break; |
664 | case 64: |
665 | SubReg = AArch64::dsub; |
666 | break; |
667 | default: |
668 | LLVM_DEBUG( |
669 | dbgs() << "Couldn't find appropriate subregister for register class." ); |
670 | return false; |
671 | } |
672 | |
673 | return true; |
674 | } |
675 | |
676 | /// Returns the minimum size the given register bank can hold. |
677 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { |
678 | switch (RB.getID()) { |
679 | case AArch64::GPRRegBankID: |
680 | return 32; |
681 | case AArch64::FPRRegBankID: |
682 | return 8; |
683 | default: |
684 | llvm_unreachable("Tried to get minimum size for unknown register bank." ); |
685 | } |
686 | } |
687 | |
688 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. |
689 | /// Helper function for functions like createDTuple and createQTuple. |
690 | /// |
691 | /// \p RegClassIDs - The list of register class IDs available for some tuple of |
692 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is |
693 | /// expected to contain between 2 and 4 tuple classes. |
694 | /// |
695 | /// \p SubRegs - The list of subregister classes associated with each register |
696 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 |
697 | /// subregister class. The index of each subregister class is expected to |
698 | /// correspond with the index of each register class. |
699 | /// |
700 | /// \returns Either the destination register of REG_SEQUENCE instruction that |
701 | /// was created, or the 0th element of \p Regs if \p Regs contains a single |
702 | /// element. |
703 | static Register createTuple(ArrayRef<Register> Regs, |
704 | const unsigned RegClassIDs[], |
705 | const unsigned SubRegs[], MachineIRBuilder &MIB) { |
706 | unsigned NumRegs = Regs.size(); |
707 | if (NumRegs == 1) |
708 | return Regs[0]; |
709 | assert(NumRegs >= 2 && NumRegs <= 4 && |
710 | "Only support between two and 4 registers in a tuple!" ); |
711 | const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); |
712 | auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]); |
713 | auto RegSequence = |
714 | MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {}); |
715 | for (unsigned I = 0, E = Regs.size(); I < E; ++I) { |
716 | RegSequence.addUse(Regs[I]); |
717 | RegSequence.addImm(SubRegs[I]); |
718 | } |
719 | return RegSequence.getReg(0); |
720 | } |
721 | |
722 | /// Create a tuple of D-registers using the registers in \p Regs. |
723 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
724 | static const unsigned RegClassIDs[] = { |
725 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
726 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
727 | AArch64::dsub2, AArch64::dsub3}; |
728 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
729 | } |
730 | |
731 | /// Create a tuple of Q-registers using the registers in \p Regs. |
732 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
733 | static const unsigned RegClassIDs[] = { |
734 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
735 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
736 | AArch64::qsub2, AArch64::qsub3}; |
737 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
738 | } |
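// Usage sketch (illustrative, not from the original source): a two-register
// Q tuple for an ST2-style instruction could be formed as
//   Register Tuple = createQTuple({Src0, Src1}, MIB);
// which emits a REG_SEQUENCE placing Src0 in qsub0 and Src1 in qsub1.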
739 | |
740 | static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { |
741 | auto &MI = *Root.getParent(); |
742 | auto &MBB = *MI.getParent(); |
743 | auto &MF = *MBB.getParent(); |
744 | auto &MRI = MF.getRegInfo(); |
745 | uint64_t Immed; |
746 | if (Root.isImm()) |
747 | Immed = Root.getImm(); |
748 | else if (Root.isCImm()) |
749 | Immed = Root.getCImm()->getZExtValue(); |
750 | else if (Root.isReg()) { |
751 | auto ValAndVReg = |
752 | getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true); |
753 | if (!ValAndVReg) |
754 | return std::nullopt; |
755 | Immed = ValAndVReg->Value.getSExtValue(); |
756 | } else |
757 | return std::nullopt; |
758 | return Immed; |
759 | } |
760 | |
761 | /// Check whether \p I is a currently unsupported binary operation: |
762 | /// - it has an unsized type |
763 | /// - an operand is not a vreg |
764 | /// - all operands are not in the same bank |
765 | /// These are checks that should someday live in the verifier, but right now, |
766 | /// these are mostly limitations of the aarch64 selector. |
767 | static bool unsupportedBinOp(const MachineInstr &I, |
768 | const AArch64RegisterBankInfo &RBI, |
769 | const MachineRegisterInfo &MRI, |
770 | const AArch64RegisterInfo &TRI) { |
771 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); |
772 | if (!Ty.isValid()) { |
773 | LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n"); |
774 | return true; |
775 | } |
776 | |
777 | const RegisterBank *PrevOpBank = nullptr; |
778 | for (auto &MO : I.operands()) { |
779 | // FIXME: Support non-register operands. |
780 | if (!MO.isReg()) { |
781 | LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); |
782 | return true; |
783 | } |
784 | |
785 | // FIXME: Can generic operations have physical register operands? If |
786 | // so, this will need to be taught about that, and we'll need to get the |
787 | // bank out of the minimal class for the register. |
788 | // Either way, this needs to be documented (and possibly verified). |
789 | if (!MO.getReg().isVirtual()) { |
790 | LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); |
791 | return true; |
792 | } |
793 | |
794 | const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); |
795 | if (!OpBank) { |
796 | LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n"); |
797 | return true; |
798 | } |
799 | |
800 | if (PrevOpBank && OpBank != PrevOpBank) { |
801 | LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n"); |
802 | return true; |
803 | } |
804 | PrevOpBank = OpBank; |
805 | } |
806 | return false; |
807 | } |
808 | |
809 | /// Select the AArch64 opcode for the basic binary operation \p GenericOpc |
810 | /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID |
811 | /// and of size \p OpSize. |
812 | /// \returns \p GenericOpc if the combination is unsupported. |
813 | static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, |
814 | unsigned OpSize) { |
815 | switch (RegBankID) { |
816 | case AArch64::GPRRegBankID: |
817 | if (OpSize == 32) { |
818 | switch (GenericOpc) { |
819 | case TargetOpcode::G_SHL: |
820 | return AArch64::LSLVWr; |
821 | case TargetOpcode::G_LSHR: |
822 | return AArch64::LSRVWr; |
823 | case TargetOpcode::G_ASHR: |
824 | return AArch64::ASRVWr; |
825 | default: |
826 | return GenericOpc; |
827 | } |
828 | } else if (OpSize == 64) { |
829 | switch (GenericOpc) { |
830 | case TargetOpcode::G_PTR_ADD: |
831 | return AArch64::ADDXrr; |
832 | case TargetOpcode::G_SHL: |
833 | return AArch64::LSLVXr; |
834 | case TargetOpcode::G_LSHR: |
835 | return AArch64::LSRVXr; |
836 | case TargetOpcode::G_ASHR: |
837 | return AArch64::ASRVXr; |
838 | default: |
839 | return GenericOpc; |
840 | } |
841 | } |
842 | break; |
843 | case AArch64::FPRRegBankID: |
844 | switch (OpSize) { |
845 | case 32: |
846 | switch (GenericOpc) { |
847 | case TargetOpcode::G_FADD: |
848 | return AArch64::FADDSrr; |
849 | case TargetOpcode::G_FSUB: |
850 | return AArch64::FSUBSrr; |
851 | case TargetOpcode::G_FMUL: |
852 | return AArch64::FMULSrr; |
853 | case TargetOpcode::G_FDIV: |
854 | return AArch64::FDIVSrr; |
855 | default: |
856 | return GenericOpc; |
857 | } |
858 | case 64: |
859 | switch (GenericOpc) { |
860 | case TargetOpcode::G_FADD: |
861 | return AArch64::FADDDrr; |
862 | case TargetOpcode::G_FSUB: |
863 | return AArch64::FSUBDrr; |
864 | case TargetOpcode::G_FMUL: |
865 | return AArch64::FMULDrr; |
866 | case TargetOpcode::G_FDIV: |
867 | return AArch64::FDIVDrr; |
868 | case TargetOpcode::G_OR: |
869 | return AArch64::ORRv8i8; |
870 | default: |
871 | return GenericOpc; |
872 | } |
873 | } |
874 | break; |
875 | } |
876 | return GenericOpc; |
877 | } |
878 | |
879 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, |
880 | /// appropriate for the (value) register bank \p RegBankID and of memory access |
881 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate |
882 | /// addressing mode (e.g., LDRXui). |
883 | /// \returns \p GenericOpc if the combination is unsupported. |
884 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, |
885 | unsigned OpSize) { |
886 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; |
887 | switch (RegBankID) { |
888 | case AArch64::GPRRegBankID: |
889 | switch (OpSize) { |
890 | case 8: |
891 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; |
892 | case 16: |
893 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; |
894 | case 32: |
895 | return isStore ? AArch64::STRWui : AArch64::LDRWui; |
896 | case 64: |
897 | return isStore ? AArch64::STRXui : AArch64::LDRXui; |
898 | } |
899 | break; |
900 | case AArch64::FPRRegBankID: |
901 | switch (OpSize) { |
902 | case 8: |
903 | return isStore ? AArch64::STRBui : AArch64::LDRBui; |
904 | case 16: |
905 | return isStore ? AArch64::STRHui : AArch64::LDRHui; |
906 | case 32: |
907 | return isStore ? AArch64::STRSui : AArch64::LDRSui; |
908 | case 64: |
909 | return isStore ? AArch64::STRDui : AArch64::LDRDui; |
910 | case 128: |
911 | return isStore ? AArch64::STRQui : AArch64::LDRQui; |
912 | } |
913 | break; |
914 | } |
915 | return GenericOpc; |
916 | } |
917 | |
918 | /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg |
919 | /// to \p *To. |
920 | /// |
921 | /// E.g "To = COPY SrcReg:SubReg" |
922 | static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, |
923 | const RegisterBankInfo &RBI, Register SrcReg, |
924 | const TargetRegisterClass *To, unsigned SubReg) { |
925 | assert(SrcReg.isValid() && "Expected a valid source register?"); |
926 | assert(To && "Destination register class cannot be null"); |
927 | assert(SubReg && "Expected a valid subregister"); |
928 | |
929 | MachineIRBuilder MIB(I); |
930 | auto SubRegCopy = |
931 | MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); |
932 | MachineOperand &RegOp = I.getOperand(1); |
933 | RegOp.setReg(SubRegCopy.getReg(0)); |
934 | |
935 | // It's possible that the destination register won't be constrained. Make |
936 | // sure that happens. |
937 | if (!I.getOperand(0).getReg().isPhysical()) |
938 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); |
939 | |
940 | return true; |
941 | } |
942 | |
943 | /// Helper function to get the source and destination register classes for a |
944 | /// copy. Returns a std::pair containing the source register class for the |
945 | /// copy, and the destination register class for the copy. If a register class |
946 | /// cannot be determined, then it will be nullptr. |
947 | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> |
948 | getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, |
949 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
950 | const RegisterBankInfo &RBI) { |
951 | Register DstReg = I.getOperand(0).getReg(); |
952 | Register SrcReg = I.getOperand(1).getReg(); |
953 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); |
954 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); |
955 | |
956 | TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); |
957 | TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); |
958 | |
959 | // Special casing for cross-bank copies of s1s. We can technically represent |
960 | // a 1-bit value with any size of register. The minimum size for a GPR is 32 |
961 | // bits. So, we need to put the FPR on 32 bits as well. |
962 | // |
963 | // FIXME: I'm not sure if this case holds true outside of copies. If it does, |
964 | // then we can pull it into the helpers that get the appropriate class for a |
965 | // register bank. Or make a new helper that carries along some constraint |
966 | // information. |
967 | if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) |
968 | SrcSize = DstSize = TypeSize::getFixed(32); |
969 | |
970 | return {getMinClassForRegBank(SrcRegBank, SrcSize, true), |
971 | getMinClassForRegBank(DstRegBank, DstSize, true)}; |
972 | } |
973 | |
974 | // FIXME: We need some sort of API in RBI/TRI to allow generic code to |
975 | // constrain operands of simple instructions given a TargetRegisterClass |
976 | // and LLT |
977 | static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, |
978 | const RegisterBankInfo &RBI) { |
979 | for (MachineOperand &MO : I.operands()) { |
980 | if (!MO.isReg()) |
981 | continue; |
982 | Register Reg = MO.getReg(); |
983 | if (!Reg) |
984 | continue; |
985 | if (Reg.isPhysical()) |
986 | continue; |
987 | LLT Ty = MRI.getType(Reg); |
988 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); |
989 | const TargetRegisterClass *RC = |
990 | RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); |
991 | if (!RC) { |
992 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); |
993 | RC = getRegClassForTypeOnBank(Ty, RB); |
994 | if (!RC) { |
995 | LLVM_DEBUG( |
996 | dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n" ); |
997 | break; |
998 | } |
999 | } |
1000 | RBI.constrainGenericRegister(Reg, *RC, MRI); |
1001 | } |
1002 | |
1003 | return true; |
1004 | } |
1005 | |
1006 | static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, |
1007 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
1008 | const RegisterBankInfo &RBI) { |
1009 | Register DstReg = I.getOperand(0).getReg(); |
1010 | Register SrcReg = I.getOperand(1).getReg(); |
1011 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); |
1012 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); |
1013 | |
1014 | // Find the correct register classes for the source and destination registers. |
1015 | const TargetRegisterClass *SrcRC; |
1016 | const TargetRegisterClass *DstRC; |
1017 | std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); |
1018 | |
1019 | if (!DstRC) { |
1020 | LLVM_DEBUG(dbgs() << "Unexpected dest size " |
1021 | << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); |
1022 | return false; |
1023 | } |
1024 | |
1025 | // Is this a copy? If so, then we may need to insert a subregister copy. |
1026 | if (I.isCopy()) { |
1027 | // Yes. Check if there's anything to fix up. |
1028 | if (!SrcRC) { |
1029 | LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); |
1030 | return false; |
1031 | } |
1032 | |
1033 | const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC); |
1034 | const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC); |
1035 | unsigned SubReg; |
1036 | |
1037 | // If the source bank doesn't support a subregister copy small enough, |
1038 | // then we first need to copy to the destination bank. |
1039 | if (getMinSizeForRegBank(SrcRegBank) > DstSize) { |
1040 | const TargetRegisterClass *DstTempRC = |
1041 | getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); |
1042 | getSubRegForClass(DstRC, TRI, SubReg); |
1043 | |
1044 | MachineIRBuilder MIB(I); |
1045 | auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); |
1046 | copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); |
1047 | } else if (SrcSize > DstSize) { |
1048 | // If the source register is bigger than the destination we need to |
1049 | // perform a subregister copy. |
1050 | const TargetRegisterClass *SubRegRC = |
1051 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); |
1052 | getSubRegForClass(SubRegRC, TRI, SubReg); |
1053 | copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); |
1054 | } else if (DstSize > SrcSize) { |
1055 | // If the destination register is bigger than the source we need to do |
1056 | // a promotion using SUBREG_TO_REG. |
1057 | const TargetRegisterClass *PromotionRC = |
1058 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); |
1059 | getSubRegForClass(SrcRC, TRI, SubReg); |
1060 | |
1061 | Register PromoteReg = MRI.createVirtualRegister(PromotionRC); |
1062 | BuildMI(*I.getParent(), I, I.getDebugLoc(), |
1063 | TII.get(AArch64::SUBREG_TO_REG), PromoteReg) |
1064 | .addImm(0) |
1065 | .addUse(SrcReg) |
1066 | .addImm(SubReg); |
1067 | MachineOperand &RegOp = I.getOperand(1); |
1068 | RegOp.setReg(PromoteReg); |
1069 | } |
1070 | |
1071 | // If the destination is a physical register, then there's nothing to |
1072 | // change, so we're done. |
1073 | if (DstReg.isPhysical()) |
1074 | return true; |
1075 | } |
1076 | |
1077 | // No need to constrain SrcReg. It will get constrained when we hit another |
1078 | // of its uses or defs. Copies do not have constraints. |
1079 | if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { |
1080 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) |
1081 | << " operand\n"); |
1082 | return false; |
1083 | } |
1084 | |
1085 | // If this is a GPR ZEXT, we just want to reduce it down to a copy. |
1086 | // The sizes will be mismatched with the source < 32b but that's ok. |
1087 | if (I.getOpcode() == TargetOpcode::G_ZEXT) { |
1088 | I.setDesc(TII.get(AArch64::COPY)); |
1089 | assert(SrcRegBank.getID() == AArch64::GPRRegBankID); |
1090 | return selectCopy(I, TII, MRI, TRI, RBI); |
1091 | } |
1092 | |
1093 | I.setDesc(TII.get(AArch64::COPY)); |
1094 | return true; |
1095 | } |
1096 | |
1097 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { |
1098 | if (!DstTy.isScalar() || !SrcTy.isScalar()) |
1099 | return GenericOpc; |
1100 | |
1101 | const unsigned DstSize = DstTy.getSizeInBits(); |
1102 | const unsigned SrcSize = SrcTy.getSizeInBits(); |
1103 | |
1104 | switch (DstSize) { |
1105 | case 32: |
1106 | switch (SrcSize) { |
1107 | case 32: |
1108 | switch (GenericOpc) { |
1109 | case TargetOpcode::G_SITOFP: |
1110 | return AArch64::SCVTFUWSri; |
1111 | case TargetOpcode::G_UITOFP: |
1112 | return AArch64::UCVTFUWSri; |
1113 | case TargetOpcode::G_FPTOSI: |
1114 | return AArch64::FCVTZSUWSr; |
1115 | case TargetOpcode::G_FPTOUI: |
1116 | return AArch64::FCVTZUUWSr; |
1117 | default: |
1118 | return GenericOpc; |
1119 | } |
1120 | case 64: |
1121 | switch (GenericOpc) { |
1122 | case TargetOpcode::G_SITOFP: |
1123 | return AArch64::SCVTFUXSri; |
1124 | case TargetOpcode::G_UITOFP: |
1125 | return AArch64::UCVTFUXSri; |
1126 | case TargetOpcode::G_FPTOSI: |
1127 | return AArch64::FCVTZSUWDr; |
1128 | case TargetOpcode::G_FPTOUI: |
1129 | return AArch64::FCVTZUUWDr; |
1130 | default: |
1131 | return GenericOpc; |
1132 | } |
1133 | default: |
1134 | return GenericOpc; |
1135 | } |
1136 | case 64: |
1137 | switch (SrcSize) { |
1138 | case 32: |
1139 | switch (GenericOpc) { |
1140 | case TargetOpcode::G_SITOFP: |
1141 | return AArch64::SCVTFUWDri; |
1142 | case TargetOpcode::G_UITOFP: |
1143 | return AArch64::UCVTFUWDri; |
1144 | case TargetOpcode::G_FPTOSI: |
1145 | return AArch64::FCVTZSUXSr; |
1146 | case TargetOpcode::G_FPTOUI: |
1147 | return AArch64::FCVTZUUXSr; |
1148 | default: |
1149 | return GenericOpc; |
1150 | } |
1151 | case 64: |
1152 | switch (GenericOpc) { |
1153 | case TargetOpcode::G_SITOFP: |
1154 | return AArch64::SCVTFUXDri; |
1155 | case TargetOpcode::G_UITOFP: |
1156 | return AArch64::UCVTFUXDri; |
1157 | case TargetOpcode::G_FPTOSI: |
1158 | return AArch64::FCVTZSUXDr; |
1159 | case TargetOpcode::G_FPTOUI: |
1160 | return AArch64::FCVTZUUXDr; |
1161 | default: |
1162 | return GenericOpc; |
1163 | } |
1164 | default: |
1165 | return GenericOpc; |
1166 | } |
1167 | default: |
1168 | return GenericOpc; |
1169 | }; |
1170 | return GenericOpc; |
1171 | } |
1172 | |
1173 | MachineInstr * |
1174 | AArch64InstructionSelector::emitSelect(Register Dst, Register True, |
1175 | Register False, AArch64CC::CondCode CC, |
1176 | MachineIRBuilder &MIB) const { |
1177 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1178 | assert(RBI.getRegBank(False, MRI, TRI)->getID() == |
1179 | RBI.getRegBank(True, MRI, TRI)->getID() && |
1180 | "Expected both select operands to have the same regbank?" ); |
1181 | LLT Ty = MRI.getType(Reg: True); |
1182 | if (Ty.isVector()) |
1183 | return nullptr; |
1184 | const unsigned Size = Ty.getSizeInBits(); |
1185 | assert((Size == 32 || Size == 64) && |
1186 | "Expected 32 bit or 64 bit select only?" ); |
1187 | const bool Is32Bit = Size == 32; |
1188 | if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { |
1189 | unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; |
1190 | auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); |
1191 | constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); |
1192 | return &*FCSel; |
1193 | } |
1194 | |
1195 | // By default, we'll try and emit a CSEL. |
1196 | unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; |
1197 | bool Optimized = false; |
1198 | auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, |
1199 | &Optimized](Register &Reg, Register &OtherReg, |
1200 | bool Invert) { |
1201 | if (Optimized) |
1202 | return false; |
1203 | |
1204 | // Attempt to fold: |
1205 | // |
1206 | // %sub = G_SUB 0, %x |
1207 | // %select = G_SELECT cc, %reg, %sub |
1208 | // |
1209 | // Into: |
1210 | // %select = CSNEG %reg, %x, cc |
1211 | Register MatchReg; |
1212 | if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { |
1213 | Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; |
1214 | Reg = MatchReg; |
1215 | if (Invert) { |
1216 | CC = AArch64CC::getInvertedCondCode(CC); |
1217 | std::swap(Reg, OtherReg); |
1218 | } |
1219 | return true; |
1220 | } |
1221 | |
1222 | // Attempt to fold: |
1223 | // |
1224 | // %xor = G_XOR %x, -1 |
1225 | // %select = G_SELECT cc, %reg, %xor |
1226 | // |
1227 | // Into: |
1228 | // %select = CSINV %reg, %x, cc |
1229 | if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { |
1230 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1231 | Reg = MatchReg; |
1232 | if (Invert) { |
1233 | CC = AArch64CC::getInvertedCondCode(CC); |
1234 | std::swap(Reg, OtherReg); |
1235 | } |
1236 | return true; |
1237 | } |
1238 | |
1239 | // Attempt to fold: |
1240 | // |
1241 | // %add = G_ADD %x, 1 |
1242 | // %select = G_SELECT cc, %reg, %add |
1243 | // |
1244 | // Into: |
1245 | // %select = CSINC %reg, %x, cc |
1246 | if (mi_match(Reg, MRI, |
1247 | m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), |
1248 | m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { |
1249 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1250 | Reg = MatchReg; |
1251 | if (Invert) { |
1252 | CC = AArch64CC::getInvertedCondCode(CC); |
1253 | std::swap(Reg, OtherReg); |
1254 | } |
1255 | return true; |
1256 | } |
1257 | |
1258 | return false; |
1259 | }; |
1260 | |
1261 | // Helper lambda which tries to use CSINC/CSINV for the instruction when its |
1262 | // true/false values are constants. |
1263 | // FIXME: All of these patterns already exist in tablegen. We should be |
1264 | // able to import these. |
1265 | auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, |
1266 | &Optimized]() { |
1267 | if (Optimized) |
1268 | return false; |
1269 | auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI); |
1270 | auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI); |
1271 | if (!TrueCst && !FalseCst) |
1272 | return false; |
1273 | |
1274 | Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; |
1275 | if (TrueCst && FalseCst) { |
1276 | int64_t T = TrueCst->Value.getSExtValue(); |
1277 | int64_t F = FalseCst->Value.getSExtValue(); |
1278 | |
1279 | if (T == 0 && F == 1) { |
1280 | // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc |
1281 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1282 | True = ZReg; |
1283 | False = ZReg; |
1284 | return true; |
1285 | } |
1286 | |
1287 | if (T == 0 && F == -1) { |
1288 | // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc |
1289 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1290 | True = ZReg; |
1291 | False = ZReg; |
1292 | return true; |
1293 | } |
1294 | } |
1295 | |
1296 | if (TrueCst) { |
1297 | int64_t T = TrueCst->Value.getSExtValue(); |
1298 | if (T == 1) { |
1299 | // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc |
1300 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1301 | True = False; |
1302 | False = ZReg; |
1303 | CC = AArch64CC::getInvertedCondCode(CC); |
1304 | return true; |
1305 | } |
1306 | |
1307 | if (T == -1) { |
1308 | // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc |
1309 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1310 | True = False; |
1311 | False = ZReg; |
1312 | CC = AArch64CC::getInvertedCondCode(CC); |
1313 | return true; |
1314 | } |
1315 | } |
1316 | |
1317 | if (FalseCst) { |
1318 | int64_t F = FalseCst->Value.getSExtValue(); |
1319 | if (F == 1) { |
1320 | // G_SELECT cc, t, 1 -> CSINC t, zreg, cc |
1321 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1322 | False = ZReg; |
1323 | return true; |
1324 | } |
1325 | |
1326 | if (F == -1) { |
1327 | // G_SELECT cc, t, -1 -> CSINV t, zreg, cc |
1328 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1329 | False = ZReg; |
1330 | return true; |
1331 | } |
1332 | } |
1333 | return false; |
1334 | }; |
1335 | |
1336 | Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); |
1337 | Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); |
1338 | Optimized |= TryOptSelectCst(); |
1339 | auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); |
1340 | constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); |
1341 | return &*SelectInst; |
1342 | } |
1343 | |
1344 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { |
1345 | switch (P) { |
1346 | default: |
1347 | llvm_unreachable("Unknown condition code!"); |
1348 | case CmpInst::ICMP_NE: |
1349 | return AArch64CC::NE; |
1350 | case CmpInst::ICMP_EQ: |
1351 | return AArch64CC::EQ; |
1352 | case CmpInst::ICMP_SGT: |
1353 | return AArch64CC::GT; |
1354 | case CmpInst::ICMP_SGE: |
1355 | return AArch64CC::GE; |
1356 | case CmpInst::ICMP_SLT: |
1357 | return AArch64CC::LT; |
1358 | case CmpInst::ICMP_SLE: |
1359 | return AArch64CC::LE; |
1360 | case CmpInst::ICMP_UGT: |
1361 | return AArch64CC::HI; |
1362 | case CmpInst::ICMP_UGE: |
1363 | return AArch64CC::HS; |
1364 | case CmpInst::ICMP_ULT: |
1365 | return AArch64CC::LO; |
1366 | case CmpInst::ICMP_ULE: |
1367 | return AArch64CC::LS; |
1368 | } |
1369 | } |
1370 | |
1371 | /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC. |
1372 | static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, |
1373 | AArch64CC::CondCode &CondCode, |
1374 | AArch64CC::CondCode &CondCode2) { |
1375 | CondCode2 = AArch64CC::AL; |
1376 | switch (CC) { |
1377 | default: |
1378 | llvm_unreachable("Unknown FP condition!" ); |
1379 | case CmpInst::FCMP_OEQ: |
1380 | CondCode = AArch64CC::EQ; |
1381 | break; |
1382 | case CmpInst::FCMP_OGT: |
1383 | CondCode = AArch64CC::GT; |
1384 | break; |
1385 | case CmpInst::FCMP_OGE: |
1386 | CondCode = AArch64CC::GE; |
1387 | break; |
1388 | case CmpInst::FCMP_OLT: |
1389 | CondCode = AArch64CC::MI; |
1390 | break; |
1391 | case CmpInst::FCMP_OLE: |
1392 | CondCode = AArch64CC::LS; |
1393 | break; |
1394 | case CmpInst::FCMP_ONE: |
1395 | CondCode = AArch64CC::MI; |
1396 | CondCode2 = AArch64CC::GT; |
1397 | break; |
1398 | case CmpInst::FCMP_ORD: |
1399 | CondCode = AArch64CC::VC; |
1400 | break; |
1401 | case CmpInst::FCMP_UNO: |
1402 | CondCode = AArch64CC::VS; |
1403 | break; |
1404 | case CmpInst::FCMP_UEQ: |
1405 | CondCode = AArch64CC::EQ; |
1406 | CondCode2 = AArch64CC::VS; |
1407 | break; |
1408 | case CmpInst::FCMP_UGT: |
1409 | CondCode = AArch64CC::HI; |
1410 | break; |
1411 | case CmpInst::FCMP_UGE: |
1412 | CondCode = AArch64CC::PL; |
1413 | break; |
1414 | case CmpInst::FCMP_ULT: |
1415 | CondCode = AArch64CC::LT; |
1416 | break; |
1417 | case CmpInst::FCMP_ULE: |
1418 | CondCode = AArch64CC::LE; |
1419 | break; |
1420 | case CmpInst::FCMP_UNE: |
1421 | CondCode = AArch64CC::NE; |
1422 | break; |
1423 | } |
1424 | } |
1425 | |
1426 | /// Convert an IR fp condition code to an AArch64 CC. |
/// This differs from changeFPCCToORAArch64CC in that it returns cond codes
/// that should be AND'ed instead of OR'ed.
1429 | static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, |
1430 | AArch64CC::CondCode &CondCode, |
1431 | AArch64CC::CondCode &CondCode2) { |
1432 | CondCode2 = AArch64CC::AL; |
1433 | switch (CC) { |
1434 | default: |
1435 | changeFPCCToORAArch64CC(CC, CondCode, CondCode2); |
1436 | assert(CondCode2 == AArch64CC::AL); |
1437 | break; |
1438 | case CmpInst::FCMP_ONE: |
1439 | // (a one b) |
1440 | // == ((a olt b) || (a ogt b)) |
1441 | // == ((a ord b) && (a une b)) |
1442 | CondCode = AArch64CC::VC; |
1443 | CondCode2 = AArch64CC::NE; |
1444 | break; |
1445 | case CmpInst::FCMP_UEQ: |
1446 | // (a ueq b) |
1447 | // == ((a uno b) || (a oeq b)) |
1448 | // == ((a ule b) && (a uge b)) |
1449 | CondCode = AArch64CC::PL; |
1450 | CondCode2 = AArch64CC::LE; |
1451 | break; |
1452 | } |
1453 | } |
1454 | |
1455 | /// Return a register which can be used as a bit to test in a TB(N)Z. |
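/// Walks backwards through extends, truncs, shifts, ANDs and XORs looking for
/// a simpler register to test, updating \p Bit (and possibly \p Invert) along
/// the way. For example, starting from roughly
///   (tbz (and (lshr %x, 2), 1), 0)
/// this would return %x with \p Bit updated to 2.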
1456 | static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, |
1457 | MachineRegisterInfo &MRI) { |
1458 | assert(Reg.isValid() && "Expected valid register!" ); |
1459 | bool HasZext = false; |
1460 | while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { |
1461 | unsigned Opc = MI->getOpcode(); |
1462 | |
1463 | if (!MI->getOperand(i: 0).isReg() || |
1464 | !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg())) |
1465 | break; |
1466 | |
1467 | // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. |
1468 | // |
1469 | // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number |
1470 | // on the truncated x is the same as the bit number on x. |
1471 | if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || |
1472 | Opc == TargetOpcode::G_TRUNC) { |
1473 | if (Opc == TargetOpcode::G_ZEXT) |
1474 | HasZext = true; |
1475 | |
1476 | Register NextReg = MI->getOperand(i: 1).getReg(); |
1477 | // Did we find something worth folding? |
1478 | if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg)) |
1479 | break; |
1480 | |
1481 | // NextReg is worth folding. Keep looking. |
1482 | Reg = NextReg; |
1483 | continue; |
1484 | } |
1485 | |
1486 | // Attempt to find a suitable operation with a constant on one side. |
1487 | std::optional<uint64_t> C; |
1488 | Register TestReg; |
1489 | switch (Opc) { |
1490 | default: |
1491 | break; |
1492 | case TargetOpcode::G_AND: |
1493 | case TargetOpcode::G_XOR: { |
1494 | TestReg = MI->getOperand(i: 1).getReg(); |
1495 | Register ConstantReg = MI->getOperand(i: 2).getReg(); |
1496 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
1497 | if (!VRegAndVal) { |
1498 | // AND commutes, check the other side for a constant. |
1499 | // FIXME: Can we canonicalize the constant so that it's always on the |
1500 | // same side at some point earlier? |
1501 | std::swap(a&: ConstantReg, b&: TestReg); |
1502 | VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
1503 | } |
1504 | if (VRegAndVal) { |
1505 | if (HasZext) |
1506 | C = VRegAndVal->Value.getZExtValue(); |
1507 | else |
1508 | C = VRegAndVal->Value.getSExtValue(); |
1509 | } |
1510 | break; |
1511 | } |
1512 | case TargetOpcode::G_ASHR: |
1513 | case TargetOpcode::G_LSHR: |
1514 | case TargetOpcode::G_SHL: { |
1515 | TestReg = MI->getOperand(i: 1).getReg(); |
1516 | auto VRegAndVal = |
1517 | getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI); |
1518 | if (VRegAndVal) |
1519 | C = VRegAndVal->Value.getSExtValue(); |
1520 | break; |
1521 | } |
1522 | } |
1523 | |
1524 | // Didn't find a constant or viable register. Bail out of the loop. |
1525 | if (!C || !TestReg.isValid()) |
1526 | break; |
1527 | |
1528 | // We found a suitable instruction with a constant. Check to see if we can |
1529 | // walk through the instruction. |
1530 | Register NextReg; |
1531 | unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits(); |
1532 | switch (Opc) { |
1533 | default: |
1534 | break; |
1535 | case TargetOpcode::G_AND: |
1536 | // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. |
1537 | if ((*C >> Bit) & 1) |
1538 | NextReg = TestReg; |
1539 | break; |
1540 | case TargetOpcode::G_SHL: |
// (tbz (shl x, c), b) -> (tbz x, b-c) when c <= b and b-c fits in the
// width of the register.
1543 | if (*C <= Bit && (Bit - *C) < TestRegSize) { |
1544 | NextReg = TestReg; |
1545 | Bit = Bit - *C; |
1546 | } |
1547 | break; |
1548 | case TargetOpcode::G_ASHR: |
// (tbz (ashr x, c), b) -> (tbz x, b+c), clamping to the msb when b+c is at
// least the number of bits in x.
1551 | NextReg = TestReg; |
1552 | Bit = Bit + *C; |
1553 | if (Bit >= TestRegSize) |
1554 | Bit = TestRegSize - 1; |
1555 | break; |
1556 | case TargetOpcode::G_LSHR: |
1557 | // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x |
1558 | if ((Bit + *C) < TestRegSize) { |
1559 | NextReg = TestReg; |
1560 | Bit = Bit + *C; |
1561 | } |
1562 | break; |
1563 | case TargetOpcode::G_XOR: |
1564 | // We can walk through a G_XOR by inverting whether we use tbz/tbnz when |
1565 | // appropriate. |
1566 | // |
1567 | // e.g. If x' = xor x, c, and the b-th bit is set in c then |
1568 | // |
1569 | // tbz x', b -> tbnz x, b |
1570 | // |
1571 | // Because x' only has the b-th bit set if x does not. |
1572 | if ((*C >> Bit) & 1) |
1573 | Invert = !Invert; |
1574 | NextReg = TestReg; |
1575 | break; |
1576 | } |
1577 | |
1578 | // Check if we found anything worth folding. |
1579 | if (!NextReg.isValid()) |
1580 | return Reg; |
1581 | Reg = NextReg; |
1582 | } |
1583 | |
1584 | return Reg; |
1585 | } |
1586 | |
1587 | MachineInstr *AArch64InstructionSelector::emitTestBit( |
1588 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, |
1589 | MachineIRBuilder &MIB) const { |
1590 | assert(TestReg.isValid()); |
1591 | assert(ProduceNonFlagSettingCondBr && |
1592 | "Cannot emit TB(N)Z with speculation tracking!" ); |
1593 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1594 | |
1595 | // Attempt to optimize the test bit by walking over instructions. |
1596 | TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI); |
1597 | LLT Ty = MRI.getType(Reg: TestReg); |
1598 | unsigned Size = Ty.getSizeInBits(); |
1599 | assert(!Ty.isVector() && "Expected a scalar!" ); |
1600 | assert(Bit < 64 && "Bit is too large!" ); |
1601 | |
// TB(N)ZW can only test bits 0-31, so we use the W form when the bit number
// is below 32 and copy the test register to the matching width if needed.
1604 | bool UseWReg = Bit < 32; |
1605 | unsigned NecessarySize = UseWReg ? 32 : 64; |
1606 | if (Size != NecessarySize) |
1607 | TestReg = moveScalarRegClass( |
1608 | Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, |
1609 | MIB); |
1610 | |
1611 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, |
1612 | {AArch64::TBZW, AArch64::TBNZW}}; |
1613 | unsigned Opc = OpcTable[UseWReg][IsNegative]; |
1614 | auto TestBitMI = |
1615 | MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB); |
1616 | constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI); |
1617 | return &*TestBitMI; |
1618 | } |
1619 | |
1620 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( |
1621 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, |
1622 | MachineIRBuilder &MIB) const { |
1623 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?" ); |
1624 | // Given something like this: |
1625 | // |
1626 | // %x = ...Something... |
1627 | // %one = G_CONSTANT i64 1 |
1628 | // %zero = G_CONSTANT i64 0 |
1629 | // %and = G_AND %x, %one |
1630 | // %cmp = G_ICMP intpred(ne), %and, %zero |
1631 | // %cmp_trunc = G_TRUNC %cmp |
1632 | // G_BRCOND %cmp_trunc, %bb.3 |
1633 | // |
1634 | // We want to try and fold the AND into the G_BRCOND and produce either a |
1635 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). |
1636 | // |
1637 | // In this case, we'd get |
1638 | // |
1639 | // TBNZ %x %bb.3 |
1640 | // |
1641 | |
1642 | // Check if the AND has a constant on its RHS which we can use as a mask. |
1643 | // If it's a power of 2, then it's the same as checking a specific bit. |
// (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
1645 | auto MaybeBit = getIConstantVRegValWithLookThrough( |
1646 | VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI()); |
1647 | if (!MaybeBit) |
1648 | return false; |
1649 | |
1650 | int32_t Bit = MaybeBit->Value.exactLogBase2(); |
1651 | if (Bit < 0) |
1652 | return false; |
1653 | |
1654 | Register TestReg = AndInst.getOperand(i: 1).getReg(); |
1655 | |
1656 | // Emit a TB(N)Z. |
1657 | emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB); |
1658 | return true; |
1659 | } |
1660 | |
1661 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, |
1662 | bool IsNegative, |
1663 | MachineBasicBlock *DestMBB, |
1664 | MachineIRBuilder &MIB) const { |
1665 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!" ); |
1666 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1667 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == |
1668 | AArch64::GPRRegBankID && |
1669 | "Expected GPRs only?" ); |
1670 | auto Ty = MRI.getType(Reg: CompareReg); |
1671 | unsigned Width = Ty.getSizeInBits(); |
1672 | assert(!Ty.isVector() && "Expected scalar only?" ); |
1673 | assert(Width <= 64 && "Expected width to be at most 64?" ); |
1674 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, |
1675 | {AArch64::CBNZW, AArch64::CBNZX}}; |
1676 | unsigned Opc = OpcTable[IsNegative][Width == 64]; |
1677 | auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB); |
1678 | constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI); |
1679 | return &*BranchMI; |
1680 | } |
1681 | |
1682 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( |
1683 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { |
1684 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); |
1685 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1686 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't |
1687 | // totally clean. Some of them require two branches to implement. |
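// For example, FCMP_ONE (ordered and not equal) maps to {MI, GT}, so we emit
// two Bcc instructions targeting the same destination block.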
1688 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate(); |
1689 | emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
1690 | Pred); |
1691 | AArch64CC::CondCode CC1, CC2; |
1692 | changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2); |
1693 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1694 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB); |
1695 | if (CC2 != AArch64CC::AL) |
1696 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB); |
1697 | I.eraseFromParent(); |
1698 | return true; |
1699 | } |
1700 | |
1701 | bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( |
1702 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1703 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1704 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1705 | // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. |
1706 | // |
1707 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z |
1708 | // instructions will not be produced, as they are conditional branch |
1709 | // instructions that do not set flags. |
1710 | if (!ProduceNonFlagSettingCondBr) |
1711 | return false; |
1712 | |
1713 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1714 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1715 | auto Pred = |
1716 | static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate()); |
1717 | Register LHS = ICmp.getOperand(i: 2).getReg(); |
1718 | Register RHS = ICmp.getOperand(i: 3).getReg(); |
1719 | |
1720 | // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. |
1721 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
1722 | MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI); |
1723 | |
1724 | // When we can emit a TB(N)Z, prefer that. |
1725 | // |
1726 | // Handle non-commutative condition codes first. |
1727 | // Note that we don't want to do this when we have a G_AND because it can |
1728 | // become a tst. The tst will make the test bit in the TB(N)Z redundant. |
1729 | if (VRegAndVal && !AndInst) { |
1730 | int64_t C = VRegAndVal->Value.getSExtValue(); |
1731 | |
// For a signed greater-than comparison against -1, we can just test if the
// msb (sign bit) is zero.
1734 | if (C == -1 && Pred == CmpInst::ICMP_SGT) { |
1735 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1736 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB); |
1737 | I.eraseFromParent(); |
1738 | return true; |
1739 | } |
1740 | |
// For a signed less-than comparison against zero, we can just test if the
// msb is not zero.
1743 | if (C == 0 && Pred == CmpInst::ICMP_SLT) { |
1744 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1745 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB); |
1746 | I.eraseFromParent(); |
1747 | return true; |
1748 | } |
1749 | |
// Similarly, for a signed greater-than-or-equal comparison against zero,
// we can test if the msb is zero.
1752 | if (C == 0 && Pred == CmpInst::ICMP_SGE) { |
1753 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1754 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB); |
1755 | I.eraseFromParent(); |
1756 | return true; |
1757 | } |
1758 | } |
1759 | |
1760 | // Attempt to handle commutative condition codes. Right now, that's only |
1761 | // eq/ne. |
1762 | if (ICmpInst::isEquality(P: Pred)) { |
1763 | if (!VRegAndVal) { |
1764 | std::swap(a&: RHS, b&: LHS); |
1765 | VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
1766 | AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI); |
1767 | } |
1768 | |
1769 | if (VRegAndVal && VRegAndVal->Value == 0) { |
1770 | // If there's a G_AND feeding into this branch, try to fold it away by |
1771 | // emitting a TB(N)Z instead. |
1772 | // |
1773 | // Note: If we have LT, then it *is* possible to fold, but it wouldn't be |
1774 | // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding |
1775 | // would be redundant. |
1776 | if (AndInst && |
1777 | tryOptAndIntoCompareBranch( |
1778 | AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) { |
1779 | I.eraseFromParent(); |
1780 | return true; |
1781 | } |
1782 | |
1783 | // Otherwise, try to emit a CB(N)Z instead. |
1784 | auto LHSTy = MRI.getType(Reg: LHS); |
1785 | if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { |
1786 | emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); |
1787 | I.eraseFromParent(); |
1788 | return true; |
1789 | } |
1790 | } |
1791 | } |
1792 | |
1793 | return false; |
1794 | } |
1795 | |
1796 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( |
1797 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1798 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1799 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1800 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) |
1801 | return true; |
1802 | |
1803 | // Couldn't optimize. Emit a compare + a Bcc. |
1804 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1805 | auto PredOp = ICmp.getOperand(i: 1); |
1806 | emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
1807 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( |
1808 | P: static_cast<CmpInst::Predicate>(PredOp.getPredicate())); |
1809 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB); |
1810 | I.eraseFromParent(); |
1811 | return true; |
1812 | } |
1813 | |
1814 | bool AArch64InstructionSelector::selectCompareBranch( |
1815 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { |
1816 | Register CondReg = I.getOperand(i: 0).getReg(); |
1817 | MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg); |
1818 | // Try to select the G_BRCOND using whatever is feeding the condition if |
1819 | // possible. |
1820 | unsigned CCMIOpc = CCMI->getOpcode(); |
1821 | if (CCMIOpc == TargetOpcode::G_FCMP) |
1822 | return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB); |
1823 | if (CCMIOpc == TargetOpcode::G_ICMP) |
1824 | return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB); |
1825 | |
1826 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z |
1827 | // instructions will not be produced, as they are conditional branch |
1828 | // instructions that do not set flags. |
1829 | if (ProduceNonFlagSettingCondBr) { |
1830 | emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true, |
1831 | DstMBB: I.getOperand(i: 1).getMBB(), MIB); |
1832 | I.eraseFromParent(); |
1833 | return true; |
1834 | } |
1835 | |
1836 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. |
1837 | auto TstMI = |
1838 | MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1); |
1839 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
1840 | auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc) |
1841 | .addImm(Val: AArch64CC::NE) |
1842 | .addMBB(MBB: I.getOperand(i: 1).getMBB()); |
1843 | I.eraseFromParent(); |
1844 | return constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI); |
1845 | } |
1846 | |
1847 | /// Returns the element immediate value of a vector shift operand if found. |
1848 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. |
1849 | static std::optional<int64_t> getVectorShiftImm(Register Reg, |
1850 | MachineRegisterInfo &MRI) { |
1851 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand" ); |
1852 | MachineInstr *OpMI = MRI.getVRegDef(Reg); |
1853 | return getAArch64VectorSplatScalar(MI: *OpMI, MRI); |
1854 | } |
1855 | |
1856 | /// Matches and returns the shift immediate value for a SHL instruction given |
1857 | /// a shift operand. |
1858 | static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, |
1859 | MachineRegisterInfo &MRI) { |
1860 | std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); |
1861 | if (!ShiftImm) |
1862 | return std::nullopt; |
1863 | // Check the immediate is in range for a SHL. |
1864 | int64_t Imm = *ShiftImm; |
1865 | if (Imm < 0) |
1866 | return std::nullopt; |
1867 | switch (SrcTy.getElementType().getSizeInBits()) { |
1868 | default: |
1869 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift" ); |
1870 | return std::nullopt; |
1871 | case 8: |
1872 | if (Imm > 7) |
1873 | return std::nullopt; |
1874 | break; |
1875 | case 16: |
1876 | if (Imm > 15) |
1877 | return std::nullopt; |
1878 | break; |
1879 | case 32: |
1880 | if (Imm > 31) |
1881 | return std::nullopt; |
1882 | break; |
1883 | case 64: |
1884 | if (Imm > 63) |
1885 | return std::nullopt; |
1886 | break; |
1887 | } |
1888 | return Imm; |
1889 | } |
1890 | |
1891 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, |
1892 | MachineRegisterInfo &MRI) { |
1893 | assert(I.getOpcode() == TargetOpcode::G_SHL); |
1894 | Register DstReg = I.getOperand(i: 0).getReg(); |
1895 | const LLT Ty = MRI.getType(Reg: DstReg); |
1896 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1897 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1898 | |
1899 | if (!Ty.isVector()) |
1900 | return false; |
1901 | |
1902 | // Check if we have a vector of constants on RHS that we can select as the |
1903 | // immediate form. |
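// For example, shifting a <4 x s32> vector by a splat of 5 can use the
// immediate form SHLv4i32_shift with Imm = 5, rather than materializing the
// splat and using USHLv4i32.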
1904 | std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI); |
1905 | |
1906 | unsigned Opc = 0; |
1907 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1908 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; |
1909 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1910 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; |
1911 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1912 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; |
1913 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1914 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; |
1915 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1916 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; |
1917 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1918 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; |
1919 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1920 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; |
1921 | } else { |
1922 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type" ); |
1923 | return false; |
1924 | } |
1925 | |
1926 | auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg}); |
1927 | if (ImmVal) |
1928 | Shl.addImm(Val: *ImmVal); |
1929 | else |
1930 | Shl.addUse(RegNo: Src2Reg); |
1931 | constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI); |
1932 | I.eraseFromParent(); |
1933 | return true; |
1934 | } |
1935 | |
1936 | bool AArch64InstructionSelector::selectVectorAshrLshr( |
1937 | MachineInstr &I, MachineRegisterInfo &MRI) { |
1938 | assert(I.getOpcode() == TargetOpcode::G_ASHR || |
1939 | I.getOpcode() == TargetOpcode::G_LSHR); |
1940 | Register DstReg = I.getOperand(i: 0).getReg(); |
1941 | const LLT Ty = MRI.getType(Reg: DstReg); |
1942 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1943 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1944 | |
1945 | if (!Ty.isVector()) |
1946 | return false; |
1947 | |
1948 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; |
1949 | |
// We expect the immediate case to be lowered in post-legalizer lowering to
// the G_VASHR/G_VLSHR equivalents of AArch64ISD::VASHR/VLSHR.
1952 | |
// There is no shift-right-by-register instruction, but the shift-left-by-register
// instruction takes a signed shift amount, where negative values specify a
// right shift.
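// So a vector right shift is emitted as a NEG of the shift amount followed
// by SSHL (arithmetic) or USHL (logical) with that negated amount.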
1956 | |
1957 | unsigned Opc = 0; |
1958 | unsigned NegOpc = 0; |
1959 | const TargetRegisterClass *RC = |
1960 | getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID)); |
1961 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1962 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; |
1963 | NegOpc = AArch64::NEGv2i64; |
1964 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1965 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; |
1966 | NegOpc = AArch64::NEGv4i32; |
1967 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1968 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; |
1969 | NegOpc = AArch64::NEGv2i32; |
1970 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1971 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; |
1972 | NegOpc = AArch64::NEGv4i16; |
1973 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1974 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; |
1975 | NegOpc = AArch64::NEGv8i16; |
1976 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1977 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; |
1978 | NegOpc = AArch64::NEGv16i8; |
1979 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1980 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; |
1981 | NegOpc = AArch64::NEGv8i8; |
1982 | } else { |
1983 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type" ); |
1984 | return false; |
1985 | } |
1986 | |
1987 | auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg}); |
1988 | constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI); |
1989 | auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg}); |
1990 | constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI); |
1991 | I.eraseFromParent(); |
1992 | return true; |
1993 | } |
1994 | |
1995 | bool AArch64InstructionSelector::selectVaStartAAPCS( |
1996 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { |
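// va_start lowering for the generic AAPCS is not handled here; returning
// false makes selection of this instruction fail and leaves it to the usual
// GlobalISel fallback path.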
1997 | return false; |
1998 | } |
1999 | |
2000 | bool AArch64InstructionSelector::selectVaStartDarwin( |
2001 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { |
2002 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
2003 | Register ListReg = I.getOperand(i: 0).getReg(); |
2004 | |
2005 | Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
2006 | |
2007 | int FrameIdx = FuncInfo->getVarArgsStackIndex(); |
2008 | if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( |
2009 | CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) { |
2010 | FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 |
2011 | ? FuncInfo->getVarArgsGPRIndex() |
2012 | : FuncInfo->getVarArgsStackIndex(); |
2013 | } |
2014 | |
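// On Darwin the va_list is just a pointer, so va_start reduces to computing
// the address of the first variadic stack slot and storing it into the
// va_list object:
//   ADDXri %addr, <frame-index>, 0, 0
//   STRXui %addr, %list, 0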
2015 | auto MIB = |
2016 | BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri)) |
2017 | .addDef(RegNo: ArgsAddrReg) |
2018 | .addFrameIndex(Idx: FrameIdx) |
2019 | .addImm(Val: 0) |
2020 | .addImm(Val: 0); |
2021 | |
2022 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2023 | |
2024 | MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui)) |
2025 | .addUse(RegNo: ArgsAddrReg) |
2026 | .addUse(RegNo: ListReg) |
2027 | .addImm(Val: 0) |
2028 | .addMemOperand(MMO: *I.memoperands_begin()); |
2029 | |
2030 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2031 | I.eraseFromParent(); |
2032 | return true; |
2033 | } |
2034 | |
2035 | void AArch64InstructionSelector::materializeLargeCMVal( |
2036 | MachineInstr &I, const Value *V, unsigned OpFlags) { |
2037 | MachineBasicBlock &MBB = *I.getParent(); |
2038 | MachineFunction &MF = *MBB.getParent(); |
2039 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2040 | |
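// Materialize the full 64-bit value as a MOVZ of bits [15:0] followed by
// three MOVKs for bits [31:16], [47:32] and [63:48], each relocated against
// V with the corresponding MO_G0..MO_G3 (and MO_NC) target flags.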
2041 | auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}); |
2042 | MovZ->addOperand(MF, Op: I.getOperand(i: 1)); |
2043 | MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 | |
2044 | AArch64II::MO_NC); |
2045 | MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0)); |
2046 | constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI); |
2047 | |
2048 | auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, |
2049 | Register ForceDstReg) { |
2050 | Register DstReg = ForceDstReg |
2051 | ? ForceDstReg |
2052 | : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
2053 | auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg); |
2054 | if (auto *GV = dyn_cast<GlobalValue>(Val: V)) { |
2055 | MovI->addOperand(MF, Op: MachineOperand::CreateGA( |
2056 | GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags)); |
2057 | } else { |
2058 | MovI->addOperand( |
2059 | MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V), |
2060 | Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags)); |
2061 | } |
2062 | MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset)); |
2063 | constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI); |
2064 | return DstReg; |
2065 | }; |
2066 | Register DstReg = BuildMovK(MovZ.getReg(Idx: 0), |
2067 | AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); |
2068 | DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); |
2069 | BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg()); |
2070 | } |
2071 | |
2072 | bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { |
2073 | MachineBasicBlock &MBB = *I.getParent(); |
2074 | MachineFunction &MF = *MBB.getParent(); |
2075 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2076 | |
2077 | switch (I.getOpcode()) { |
2078 | case TargetOpcode::G_STORE: { |
2079 | bool Changed = contractCrossBankCopyIntoStore(I, MRI); |
2080 | MachineOperand &SrcOp = I.getOperand(i: 0); |
2081 | if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) { |
2082 | // Allow matching with imported patterns for stores of pointers. Unlike |
2083 | // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy |
2084 | // and constrain. |
2085 | auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp); |
2086 | Register NewSrc = Copy.getReg(Idx: 0); |
2087 | SrcOp.setReg(NewSrc); |
2088 | RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI); |
2089 | Changed = true; |
2090 | } |
2091 | return Changed; |
2092 | } |
2093 | case TargetOpcode::G_PTR_ADD: |
2094 | return convertPtrAddToAdd(I, MRI); |
2095 | case TargetOpcode::G_LOAD: { |
2096 | // For scalar loads of pointers, we try to convert the dest type from p0 |
2097 | // to s64 so that our imported patterns can match. Like with the G_PTR_ADD |
2098 | // conversion, this should be ok because all users should have been |
2099 | // selected already, so the type doesn't matter for them. |
2100 | Register DstReg = I.getOperand(i: 0).getReg(); |
2101 | const LLT DstTy = MRI.getType(Reg: DstReg); |
2102 | if (!DstTy.isPointer()) |
2103 | return false; |
2104 | MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64)); |
2105 | return true; |
2106 | } |
2107 | case AArch64::G_DUP: { |
2108 | // Convert the type from p0 to s64 to help selection. |
2109 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2110 | if (!DstTy.isPointerVector()) |
2111 | return false; |
2112 | auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg()); |
2113 | MRI.setType(VReg: I.getOperand(i: 0).getReg(), |
2114 | Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64))); |
2115 | MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass); |
2116 | I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0)); |
2117 | return true; |
2118 | } |
2119 | case TargetOpcode::G_UITOFP: |
2120 | case TargetOpcode::G_SITOFP: { |
// If both source and destination regbanks are FPR, then convert the opcode
// to G_SITOF/G_UITOF so that the importer can select the fpr variant.
// Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
// copy.
2125 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2126 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2127 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2128 | if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) |
2129 | return false; |
2130 | |
2131 | if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { |
2132 | if (I.getOpcode() == TargetOpcode::G_SITOFP) |
2133 | I.setDesc(TII.get(Opcode: AArch64::G_SITOF)); |
2134 | else |
2135 | I.setDesc(TII.get(Opcode: AArch64::G_UITOF)); |
2136 | return true; |
2137 | } |
2138 | return false; |
2139 | } |
2140 | default: |
2141 | return false; |
2142 | } |
2143 | } |
2144 | |
2145 | /// This lowering tries to look for G_PTR_ADD instructions and then converts |
2146 | /// them to a standard G_ADD with a COPY on the source. |
2147 | /// |
2148 | /// The motivation behind this is to expose the add semantics to the imported |
2149 | /// tablegen patterns. We shouldn't need to check for uses being loads/stores, |
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should already have attempted to
/// fold it into addressing modes and failed.
2153 | bool AArch64InstructionSelector::convertPtrAddToAdd( |
2154 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2155 | assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD" ); |
2156 | Register DstReg = I.getOperand(i: 0).getReg(); |
2157 | Register AddOp1Reg = I.getOperand(i: 1).getReg(); |
2158 | const LLT PtrTy = MRI.getType(Reg: DstReg); |
2159 | if (PtrTy.getAddressSpace() != 0) |
2160 | return false; |
2161 | |
2162 | const LLT CastPtrTy = |
2163 | PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64); |
2164 | auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg); |
2165 | // Set regbanks on the registers. |
2166 | if (PtrTy.isVector()) |
2167 | MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID)); |
2168 | else |
2169 | MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID)); |
2170 | |
2171 | // Now turn the %dst(p0) = G_PTR_ADD %base, off into: |
2172 | // %dst(intty) = G_ADD %intbase, off |
2173 | I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD)); |
2174 | MRI.setType(VReg: DstReg, Ty: CastPtrTy); |
2175 | I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0)); |
2176 | if (!select(I&: *PtrToInt)) { |
2177 | LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" ); |
2178 | return false; |
2179 | } |
2180 | |
2181 | // Also take the opportunity here to try to do some optimization. |
2182 | // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. |
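// For example, %off = G_SUB 0, %x feeding %dst = G_ADD %intbase, %off
// becomes %dst = G_SUB %intbase, %x.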
2183 | Register NegatedReg; |
2184 | if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg)))) |
2185 | return true; |
2186 | I.getOperand(i: 2).setReg(NegatedReg); |
2187 | I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB)); |
2188 | return true; |
2189 | } |
2190 | |
2191 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, |
2192 | MachineRegisterInfo &MRI) { |
2193 | // We try to match the immediate variant of LSL, which is actually an alias |
2194 | // for a special case of UBFM. Otherwise, we fall back to the imported |
2195 | // selector which will match the register variant. |
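// For example, a 64-bit G_SHL by a constant 3 becomes UBFMXri %x, 61, 60,
// which is the encoding of the "lsl %x, #3" alias.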
2196 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op" ); |
2197 | const auto &MO = I.getOperand(i: 2); |
2198 | auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI); |
2199 | if (!VRegAndVal) |
2200 | return false; |
2201 | |
2202 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2203 | if (DstTy.isVector()) |
2204 | return false; |
2205 | bool Is64Bit = DstTy.getSizeInBits() == 64; |
2206 | auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO); |
2207 | auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO); |
2208 | |
2209 | if (!Imm1Fn || !Imm2Fn) |
2210 | return false; |
2211 | |
2212 | auto NewI = |
2213 | MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, |
2214 | DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()}); |
2215 | |
2216 | for (auto &RenderFn : *Imm1Fn) |
2217 | RenderFn(NewI); |
2218 | for (auto &RenderFn : *Imm2Fn) |
2219 | RenderFn(NewI); |
2220 | |
2221 | I.eraseFromParent(); |
2222 | return constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI); |
2223 | } |
2224 | |
2225 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( |
2226 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2227 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE" ); |
2228 | // If we're storing a scalar, it doesn't matter what register bank that |
2229 | // scalar is on. All that matters is the size. |
2230 | // |
2231 | // So, if we see something like this (with a 32-bit scalar as an example): |
2232 | // |
2233 | // %x:gpr(s32) = ... something ... |
2234 | // %y:fpr(s32) = COPY %x:gpr(s32) |
2235 | // G_STORE %y:fpr(s32) |
2236 | // |
2237 | // We can fix this up into something like this: |
2238 | // |
2239 | // G_STORE %x:gpr(s32) |
2240 | // |
2241 | // And then continue the selection process normally. |
2242 | Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI); |
2243 | if (!DefDstReg.isValid()) |
2244 | return false; |
2245 | LLT DefDstTy = MRI.getType(Reg: DefDstReg); |
2246 | Register StoreSrcReg = I.getOperand(i: 0).getReg(); |
2247 | LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg); |
2248 | |
2249 | // If we get something strange like a physical register, then we shouldn't |
2250 | // go any further. |
2251 | if (!DefDstTy.isValid()) |
2252 | return false; |
2253 | |
2254 | // Are the source and dst types the same size? |
2255 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) |
2256 | return false; |
2257 | |
2258 | if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) == |
2259 | RBI.getRegBank(Reg: DefDstReg, MRI, TRI)) |
2260 | return false; |
2261 | |
2262 | // We have a cross-bank copy, which is entering a store. Let's fold it. |
2263 | I.getOperand(i: 0).setReg(DefDstReg); |
2264 | return true; |
2265 | } |
2266 | |
2267 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { |
2268 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2269 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2270 | |
2271 | MachineBasicBlock &MBB = *I.getParent(); |
2272 | MachineFunction &MF = *MBB.getParent(); |
2273 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2274 | |
2275 | switch (I.getOpcode()) { |
2276 | case AArch64::G_DUP: { |
2277 | // Before selecting a DUP instruction, check if it is better selected as a |
2278 | // MOV or load from a constant pool. |
2279 | Register Src = I.getOperand(i: 1).getReg(); |
2280 | auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI); |
2281 | if (!ValAndVReg) |
2282 | return false; |
2283 | LLVMContext &Ctx = MF.getFunction().getContext(); |
2284 | Register Dst = I.getOperand(i: 0).getReg(); |
2285 | auto *CV = ConstantDataVector::getSplat( |
2286 | NumElts: MRI.getType(Reg: Dst).getNumElements(), |
2287 | Elt: ConstantInt::get(Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Src).getSizeInBits()), |
2288 | V: ValAndVReg->Value)); |
2289 | if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI)) |
2290 | return false; |
2291 | I.eraseFromParent(); |
2292 | return true; |
2293 | } |
2294 | case TargetOpcode::G_SEXT: |
2295 | // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV |
2296 | // over a normal extend. |
2297 | if (selectUSMovFromExtend(I, MRI)) |
2298 | return true; |
2299 | return false; |
2300 | case TargetOpcode::G_BR: |
2301 | return false; |
2302 | case TargetOpcode::G_SHL: |
2303 | return earlySelectSHL(I, MRI); |
2304 | case TargetOpcode::G_CONSTANT: { |
2305 | bool IsZero = false; |
2306 | if (I.getOperand(i: 1).isCImm()) |
2307 | IsZero = I.getOperand(i: 1).getCImm()->isZero(); |
2308 | else if (I.getOperand(i: 1).isImm()) |
2309 | IsZero = I.getOperand(i: 1).getImm() == 0; |
2310 | |
2311 | if (!IsZero) |
2312 | return false; |
2313 | |
2314 | Register DefReg = I.getOperand(i: 0).getReg(); |
2315 | LLT Ty = MRI.getType(Reg: DefReg); |
2316 | if (Ty.getSizeInBits() == 64) { |
2317 | I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false); |
2318 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
2319 | } else if (Ty.getSizeInBits() == 32) { |
2320 | I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false); |
2321 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI); |
2322 | } else |
2323 | return false; |
2324 | |
2325 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
2326 | return true; |
2327 | } |
2328 | |
2329 | case TargetOpcode::G_ADD: { |
2330 | // Check if this is being fed by a G_ICMP on either side. |
2331 | // |
2332 | // (cmp pred, x, y) + z |
2333 | // |
2334 | // In the above case, when the cmp is true, we increment z by 1. So, we can |
2335 | // fold the add into the cset for the cmp by using cinc. |
2336 | // |
2337 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. |
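// For example, roughly:
//   %c = G_ICMP eq, %x, %y
//   %sum = G_ADD %z, %c
// becomes a flag-setting compare of %x and %y followed by
//   CSINC %sum, %z, %z, <inverted cc>
// i.e. a CINC of %z on the original condition.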
2338 | Register AddDst = I.getOperand(i: 0).getReg(); |
2339 | Register AddLHS = I.getOperand(i: 1).getReg(); |
2340 | Register AddRHS = I.getOperand(i: 2).getReg(); |
2341 | // Only handle scalars. |
2342 | LLT Ty = MRI.getType(Reg: AddLHS); |
2343 | if (Ty.isVector()) |
2344 | return false; |
2345 | // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 |
2346 | // bits. |
2347 | unsigned Size = Ty.getSizeInBits(); |
2348 | if (Size != 32 && Size != 64) |
2349 | return false; |
2350 | auto MatchCmp = [&](Register Reg) -> MachineInstr * { |
2351 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2352 | return nullptr; |
2353 | // If the LHS of the add is 32 bits, then we want to fold a 32-bit |
2354 | // compare. |
2355 | if (Size == 32) |
2356 | return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI); |
2357 | // We model scalar compares using 32-bit destinations right now. |
2358 | // If it's a 64-bit compare, it'll have 64-bit sources. |
2359 | Register ZExt; |
2360 | if (!mi_match(R: Reg, MRI, |
2361 | P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt)))))) |
2362 | return nullptr; |
2363 | auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI); |
2364 | if (!Cmp || |
2365 | MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64) |
2366 | return nullptr; |
2367 | return Cmp; |
2368 | }; |
2369 | // Try to match |
2370 | // z + (cmp pred, x, y) |
2371 | MachineInstr *Cmp = MatchCmp(AddRHS); |
2372 | if (!Cmp) { |
2373 | // (cmp pred, x, y) + z |
2374 | std::swap(a&: AddLHS, b&: AddRHS); |
2375 | Cmp = MatchCmp(AddRHS); |
2376 | if (!Cmp) |
2377 | return false; |
2378 | } |
2379 | auto &PredOp = Cmp->getOperand(i: 1); |
2380 | auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); |
2381 | const AArch64CC::CondCode InvCC = |
2382 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
2383 | MIB.setInstrAndDebugLoc(I); |
2384 | emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2), |
2385 | /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
2386 | emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB); |
2387 | I.eraseFromParent(); |
2388 | return true; |
2389 | } |
2390 | case TargetOpcode::G_OR: { |
2391 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of |
2392 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via |
2393 | // shifting and masking that we can replace with a BFI (encoded as a BFM). |
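// For example, with Size = 32 and ShiftImm = 16, roughly:
//   %or = G_OR (G_SHL %hi, 16), (G_AND %lo, 0xffff)
// becomes BFMWri %lo, %hi, 16, 15, i.e. a BFI of the low 16 bits of %hi into
// the top half of %lo.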
2394 | Register Dst = I.getOperand(i: 0).getReg(); |
2395 | LLT Ty = MRI.getType(Reg: Dst); |
2396 | |
2397 | if (!Ty.isScalar()) |
2398 | return false; |
2399 | |
2400 | unsigned Size = Ty.getSizeInBits(); |
2401 | if (Size != 32 && Size != 64) |
2402 | return false; |
2403 | |
2404 | Register ShiftSrc; |
2405 | int64_t ShiftImm; |
2406 | Register MaskSrc; |
2407 | int64_t MaskImm; |
2408 | if (!mi_match( |
2409 | R: Dst, MRI, |
2410 | P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))), |
2411 | R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm)))))) |
2412 | return false; |
2413 | |
2414 | if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) |
2415 | return false; |
2416 | |
2417 | int64_t Immr = Size - ShiftImm; |
2418 | int64_t Imms = Size - ShiftImm - 1; |
2419 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; |
2420 | emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB); |
2421 | I.eraseFromParent(); |
2422 | return true; |
2423 | } |
2424 | case TargetOpcode::G_FENCE: { |
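// A singlethread fence (operand 1, the sync scope, is 0) only needs a
// compiler barrier. Otherwise emit a DMB: ISHLD (0x9) for an acquire-only
// fence (ordering == 4), ISH (0xb) for anything stronger.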
2425 | if (I.getOperand(i: 1).getImm() == 0) |
2426 | BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER)); |
2427 | else |
2428 | BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB)) |
2429 | .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb); |
2430 | I.eraseFromParent(); |
2431 | return true; |
2432 | } |
2433 | default: |
2434 | return false; |
2435 | } |
2436 | } |
2437 | |
2438 | bool AArch64InstructionSelector::select(MachineInstr &I) { |
2439 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2440 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2441 | |
2442 | MachineBasicBlock &MBB = *I.getParent(); |
2443 | MachineFunction &MF = *MBB.getParent(); |
2444 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2445 | |
2446 | const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
2447 | if (Subtarget->requiresStrictAlign()) { |
2448 | // We don't support this feature yet. |
2449 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n" ); |
2450 | return false; |
2451 | } |
2452 | |
2453 | MIB.setInstrAndDebugLoc(I); |
2454 | |
2455 | unsigned Opcode = I.getOpcode(); |
2456 | // G_PHI requires same handling as PHI |
2457 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { |
2458 | // Certain non-generic instructions also need some special handling. |
2459 | |
2460 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) |
2461 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2462 | |
2463 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { |
2464 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2465 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2466 | |
2467 | const RegClassOrRegBank &RegClassOrBank = |
2468 | MRI.getRegClassOrRegBank(Reg: DefReg); |
2469 | |
2470 | const TargetRegisterClass *DefRC |
2471 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); |
2472 | if (!DefRC) { |
2473 | if (!DefTy.isValid()) { |
2474 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n" ); |
2475 | return false; |
2476 | } |
2477 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); |
2478 | DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB); |
2479 | if (!DefRC) { |
2480 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n" ); |
2481 | return false; |
2482 | } |
2483 | } |
2484 | |
2485 | I.setDesc(TII.get(Opcode: TargetOpcode::PHI)); |
2486 | |
2487 | return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI); |
2488 | } |
2489 | |
2490 | if (I.isCopy()) |
2491 | return selectCopy(I, TII, MRI, TRI, RBI); |
2492 | |
2493 | if (I.isDebugInstr()) |
2494 | return selectDebugInstr(I, MRI, RBI); |
2495 | |
2496 | return true; |
2497 | } |
2498 | |
2499 | |
2500 | if (I.getNumOperands() != I.getNumExplicitOperands()) { |
2501 | LLVM_DEBUG( |
2502 | dbgs() << "Generic instruction has unexpected implicit operands\n" ); |
2503 | return false; |
2504 | } |
2505 | |
2506 | // Try to do some lowering before we start instruction selecting. These |
2507 | // lowerings are purely transformations on the input G_MIR and so selection |
2508 | // must continue after any modification of the instruction. |
2509 | if (preISelLower(I)) { |
2510 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. |
2511 | } |
2512 | |
2513 | // There may be patterns where the importer can't deal with them optimally, |
2514 | // but does select it to a suboptimal sequence so our custom C++ selection |
2515 | // code later never has a chance to work on it. Therefore, we have an early |
2516 | // selection attempt here to give priority to certain selection routines |
2517 | // over the imported ones. |
2518 | if (earlySelect(I)) |
2519 | return true; |
2520 | |
2521 | if (selectImpl(I, CoverageInfo&: *CoverageInfo)) |
2522 | return true; |
2523 | |
2524 | LLT Ty = |
2525 | I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{}; |
2526 | |
2527 | switch (Opcode) { |
2528 | case TargetOpcode::G_SBFX: |
2529 | case TargetOpcode::G_UBFX: { |
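// G_SBFX/G_UBFX take an lsb and a width; they map directly onto SBFM/UBFM
// with immr = lsb and imms = lsb + width - 1.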
2530 | static const unsigned OpcTable[2][2] = { |
2531 | {AArch64::UBFMWri, AArch64::UBFMXri}, |
2532 | {AArch64::SBFMWri, AArch64::SBFMXri}}; |
2533 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; |
2534 | unsigned Size = Ty.getSizeInBits(); |
2535 | unsigned Opc = OpcTable[IsSigned][Size == 64]; |
2536 | auto Cst1 = |
2537 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI); |
2538 | assert(Cst1 && "Should have gotten a constant for src 1?" ); |
2539 | auto Cst2 = |
2540 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI); |
2541 | assert(Cst2 && "Should have gotten a constant for src 2?" ); |
2542 | auto LSB = Cst1->Value.getZExtValue(); |
2543 | auto Width = Cst2->Value.getZExtValue(); |
2544 | auto BitfieldInst = |
2545 | MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)}) |
2546 | .addImm(Val: LSB) |
2547 | .addImm(Val: LSB + Width - 1); |
2548 | I.eraseFromParent(); |
2549 | return constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI); |
2550 | } |
2551 | case TargetOpcode::G_BRCOND: |
2552 | return selectCompareBranch(I, MF, MRI); |
2553 | |
2554 | case TargetOpcode::G_BRINDIRECT: { |
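// If pointer authentication of block addresses is enabled, emit an
// authenticating BRA (key IA, constant discriminator, no address
// discriminator) instead of a plain BR.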
2555 | const Function &Fn = MF.getFunction(); |
2556 | if (std::optional<uint16_t> BADisc = |
2557 | STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) { |
2558 | auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()}); |
2559 | MI.addImm(Val: AArch64PACKey::IA); |
2560 | MI.addImm(Val: *BADisc); |
2561 | MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR); |
2562 | I.eraseFromParent(); |
2563 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
2564 | } |
2565 | I.setDesc(TII.get(Opcode: AArch64::BR)); |
2566 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2567 | } |
2568 | |
2569 | case TargetOpcode::G_BRJT: |
2570 | return selectBrJT(I, MRI); |
2571 | |
2572 | case AArch64::G_ADD_LOW: { |
// This op may have been separated from its ADRP companion by the localizer
2574 | // or some other code motion pass. Given that many CPUs will try to |
2575 | // macro fuse these operations anyway, select this into a MOVaddr pseudo |
2576 | // which will later be expanded into an ADRP+ADD pair after scheduling. |
2577 | MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
2578 | if (BaseMI->getOpcode() != AArch64::ADRP) { |
2579 | I.setDesc(TII.get(Opcode: AArch64::ADDXri)); |
2580 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2581 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2582 | } |
2583 | assert(TM.getCodeModel() == CodeModel::Small && |
2584 | "Expected small code model" ); |
2585 | auto Op1 = BaseMI->getOperand(i: 1); |
2586 | auto Op2 = I.getOperand(i: 2); |
2587 | auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {}) |
2588 | .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(), |
2589 | TargetFlags: Op1.getTargetFlags()) |
2590 | .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(), |
2591 | TargetFlags: Op2.getTargetFlags()); |
2592 | I.eraseFromParent(); |
2593 | return constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI); |
2594 | } |
2595 | |
2596 | case TargetOpcode::G_FCONSTANT: |
2597 | case TargetOpcode::G_CONSTANT: { |
2598 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; |
2599 | |
2600 | const LLT s8 = LLT::scalar(SizeInBits: 8); |
2601 | const LLT s16 = LLT::scalar(SizeInBits: 16); |
2602 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
2603 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
2604 | const LLT s128 = LLT::scalar(SizeInBits: 128); |
2605 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
2606 | |
2607 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2608 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2609 | const unsigned DefSize = DefTy.getSizeInBits(); |
2610 | const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI); |
2611 | |
2612 | // FIXME: Redundant check, but even less readable when factored out. |
2613 | if (isFP) { |
2614 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { |
2615 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2616 | << " constant, expected: " << s16 << " or " << s32 |
2617 | << " or " << s64 << " or " << s128 << '\n'); |
2618 | return false; |
2619 | } |
2620 | |
2621 | if (RB.getID() != AArch64::FPRRegBankID) { |
2622 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2623 | << " constant on bank: " << RB |
2624 | << ", expected: FPR\n" ); |
2625 | return false; |
2626 | } |
2627 | |
// The 0.0 case is covered by tablegen. Reject it here so we can be sure
// tablegen works correctly and isn't rescued by this code. 0.0 is not
// covered by tablegen for FP128, so we handle that case here.
2632 | if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0)) |
2633 | return false; |
2634 | } else { |
2635 | // s32 and s64 are covered by tablegen. |
2636 | if (Ty != p0 && Ty != s8 && Ty != s16) { |
2637 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2638 | << " constant, expected: " << s32 << ", " << s64 |
2639 | << ", or " << p0 << '\n'); |
2640 | return false; |
2641 | } |
2642 | |
2643 | if (RB.getID() != AArch64::GPRRegBankID) { |
2644 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2645 | << " constant on bank: " << RB |
2646 | << ", expected: GPR\n" ); |
2647 | return false; |
2648 | } |
2649 | } |
2650 | |
2651 | if (isFP) { |
2652 | const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB); |
// For 16 and 128-bit values, and for 32/64-bit immediates that aren't
// legal FP immediates, emit a constant pool load.
2654 | switch (DefSize) { |
2655 | default: |
2656 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?" ); |
2657 | case 32: |
2658 | case 64: { |
2659 | bool OptForSize = shouldOptForSize(MF: &MF); |
2660 | const auto &TLI = MF.getSubtarget().getTargetLowering(); |
2661 | // If TLI says that this fpimm is illegal, then we'll expand to a |
2662 | // constant pool load. |
2663 | if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(), |
2664 | EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize)) |
2665 | break; |
2666 | [[fallthrough]]; |
2667 | } |
2668 | case 16: |
2669 | case 128: { |
2670 | auto *FPImm = I.getOperand(i: 1).getFPImm(); |
2671 | auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB); |
2672 | if (!LoadMI) { |
2673 | LLVM_DEBUG(dbgs() << "Failed to load FP constant pool entry\n"); |
2674 | return false; |
2675 | } |
2676 | MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()}); |
2677 | I.eraseFromParent(); |
2678 | return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI); |
2679 | } |
2680 | } |
2681 | |
2682 | assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size" ); |
2683 | // Either emit a FMOV, or emit a copy to emit a normal mov. |
2684 | const Register DefGPRReg = MRI.createVirtualRegister( |
2685 | RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2686 | MachineOperand &RegOp = I.getOperand(i: 0); |
2687 | RegOp.setReg(DefGPRReg); |
2688 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2689 | MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg}); |
2690 | |
2691 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) { |
2692 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n" ); |
2693 | return false; |
2694 | } |
2695 | |
2696 | MachineOperand &ImmOp = I.getOperand(i: 1); |
2697 | // FIXME: Is going through int64_t always correct? |
2698 | ImmOp.ChangeToImmediate( |
2699 | ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); |
2700 | } else if (I.getOperand(i: 1).isCImm()) { |
2701 | uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue(); |
2702 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2703 | } else if (I.getOperand(i: 1).isImm()) { |
2704 | uint64_t Val = I.getOperand(i: 1).getImm(); |
2705 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2706 | } |
2707 | |
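// At this point the constant is a plain immediate operand; rewrite the
// instruction into a MOVi32imm/MOVi64imm pseudo, which is expanded after
// selection (typically into a MOVZ/MOVK sequence, or a single ORR/MOVN).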
2708 | const unsigned MovOpc = |
2709 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
2710 | I.setDesc(TII.get(Opcode: MovOpc)); |
2711 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2712 | return true; |
2713 | } |
2714 | case TargetOpcode::G_EXTRACT: { |
2715 | Register DstReg = I.getOperand(i: 0).getReg(); |
2716 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2717 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2718 | LLT DstTy = MRI.getType(Reg: DstReg); |
2719 | (void)DstTy; |
2720 | unsigned SrcSize = SrcTy.getSizeInBits(); |
2721 | |
2722 | if (SrcTy.getSizeInBits() > 64) { |
2723 | // This should be an extract of an s128, which is like a vector extract. |
2724 | if (SrcTy.getSizeInBits() != 128) |
2725 | return false; |
2726 | // Only support extracting 64 bits from an s128 at the moment. |
2727 | if (DstTy.getSizeInBits() != 64) |
2728 | return false; |
2729 | |
2730 | unsigned Offset = I.getOperand(i: 2).getImm(); |
2731 | if (Offset % 64 != 0) |
2732 | return false; |
2733 | |
2734 | // Check we have the right regbank always. |
2735 | const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
2736 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
2737 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!" ); |
2738 | |
2739 | if (SrcRB.getID() == AArch64::GPRRegBankID) { |
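// On the GPR bank an s128 is assigned to a 64-bit register pair, so the
// requested half can be copied out directly: offset 0 selects the first
// register of the pair (sube64), offset 64 the second (subo64).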
2740 | auto NewI = |
2741 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}) |
2742 | .addUse(RegNo: SrcReg, Flags: 0, |
2743 | SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64); |
2744 | constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI, |
2745 | RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0)); |
2746 | I.eraseFromParent(); |
2747 | return true; |
2748 | } |
2749 | |
2750 | // Emit the same code as a vector extract. |
2751 | // Offset must be a multiple of 64. |
2752 | unsigned LaneIdx = Offset / 64; |
2753 | MachineInstr *Extract = emitExtractVectorElt( |
2754 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB); |
2755 | if (!Extract) |
2756 | return false; |
2757 | I.eraseFromParent(); |
2758 | return true; |
2759 | } |
2760 | |
2761 | I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); |
2762 | MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() + |
2763 | Ty.getSizeInBits() - 1); |
2764 | |
2765 | if (SrcSize < 64) { |
2766 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && |
2767 | "unexpected G_EXTRACT types" ); |
2768 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2769 | } |
2770 | |
2771 | DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2772 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2773 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {}) |
2774 | .addReg(RegNo: DstReg, flags: 0, SubReg: AArch64::sub_32); |
2775 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
2776 | RC: AArch64::GPR32RegClass, MRI); |
2777 | I.getOperand(i: 0).setReg(DstReg); |
2778 | |
2779 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2780 | } |
2781 | |
2782 | case TargetOpcode::G_INSERT: { |
2783 | LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg()); |
2784 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2785 | unsigned DstSize = DstTy.getSizeInBits(); |
2786 | // Larger inserts are vectors; same-size ones should be something else by |
2787 | // now (split up or turned into COPYs). |
2788 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) |
2789 | return false; |
2790 | |
2791 | I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); |
2792 | unsigned LSB = I.getOperand(i: 3).getImm(); |
2793 | unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits(); |
2794 | I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize); |
2795 | MachineInstrBuilder(MF, I).addImm(Val: Width - 1); |
2796 | |
2797 | if (DstSize < 64) { |
2798 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && |
2799 | "unexpected G_INSERT types" ); |
2800 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2801 | } |
2802 | |
2803 | Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2804 | BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(), |
2805 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG)) |
2806 | .addDef(RegNo: SrcReg) |
2807 | .addImm(Val: 0) |
2808 | .addUse(RegNo: I.getOperand(i: 2).getReg()) |
2809 | .addImm(Val: AArch64::sub_32); |
2810 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(), |
2811 | RC: AArch64::GPR32RegClass, MRI); |
2812 | I.getOperand(i: 2).setReg(SrcReg); |
2813 | |
2814 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2815 | } |
2816 | case TargetOpcode::G_FRAME_INDEX: { |
2817 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). |
2818 | if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2819 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty |
2820 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2821 | return false; |
2822 | } |
2823 | I.setDesc(TII.get(Opcode: AArch64::ADDXri)); |
2824 | |
2825 | // MOs for a #0 shifted immediate. |
2826 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2827 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2828 | |
2829 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2830 | } |
2831 | |
2832 | case TargetOpcode::G_GLOBAL_VALUE: { |
2833 | const GlobalValue *GV = nullptr; |
2834 | unsigned OpFlags; |
2835 | if (I.getOperand(i: 1).isSymbol()) { |
2836 | OpFlags = I.getOperand(i: 1).getTargetFlags(); |
2837 | // Currently only used by "RtLibUseGOT". |
2838 | assert(OpFlags == AArch64II::MO_GOT); |
2839 | } else { |
2840 | GV = I.getOperand(i: 1).getGlobal(); |
2841 | if (GV->isThreadLocal()) |
2842 | return selectTLSGlobalValue(I, MRI); |
2843 | OpFlags = STI.ClassifyGlobalReference(GV, TM); |
2844 | } |
2845 | |
2846 | if (OpFlags & AArch64II::MO_GOT) { |
2847 | I.setDesc(TII.get(Opcode: AArch64::LOADgot)); |
2848 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2849 | } else if (TM.getCodeModel() == CodeModel::Large && |
2850 | !TM.isPositionIndependent()) { |
2851 | // Materialize the global using movz/movk instructions. |
2852 | materializeLargeCMVal(I, V: GV, OpFlags); |
2853 | I.eraseFromParent(); |
2854 | return true; |
2855 | } else if (TM.getCodeModel() == CodeModel::Tiny) { |
2856 | I.setDesc(TII.get(Opcode: AArch64::ADR)); |
2857 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2858 | } else { |
2859 | I.setDesc(TII.get(Opcode: AArch64::MOVaddr)); |
2860 | I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); |
2861 | MachineInstrBuilder MIB(MF, I); |
2862 | MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(), |
2863 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
2864 | } |
2865 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2866 | } |
2867 | |
2868 | case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE: |
2869 | return selectPtrAuthGlobalValue(I, MRI); |
2870 | |
2871 | case TargetOpcode::G_ZEXTLOAD: |
2872 | case TargetOpcode::G_LOAD: |
2873 | case TargetOpcode::G_STORE: { |
2874 | GLoadStore &LdSt = cast<GLoadStore>(Val&: I); |
2875 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; |
2876 | LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg()); |
2877 | |
2878 | if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2879 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy |
2880 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2881 | return false; |
2882 | } |
2883 | |
2884 | uint64_t MemSizeInBytes = LdSt.getMemSize().getValue(); |
2885 | unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue(); |
2886 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); |
2887 | |
2888 | // Need special instructions for atomics that affect ordering. |
2889 | if (Order != AtomicOrdering::NotAtomic && |
2890 | Order != AtomicOrdering::Unordered && |
2891 | Order != AtomicOrdering::Monotonic) { |
2892 | assert(!isa<GZExtLoad>(LdSt)); |
2893 | assert(MemSizeInBytes <= 8 && |
2894 | "128-bit atomics should already be custom-legalized" ); |
2895 | |
2896 | if (isa<GLoad>(Val: LdSt)) { |
2897 | static constexpr unsigned LDAPROpcodes[] = { |
2898 | AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; |
2899 | static constexpr unsigned LDAROpcodes[] = { |
2900 | AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; |
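// Prefer LDAPR (load-acquire with RCpc semantics) when the subtarget has it;
// sequentially consistent loads still use LDAR, whose stronger ordering
// against earlier STLRs is required for seq_cst.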
2901 | ArrayRef<unsigned> Opcodes = |
2902 | STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent |
2903 | ? LDAPROpcodes |
2904 | : LDAROpcodes; |
2905 | I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2906 | } else { |
2907 | static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, |
2908 | AArch64::STLRW, AArch64::STLRX}; |
2909 | Register ValReg = LdSt.getReg(Idx: 0); |
2910 | if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { |
2911 | // Emit a subreg copy of 32 bits. |
2912 | Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
2913 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {}) |
2914 | .addReg(RegNo: I.getOperand(i: 0).getReg(), flags: 0, SubReg: AArch64::sub_32); |
2915 | I.getOperand(i: 0).setReg(NewVal); |
2916 | } |
2917 | I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)])); |
2918 | } |
2919 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2920 | return true; |
2921 | } |
2922 | |
2923 | #ifndef NDEBUG |
2924 | const Register PtrReg = LdSt.getPointerReg(); |
2925 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); |
2926 | // Check that the pointer register is valid. |
2927 | assert(PtrRB.getID() == AArch64::GPRRegBankID && |
2928 | "Load/Store pointer operand isn't a GPR" ); |
2929 | assert(MRI.getType(PtrReg).isPointer() && |
2930 | "Load/Store pointer operand isn't a pointer" ); |
2931 | #endif |
2932 | |
2933 | const Register ValReg = LdSt.getReg(Idx: 0); |
2934 | const LLT ValTy = MRI.getType(Reg: ValReg); |
2935 | const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI); |
2936 | |
2937 | // The code below doesn't support truncating stores, so we need to split it |
2938 | // again. |
2939 | if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2940 | unsigned SubReg; |
2941 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2942 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2943 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2944 | return false; |
2945 | |
2946 | // Generate a subreg copy. |
2947 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {}) |
2948 | .addReg(RegNo: ValReg, flags: 0, SubReg) |
2949 | .getReg(Idx: 0); |
2950 | RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI); |
2951 | LdSt.getOperand(i: 0).setReg(Copy); |
2952 | } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
2953 | // If this is an any-extending load from the FPR bank, split it into a regular |
2954 | // load + extend. |
2955 | if (RB.getID() == AArch64::FPRRegBankID) { |
2956 | unsigned SubReg; |
2957 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
2958 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
2959 | if (!getSubRegForClass(RC, TRI, SubReg)) |
2960 | return false; |
2961 | Register OldDst = LdSt.getReg(Idx: 0); |
2962 | Register NewDst = |
2963 | MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType()); |
2964 | LdSt.getOperand(i: 0).setReg(NewDst); |
2965 | MRI.setRegBank(Reg: NewDst, RegBank: RB); |
2966 | // Generate a SUBREG_TO_REG to extend it. |
2967 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator())); |
2968 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {}) |
2969 | .addImm(Val: 0) |
2970 | .addUse(RegNo: NewDst) |
2971 | .addImm(Val: SubReg); |
2972 | auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB); |
2973 | RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI); |
2974 | MIB.setInstr(LdSt); |
2975 | } |
2976 | } |
2977 | |
2978 | // Helper lambda for partially selecting I. Either returns the original |
2979 | // instruction with an updated opcode, or a new instruction. |
2980 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { |
2981 | bool IsStore = isa<GStore>(Val: I); |
2982 | const unsigned NewOpc = |
2983 | selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits); |
2984 | if (NewOpc == I.getOpcode()) |
2985 | return nullptr; |
2986 | // Check if we can fold anything into the addressing mode. |
2987 | auto AddrModeFns = |
2988 | selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes); |
2989 | if (!AddrModeFns) { |
2990 | // Can't fold anything. Use the original instruction. |
2991 | I.setDesc(TII.get(Opcode: NewOpc)); |
2992 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2993 | return &I; |
2994 | } |
2995 | |
2996 | // Folded something. Create a new instruction and return it. |
2997 | auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags()); |
2998 | Register CurValReg = I.getOperand(i: 0).getReg(); |
2999 | IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg); |
3000 | NewInst.cloneMemRefs(OtherMI: I); |
3001 | for (auto &Fn : *AddrModeFns) |
3002 | Fn(NewInst); |
3003 | I.eraseFromParent(); |
3004 | return &*NewInst; |
3005 | }; |
3006 | |
3007 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); |
3008 | if (!LoadStore) |
3009 | return false; |
3010 | |
3011 | // If we're storing a 0, use WZR/XZR. |
3012 | if (Opcode == TargetOpcode::G_STORE) { |
3013 | auto CVal = getIConstantVRegValWithLookThrough( |
3014 | VReg: LoadStore->getOperand(i: 0).getReg(), MRI); |
3015 | if (CVal && CVal->Value == 0) { |
3016 | switch (LoadStore->getOpcode()) { |
3017 | case AArch64::STRWui: |
3018 | case AArch64::STRHHui: |
3019 | case AArch64::STRBBui: |
3020 | LoadStore->getOperand(i: 0).setReg(AArch64::WZR); |
3021 | break; |
3022 | case AArch64::STRXui: |
3023 | LoadStore->getOperand(i: 0).setReg(AArch64::XZR); |
3024 | break; |
3025 | } |
3026 | } |
3027 | } |
3028 | |
3029 | if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD && |
3030 | ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) { |
3031 | // The any/zextload from a smaller type to i32 should be handled by the |
3032 | // importer. |
3033 | if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64) |
3034 | return false; |
3035 | // If we have an extending load then change the load's type to be a |
3036 | // narrower reg and zero_extend with SUBREG_TO_REG. |
3037 | Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3038 | Register DstReg = LoadStore->getOperand(i: 0).getReg(); |
3039 | LoadStore->getOperand(i: 0).setReg(LdReg); |
3040 | |
3041 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator())); |
3042 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {}) |
3043 | .addImm(Val: 0) |
3044 | .addUse(RegNo: LdReg) |
3045 | .addImm(Val: AArch64::sub_32); |
3046 | constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI); |
3047 | return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass, |
3048 | MRI); |
3049 | } |
3050 | return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI); |
3051 | } |
3052 | |
3053 | case TargetOpcode::G_INDEXED_ZEXTLOAD: |
3054 | case TargetOpcode::G_INDEXED_SEXTLOAD: |
3055 | return selectIndexedExtLoad(I, MRI); |
3056 | case TargetOpcode::G_INDEXED_LOAD: |
3057 | return selectIndexedLoad(I, MRI); |
3058 | case TargetOpcode::G_INDEXED_STORE: |
3059 | return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI); |
3060 | |
3061 | case TargetOpcode::G_LSHR: |
3062 | case TargetOpcode::G_ASHR: |
3063 | if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3064 | return selectVectorAshrLshr(I, MRI); |
3065 | [[fallthrough]]; |
3066 | case TargetOpcode::G_SHL: |
3067 | if (Opcode == TargetOpcode::G_SHL && |
3068 | MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3069 | return selectVectorSHL(I, MRI); |
3070 | |
3071 | // These shifts were legalized to have 64-bit shift amounts because we |
3072 | // want to take advantage of the selection patterns that assume the |
3073 | // immediates are s64s. However, selectBinaryOp will assume both operands |
3074 | // have the same bit size. |
3075 | { |
3076 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3077 | Register ShiftReg = I.getOperand(i: 2).getReg(); |
3078 | const LLT ShiftTy = MRI.getType(Reg: ShiftReg); |
3079 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3080 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && |
3081 | ShiftTy.getSizeInBits() == 64) { |
3082 | assert(!ShiftTy.isVector() && "unexpected vector shift ty" ); |
3083 | // Insert a subregister copy to implement a 64->32 trunc |
3084 | auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {}) |
3085 | .addReg(RegNo: ShiftReg, flags: 0, SubReg: AArch64::sub_32); |
3086 | MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID)); |
3087 | I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0)); |
3088 | } |
3089 | } |
3090 | [[fallthrough]]; |
3091 | case TargetOpcode::G_OR: { |
3092 | // Reject the various things we don't support yet. |
3093 | if (unsupportedBinOp(I, RBI, MRI, TRI)) |
3094 | return false; |
3095 | |
3096 | const unsigned OpSize = Ty.getSizeInBits(); |
3097 | |
3098 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3099 | const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI); |
3100 | |
3101 | const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize); |
3102 | if (NewOpc == I.getOpcode()) |
3103 | return false; |
3104 | |
3105 | I.setDesc(TII.get(Opcode: NewOpc)); |
3106 | // FIXME: Should the type be always reset in setDesc? |
3107 | |
3108 | // Now that we selected an opcode, we need to constrain the register |
3109 | // operands to use appropriate classes. |
3110 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3111 | } |
3112 | |
3113 | case TargetOpcode::G_PTR_ADD: { |
3114 | emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB); |
3115 | I.eraseFromParent(); |
3116 | return true; |
3117 | } |
3118 | |
3119 | case TargetOpcode::G_SADDE: |
3120 | case TargetOpcode::G_UADDE: |
3121 | case TargetOpcode::G_SSUBE: |
3122 | case TargetOpcode::G_USUBE: |
3123 | case TargetOpcode::G_SADDO: |
3124 | case TargetOpcode::G_UADDO: |
3125 | case TargetOpcode::G_SSUBO: |
3126 | case TargetOpcode::G_USUBO: |
3127 | return selectOverflowOp(I, MRI); |
3128 | |
3129 | case TargetOpcode::G_PTRMASK: { |
3130 | Register MaskReg = I.getOperand(i: 2).getReg(); |
3131 | std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI); |
3132 | // TODO: Implement arbitrary cases |
3133 | if (!MaskVal || !isShiftedMask_64(Value: *MaskVal)) |
3134 | return false; |
3135 | |
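// A shifted run of contiguous set bits is encodable as a logical immediate,
// so e.g. masking a pointer with 0xFFFFFFFFFFFFFFF0 (align down to 16 bytes)
// becomes a single ANDXri.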
3136 | uint64_t Mask = *MaskVal; |
3137 | I.setDesc(TII.get(Opcode: AArch64::ANDXri)); |
3138 | I.getOperand(i: 2).ChangeToImmediate( |
3139 | ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64)); |
3140 | |
3141 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3142 | } |
3143 | case TargetOpcode::G_PTRTOINT: |
3144 | case TargetOpcode::G_TRUNC: { |
3145 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3146 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3147 | |
3148 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3149 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3150 | |
3151 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3152 | const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
3153 | |
3154 | if (DstRB.getID() != SrcRB.getID()) { |
3155 | LLVM_DEBUG( |
3156 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ); |
3157 | return false; |
3158 | } |
3159 | |
3160 | if (DstRB.getID() == AArch64::GPRRegBankID) { |
3161 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3162 | if (!DstRC) |
3163 | return false; |
3164 | |
3165 | const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB); |
3166 | if (!SrcRC) |
3167 | return false; |
3168 | |
3169 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) || |
3170 | !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
3171 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n" ); |
3172 | return false; |
3173 | } |
3174 | |
3175 | if (DstRC == SrcRC) { |
3176 | // Nothing to be done |
3177 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) && |
3178 | SrcTy == LLT::scalar(SizeInBits: 64)) { |
3179 | llvm_unreachable("TableGen can import this case" ); |
3180 | return false; |
3181 | } else if (DstRC == &AArch64::GPR32RegClass && |
3182 | SrcRC == &AArch64::GPR64RegClass) { |
3183 | I.getOperand(i: 1).setSubReg(AArch64::sub_32); |
3184 | } else { |
3185 | LLVM_DEBUG( |
3186 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ); |
3187 | return false; |
3188 | } |
3189 | |
3190 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
3191 | return true; |
3192 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { |
3193 | if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && |
3194 | SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
3195 | I.setDesc(TII.get(Opcode: AArch64::XTNv4i16)); |
3196 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3197 | return true; |
3198 | } |
3199 | |
3200 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { |
3201 | MachineInstr *Extract = emitExtractVectorElt( |
3202 | DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB); |
3203 | if (!Extract) |
3204 | return false; |
3205 | I.eraseFromParent(); |
3206 | return true; |
3207 | } |
3208 | |
3209 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. |
3210 | if (Opcode == TargetOpcode::G_PTRTOINT) { |
3211 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector" ); |
3212 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
3213 | return selectCopy(I, TII, MRI, TRI, RBI); |
3214 | } |
3215 | } |
3216 | |
3217 | return false; |
3218 | } |
3219 | |
3220 | case TargetOpcode::G_ANYEXT: { |
3221 | if (selectUSMovFromExtend(I, MRI)) |
3222 | return true; |
3223 | |
3224 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3225 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3226 | |
3227 | const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3228 | if (RBDst.getID() != AArch64::GPRRegBankID) { |
3229 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst |
3230 | << ", expected: GPR\n" ); |
3231 | return false; |
3232 | } |
3233 | |
3234 | const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
3235 | if (RBSrc.getID() != AArch64::GPRRegBankID) { |
3236 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc |
3237 | << ", expected: GPR\n" ); |
3238 | return false; |
3239 | } |
3240 | |
3241 | const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits(); |
3242 | |
3243 | if (DstSize == 0) { |
3244 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n" ); |
3245 | return false; |
3246 | } |
3247 | |
3248 | if (DstSize != 64 && DstSize > 32) { |
3249 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize |
3250 | << ", expected: 32 or 64\n" ); |
3251 | return false; |
3252 | } |
3253 | // At this point G_ANYEXT is just like a plain COPY, but we need |
3254 | // to explicitly form the 64-bit value if any. |
3255 | if (DstSize > 32) { |
3256 | Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass); |
3257 | BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG)) |
3258 | .addDef(RegNo: ExtSrc) |
3259 | .addImm(Val: 0) |
3260 | .addUse(RegNo: SrcReg) |
3261 | .addImm(Val: AArch64::sub_32); |
3262 | I.getOperand(i: 1).setReg(ExtSrc); |
3263 | } |
3264 | return selectCopy(I, TII, MRI, TRI, RBI); |
3265 | } |
3266 | |
3267 | case TargetOpcode::G_ZEXT: |
3268 | case TargetOpcode::G_SEXT_INREG: |
3269 | case TargetOpcode::G_SEXT: { |
3270 | if (selectUSMovFromExtend(I, MRI)) |
3271 | return true; |
3272 | |
3273 | unsigned Opcode = I.getOpcode(); |
3274 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; |
3275 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3276 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3277 | const LLT DstTy = MRI.getType(Reg: DefReg); |
3278 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3279 | unsigned DstSize = DstTy.getSizeInBits(); |
3280 | unsigned SrcSize = SrcTy.getSizeInBits(); |
3281 | |
3282 | // SEXT_INREG has the same src reg size as dst, the size of the value to be |
3283 | // extended is encoded in the imm. |
3284 | if (Opcode == TargetOpcode::G_SEXT_INREG) |
3285 | SrcSize = I.getOperand(i: 2).getImm(); |
3286 | |
3287 | if (DstTy.isVector()) |
3288 | return false; // Should be handled by imported patterns. |
3289 | |
3290 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == |
3291 | AArch64::GPRRegBankID && |
3292 | "Unexpected ext regbank" ); |
3293 | |
3294 | MachineInstr *ExtI; |
3295 | |
3296 | // First check if we're extending the result of a load which has a dest type |
3297 | // smaller than 32 bits; in that case this zext is redundant. GPR32 is the |
3298 | // smallest GPR register on AArch64 and all smaller loads automatically |
3299 | // zero-extend the upper bits. E.g. |
3300 | // %v(s8) = G_LOAD %p, :: (load 1) |
3301 | // %v2(s32) = G_ZEXT %v(s8) |
3302 | if (!IsSigned) { |
3303 | auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI); |
3304 | bool IsGPR = |
3305 | RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; |
3306 | if (LoadMI && IsGPR) { |
3307 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); |
3308 | unsigned BytesLoaded = MemOp->getSize().getValue(); |
3309 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) |
3310 | return selectCopy(I, TII, MRI, TRI, RBI); |
3311 | } |
3312 | |
3313 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) |
3314 | // + SUBREG_TO_REG. |
3315 | if (IsGPR && SrcSize == 32 && DstSize == 64) { |
3316 | Register SubregToRegSrc = |
3317 | MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3318 | const Register ZReg = AArch64::WZR; |
3319 | MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg}) |
3320 | .addImm(Val: 0); |
3321 | |
3322 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {}) |
3323 | .addImm(Val: 0) |
3324 | .addUse(RegNo: SubregToRegSrc) |
3325 | .addImm(Val: AArch64::sub_32); |
3326 | |
3327 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, |
3328 | MRI)) { |
3329 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n" ); |
3330 | return false; |
3331 | } |
3332 | |
3333 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass, |
3334 | MRI)) { |
3335 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n" ); |
3336 | return false; |
3337 | } |
3338 | |
3339 | I.eraseFromParent(); |
3340 | return true; |
3341 | } |
3342 | } |
3343 | |
3344 | if (DstSize == 64) { |
3345 | if (Opcode != TargetOpcode::G_SEXT_INREG) { |
3346 | // FIXME: Can we avoid manually doing this? |
3347 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass, |
3348 | MRI)) { |
3349 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) |
3350 | << " operand\n" ); |
3351 | return false; |
3352 | } |
3353 | SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, |
3354 | DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}) |
3355 | .addImm(Val: 0) |
3356 | .addUse(RegNo: SrcReg) |
3357 | .addImm(Val: AArch64::sub_32) |
3358 | .getReg(Idx: 0); |
3359 | } |
3360 | |
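// [SU]BFM with immr=0 and imms=SrcSize-1 sign/zero-extends the low SrcSize
// bits of the source, i.e. it is the canonical SXT*/UXT*-style extension.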
3361 | ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, |
3362 | DstOps: {DefReg}, SrcOps: {SrcReg}) |
3363 | .addImm(Val: 0) |
3364 | .addImm(Val: SrcSize - 1); |
3365 | } else if (DstSize <= 32) { |
3366 | ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, |
3367 | DstOps: {DefReg}, SrcOps: {SrcReg}) |
3368 | .addImm(Val: 0) |
3369 | .addImm(Val: SrcSize - 1); |
3370 | } else { |
3371 | return false; |
3372 | } |
3373 | |
3374 | constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI); |
3375 | I.eraseFromParent(); |
3376 | return true; |
3377 | } |
3378 | |
3379 | case TargetOpcode::G_SITOFP: |
3380 | case TargetOpcode::G_UITOFP: |
3381 | case TargetOpcode::G_FPTOSI: |
3382 | case TargetOpcode::G_FPTOUI: { |
3383 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()), |
3384 | SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3385 | const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy); |
3386 | if (NewOpc == Opcode) |
3387 | return false; |
3388 | |
3389 | I.setDesc(TII.get(Opcode: NewOpc)); |
3390 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3391 | I.setFlags(MachineInstr::NoFPExcept); |
3392 | |
3393 | return true; |
3394 | } |
3395 | |
3396 | case TargetOpcode::G_FREEZE: |
3397 | return selectCopy(I, TII, MRI, TRI, RBI); |
3398 | |
3399 | case TargetOpcode::G_INTTOPTR: |
3400 | // The importer is currently unable to import pointer types since they |
3401 | // didn't exist in SelectionDAG. |
3402 | return selectCopy(I, TII, MRI, TRI, RBI); |
3403 | |
3404 | case TargetOpcode::G_BITCAST: |
3405 | // Imported SelectionDAG rules can handle every bitcast except those that |
3406 | // bitcast from a type to the same type. Ideally, these shouldn't occur |
3407 | // but we might not run an optimizer that deletes them. The other exception |
3408 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge |
3409 | // of them. |
3410 | return selectCopy(I, TII, MRI, TRI, RBI); |
3411 | |
3412 | case TargetOpcode::G_SELECT: { |
3413 | auto &Sel = cast<GSelect>(Val&: I); |
3414 | const Register CondReg = Sel.getCondReg(); |
3415 | const Register TReg = Sel.getTrueReg(); |
3416 | const Register FReg = Sel.getFalseReg(); |
3417 | |
3418 | if (tryOptSelect(Sel)) |
3419 | return true; |
3420 | |
3421 | // Make sure to use an unused vreg instead of wzr, so that the peephole |
3422 | // optimizations will be able to optimize these. |
3423 | Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3424 | auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg}) |
3425 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32)); |
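// The ANDS against #1 tests bit 0 of the condition register and sets NZCV;
// the conditional select emitted below then uses the NE condition.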
3426 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
3427 | if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB)) |
3428 | return false; |
3429 | Sel.eraseFromParent(); |
3430 | return true; |
3431 | } |
3432 | case TargetOpcode::G_ICMP: { |
3433 | if (Ty.isVector()) |
3434 | return false; |
3435 | |
3436 | if (Ty != LLT::scalar(SizeInBits: 32)) { |
3437 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty |
3438 | << ", expected: " << LLT::scalar(32) << '\n'); |
3439 | return false; |
3440 | } |
3441 | |
3442 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3443 | const AArch64CC::CondCode InvCC = |
3444 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
3445 | emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB); |
3446 | emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR, |
3447 | /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB); |
3448 | I.eraseFromParent(); |
3449 | return true; |
3450 | } |
3451 | |
3452 | case TargetOpcode::G_FCMP: { |
3453 | CmpInst::Predicate Pred = |
3454 | static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3455 | if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
3456 | Pred) || |
3457 | !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB)) |
3458 | return false; |
3459 | I.eraseFromParent(); |
3460 | return true; |
3461 | } |
3462 | case TargetOpcode::G_VASTART: |
3463 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) |
3464 | : selectVaStartAAPCS(I, MF, MRI); |
3465 | case TargetOpcode::G_INTRINSIC: |
3466 | return selectIntrinsic(I, MRI); |
3467 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
3468 | return selectIntrinsicWithSideEffects(I, MRI); |
3469 | case TargetOpcode::G_IMPLICIT_DEF: { |
3470 | I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF)); |
3471 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3472 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3473 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3474 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3475 | RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI); |
3476 | return true; |
3477 | } |
3478 | case TargetOpcode::G_BLOCK_ADDR: { |
3479 | Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction(); |
3480 | if (std::optional<uint16_t> BADisc = |
3481 | STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) { |
3482 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {}); |
3483 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
3484 | MIB.buildInstr(Opcode: AArch64::MOVaddrPAC) |
3485 | .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress()) |
3486 | .addImm(Val: AArch64PACKey::IA) |
3487 | .addReg(/*AddrDisc=*/RegNo: AArch64::XZR) |
3488 | .addImm(Val: *BADisc) |
3489 | .constrainAllUses(TII, TRI, RBI); |
3490 | MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16)); |
3491 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
3492 | RC: AArch64::GPR64RegClass, MRI); |
3493 | I.eraseFromParent(); |
3494 | return true; |
3495 | } |
3496 | if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) { |
3497 | materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0); |
3498 | I.eraseFromParent(); |
3499 | return true; |
3500 | } else { |
3501 | I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA)); |
3502 | auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA), |
3503 | DestReg: I.getOperand(i: 0).getReg()) |
3504 | .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(), |
3505 | /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE) |
3506 | .addBlockAddress( |
3507 | BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0, |
3508 | TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3509 | I.eraseFromParent(); |
3510 | return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI); |
3511 | } |
3512 | } |
3513 | case AArch64::G_DUP: { |
3514 | // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the |
3515 | // imported patterns, so do it manually here. Avoiding the s16 GPR in the |
3516 | // first place is difficult: adding an anyextend at RegBankSelect to fix |
3517 | // this may end up pessimizing the FPR case. Manual selection is the most |
3518 | // robust solution for now. |
3519 | if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() != |
3520 | AArch64::GPRRegBankID) |
3521 | return false; // We expect the fpr regbank case to be imported. |
3522 | LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3523 | if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) |
3524 | I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr)); |
3525 | else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) |
3526 | I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr)); |
3527 | else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) |
3528 | I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr)); |
3529 | else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) |
3530 | I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr)); |
3531 | else |
3532 | return false; |
3533 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3534 | } |
3535 | case TargetOpcode::G_BUILD_VECTOR: |
3536 | return selectBuildVector(I, MRI); |
3537 | case TargetOpcode::G_MERGE_VALUES: |
3538 | return selectMergeValues(I, MRI); |
3539 | case TargetOpcode::G_UNMERGE_VALUES: |
3540 | return selectUnmergeValues(I, MRI); |
3541 | case TargetOpcode::G_SHUFFLE_VECTOR: |
3542 | return selectShuffleVector(I, MRI); |
3543 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: |
3544 | return selectExtractElt(I, MRI); |
3545 | case TargetOpcode::G_CONCAT_VECTORS: |
3546 | return selectConcatVectors(I, MRI); |
3547 | case TargetOpcode::G_JUMP_TABLE: |
3548 | return selectJumpTable(I, MRI); |
3549 | case TargetOpcode::G_MEMCPY: |
3550 | case TargetOpcode::G_MEMCPY_INLINE: |
3551 | case TargetOpcode::G_MEMMOVE: |
3552 | case TargetOpcode::G_MEMSET: |
3553 | assert(STI.hasMOPS() && "Shouldn't get here without +mops feature" ); |
3554 | return selectMOPS(I, MRI); |
3555 | } |
3556 | |
3557 | return false; |
3558 | } |
3559 | |
3560 | bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) { |
3561 | MachineIRBuilderState OldMIBState = MIB.getState(); |
3562 | bool Success = select(I); |
3563 | MIB.setState(OldMIBState); |
3564 | return Success; |
3565 | } |
3566 | |
3567 | bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, |
3568 | MachineRegisterInfo &MRI) { |
3569 | unsigned Mopcode; |
3570 | switch (GI.getOpcode()) { |
3571 | case TargetOpcode::G_MEMCPY: |
3572 | case TargetOpcode::G_MEMCPY_INLINE: |
3573 | Mopcode = AArch64::MOPSMemoryCopyPseudo; |
3574 | break; |
3575 | case TargetOpcode::G_MEMMOVE: |
3576 | Mopcode = AArch64::MOPSMemoryMovePseudo; |
3577 | break; |
3578 | case TargetOpcode::G_MEMSET: |
3579 | // For tagged memset see llvm.aarch64.mops.memset.tag |
3580 | Mopcode = AArch64::MOPSMemorySetPseudo; |
3581 | break; |
3582 | } |
3583 | |
3584 | auto &DstPtr = GI.getOperand(i: 0); |
3585 | auto &SrcOrVal = GI.getOperand(i: 1); |
3586 | auto &Size = GI.getOperand(i: 2); |
3587 | |
3588 | // Create copies of the registers that can be clobbered. |
3589 | const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg()); |
3590 | const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg()); |
3591 | const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg()); |
3592 | |
3593 | const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; |
3594 | const auto &SrcValRegClass = |
3595 | IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; |
3596 | |
3597 | // Constrain to specific registers |
3598 | RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI); |
3599 | RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI); |
3600 | RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI); |
3601 | |
3602 | MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr); |
3603 | MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal); |
3604 | MIB.buildCopy(Res: SizeCopy, Op: Size); |
3605 | |
3606 | // The new instruction uses the copied registers because it must update them. |
3607 | // The defs are not used since they don't exist in G_MEM*, but they are still |
3608 | // tied to the corresponding uses. |
3609 | // Note: the operand order differs from G_MEMSET, G_MEMCPY and G_MEMMOVE. |
3610 | Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass); |
3611 | Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3612 | if (IsSet) { |
3613 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize}, |
3614 | SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy}); |
3615 | } else { |
3616 | Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass); |
3617 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize}, |
3618 | SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy}); |
3619 | } |
3620 | |
3621 | GI.eraseFromParent(); |
3622 | return true; |
3623 | } |
3624 | |
3625 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, |
3626 | MachineRegisterInfo &MRI) { |
3627 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT" ); |
3628 | Register JTAddr = I.getOperand(i: 0).getReg(); |
3629 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3630 | Register Index = I.getOperand(i: 2).getReg(); |
3631 | |
3632 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr); |
3633 | |
3634 | // With aarch64-jump-table-hardening, we only expand the jump table dispatch |
3635 | // sequence later, to guarantee the integrity of the intermediate values. |
3636 | if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening" )) { |
3637 | CodeModel::Model CM = TM.getCodeModel(); |
3638 | if (STI.isTargetMachO()) { |
3639 | if (CM != CodeModel::Small && CM != CodeModel::Large) |
3640 | report_fatal_error(reason: "Unsupported code-model for hardened jump-table" ); |
3641 | } else { |
3642 | // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO. |
3643 | assert(STI.isTargetELF() && |
3644 | "jump table hardening only supported on MachO/ELF" ); |
3645 | if (CM != CodeModel::Small) |
3646 | report_fatal_error(reason: "Unsupported code-model for hardened jump-table" ); |
3647 | } |
3648 | |
3649 | MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg()); |
3650 | MIB.buildInstr(Opcode: AArch64::BR_JumpTable) |
3651 | .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex()); |
3652 | I.eraseFromParent(); |
3653 | return true; |
3654 | } |
3655 | |
3656 | Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3657 | Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass); |
3658 | |
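// JumpTableDest32 is a pseudo that computes the branch target from the 4-byte
// jump-table entries registered above; it is expanded into the actual address
// computation later.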
3659 | auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32, |
3660 | DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index}) |
3661 | .addJumpTableIndex(Idx: JTI); |
3662 | // Save the jump table info. |
3663 | MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {}, |
3664 | SrcOps: {static_cast<int64_t>(JTI)}); |
3665 | // Build the indirect branch. |
3666 | MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg}); |
3667 | I.eraseFromParent(); |
3668 | return constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI); |
3669 | } |
3670 | |
3671 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, |
3672 | MachineRegisterInfo &MRI) { |
3673 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table" ); |
3674 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!" ); |
3675 | |
3676 | Register DstReg = I.getOperand(i: 0).getReg(); |
3677 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3678 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. |
3679 | auto MovMI = |
3680 | MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {}) |
3681 | .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE) |
3682 | .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3683 | I.eraseFromParent(); |
3684 | return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI); |
3685 | } |
3686 | |
3687 | bool AArch64InstructionSelector::selectTLSGlobalValue( |
3688 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3689 | if (!STI.isTargetMachO()) |
3690 | return false; |
3691 | MachineFunction &MF = *I.getParent()->getParent(); |
3692 | MF.getFrameInfo().setAdjustsStack(true); |
3693 | |
3694 | const auto &GlobalOp = I.getOperand(i: 1); |
3695 | assert(GlobalOp.getOffset() == 0 && |
3696 | "Shouldn't have an offset on TLS globals!" ); |
3697 | const GlobalValue &GV = *GlobalOp.getGlobal(); |
3698 | |
3699 | auto LoadGOT = |
3700 | MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {}) |
3701 | .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS); |
3702 | |
3703 | auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass}, |
3704 | SrcOps: {LoadGOT.getReg(Idx: 0)}) |
3705 | .addImm(Val: 0); |
3706 | |
3707 | MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0)); |
3708 | // TLS calls preserve all registers except those that absolutely must be |
3709 | // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be |
3710 | // silly). |
3711 | unsigned Opcode = getBLRCallOpcode(MF); |
3712 | |
3713 | // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0). |
3714 | if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls" )) { |
3715 | assert(Opcode == AArch64::BLR); |
3716 | Opcode = AArch64::BLRAAZ; |
3717 | } |
3718 | |
3719 | MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load}) |
3720 | .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit) |
3721 | .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit) |
3722 | .addRegMask(Mask: TRI.getTLSCallPreservedMask()); |
3723 | |
3724 | MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0)); |
3725 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass, |
3726 | MRI); |
3727 | I.eraseFromParent(); |
3728 | return true; |
3729 | } |
3730 | |
3731 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( |
3732 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, |
3733 | MachineIRBuilder &MIRBuilder) const { |
3734 | auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {}); |
3735 | |
3736 | auto BuildFn = [&](unsigned SubregIndex) { |
3737 | auto Ins = |
3738 | MIRBuilder |
3739 | .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar}) |
3740 | .addImm(Val: SubregIndex); |
3741 | constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI); |
3742 | constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI); |
3743 | return &*Ins; |
3744 | }; |
3745 | |
3746 | switch (EltSize) { |
3747 | case 8: |
3748 | return BuildFn(AArch64::bsub); |
3749 | case 16: |
3750 | return BuildFn(AArch64::hsub); |
3751 | case 32: |
3752 | return BuildFn(AArch64::ssub); |
3753 | case 64: |
3754 | return BuildFn(AArch64::dsub); |
3755 | default: |
3756 | return nullptr; |
3757 | } |
3758 | } |
3759 | |
3760 | MachineInstr * |
3761 | AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg, |
3762 | MachineIRBuilder &MIB, |
3763 | MachineRegisterInfo &MRI) const { |
3764 | LLT DstTy = MRI.getType(Reg: DstReg); |
3765 | const TargetRegisterClass *RC = |
3766 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI)); |
3767 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { |
3768 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n" ); |
3769 | return nullptr; |
3770 | } |
3771 | unsigned SubReg = 0; |
3772 | if (!getSubRegForClass(RC, TRI, SubReg)) |
3773 | return nullptr; |
3774 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { |
3775 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" |
3776 | << DstTy.getSizeInBits() << ")\n"); |
3777 | return nullptr; |
3778 | } |
3779 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}) |
3780 | .addReg(RegNo: SrcReg, flags: 0, SubReg); |
3781 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
3782 | return Copy; |
3783 | } |
3784 | |
3785 | bool AArch64InstructionSelector::selectMergeValues( |
3786 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3787 | assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode" ); |
3788 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3789 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3790 | assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation" ); |
3791 | const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI); |
3792 | |
3793 | if (I.getNumOperands() != 3) |
3794 | return false; |
3795 | |
3796 | // Merging 2 s64s into an s128. |
3797 | if (DstTy == LLT::scalar(SizeInBits: 128)) { |
3798 | if (SrcTy.getSizeInBits() != 64) |
3799 | return false; |
3800 | Register DstReg = I.getOperand(i: 0).getReg(); |
3801 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
3802 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
3803 | auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {}); |
3804 | MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg, |
3805 | /* LaneIdx */ 0, RB, MIRBuilder&: MIB); |
3806 | if (!InsMI) |
3807 | return false; |
3808 | MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(), |
3809 | EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB); |
3810 | if (!Ins2MI) |
3811 | return false; |
3812 | constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI); |
3813 | constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI); |
3814 | I.eraseFromParent(); |
3815 | return true; |
3816 | } |
3817 | |
3818 | if (RB.getID() != AArch64::GPRRegBankID) |
3819 | return false; |
3820 | |
3821 | if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) |
3822 | return false; |
3823 | |
3824 | auto *DstRC = &AArch64::GPR64RegClass; |
3825 | Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC); |
3826 | MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), |
3827 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
3828 | .addDef(RegNo: SubToRegDef) |
3829 | .addImm(Val: 0) |
3830 | .addUse(RegNo: I.getOperand(i: 1).getReg()) |
3831 | .addImm(Val: AArch64::sub_32); |
3832 | Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC); |
3833 | // Need to anyext the second scalar before we can use bfm |
3834 | MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), |
3835 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
3836 | .addDef(RegNo: SubToRegDef2) |
3837 | .addImm(Val: 0) |
3838 | .addUse(RegNo: I.getOperand(i: 2).getReg()) |
3839 | .addImm(Val: AArch64::sub_32); |
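// BFM with immr=32, imms=31 (i.e. BFI #32, #32) inserts the low 32 bits of
// the second scalar into bits [63:32] of the result, whose low half already
// holds the first scalar.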
3840 | MachineInstr &BFM = |
3841 | *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri)) |
3842 | .addDef(RegNo: I.getOperand(i: 0).getReg()) |
3843 | .addUse(RegNo: SubToRegDef) |
3844 | .addUse(RegNo: SubToRegDef2) |
3845 | .addImm(Val: 32) |
3846 | .addImm(Val: 31); |
3847 | constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI); |
3848 | constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI); |
3849 | constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI); |
3850 | I.eraseFromParent(); |
3851 | return true; |
3852 | } |
3853 | |
3854 | static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, |
3855 | const unsigned EltSize) { |
3856 | // Choose a lane copy opcode and subregister based off of the size of the |
3857 | // vector's elements. |
3858 | switch (EltSize) { |
3859 | case 8: |
3860 | CopyOpc = AArch64::DUPi8; |
3861 | ExtractSubReg = AArch64::bsub; |
3862 | break; |
3863 | case 16: |
3864 | CopyOpc = AArch64::DUPi16; |
3865 | ExtractSubReg = AArch64::hsub; |
3866 | break; |
3867 | case 32: |
3868 | CopyOpc = AArch64::DUPi32; |
3869 | ExtractSubReg = AArch64::ssub; |
3870 | break; |
3871 | case 64: |
3872 | CopyOpc = AArch64::DUPi64; |
3873 | ExtractSubReg = AArch64::dsub; |
3874 | break; |
3875 | default: |
3876 | // Unknown size, bail out. |
3877 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n" ); |
3878 | return false; |
3879 | } |
3880 | return true; |
3881 | } |
3882 | |
3883 | MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( |
3884 | std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, |
3885 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { |
3886 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
3887 | unsigned CopyOpc = 0; |
3888 | unsigned ExtractSubReg = 0; |
3889 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) { |
3890 | LLVM_DEBUG( |
3891 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ); |
3892 | return nullptr; |
3893 | } |
3894 | |
3895 | const TargetRegisterClass *DstRC = |
3896 | getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true); |
3897 | if (!DstRC) { |
3898 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n" ); |
3899 | return nullptr; |
3900 | } |
3901 | |
3902 | const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI); |
3903 | const LLT &VecTy = MRI.getType(Reg: VecReg); |
3904 | const TargetRegisterClass *VecRC = |
3905 | getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true); |
3906 | if (!VecRC) { |
3907 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
3908 | return nullptr; |
3909 | } |
3910 | |
3911 | // The register that we're going to copy into. |
3912 | Register InsertReg = VecReg; |
3913 | if (!DstReg) |
3914 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
3915 | // If the lane index is 0, we just use a subregister COPY. |
3916 | if (LaneIdx == 0) { |
3917 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {}) |
3918 | .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg); |
3919 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
3920 | return &*Copy; |
3921 | } |
3922 | |
3923 | // Lane copies require 128-bit wide registers. If we're dealing with an |
3924 | // unpacked vector, then we need to move up to that width. Insert an implicit |
3925 | // def and a subregister insert to get us there. |
3926 | if (VecTy.getSizeInBits() != 128) { |
3927 | MachineInstr *ScalarToVector = emitScalarToVector( |
3928 | EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder); |
3929 | if (!ScalarToVector) |
3930 | return nullptr; |
3931 | InsertReg = ScalarToVector->getOperand(i: 0).getReg(); |
3932 | } |
3933 | |
3934 | MachineInstr *LaneCopyMI = |
3935 | MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx); |
3936 | constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI); |
3937 | |
3938 | // Make sure that we actually constrain the initial copy. |
3939 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
3940 | return LaneCopyMI; |
3941 | } |
3942 | |
bool AArch64InstructionSelector::selectExtractElt(
3944 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3945 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && |
3946 | "unexpected opcode!" ); |
3947 | Register DstReg = I.getOperand(i: 0).getReg(); |
3948 | const LLT NarrowTy = MRI.getType(Reg: DstReg); |
3949 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3950 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
3951 | (void)WideTy; |
3952 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && |
3953 | "source register size too small!" ); |
3954 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!" ); |
3955 | |
3956 | // Need the lane index to determine the correct copy opcode. |
3957 | MachineOperand &LaneIdxOp = I.getOperand(i: 2); |
3958 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?" ); |
3959 | |
3960 | if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
3961 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n" ); |
3962 | return false; |
3963 | } |
3964 | |
3965 | // Find the index to extract from. |
3966 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI); |
3967 | if (!VRegAndVal) |
3968 | return false; |
3969 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); |
3970 | |
3971 | |
3972 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
3974 | LaneIdx, MIRBuilder&: MIB); |
3975 | if (!Extract) |
3976 | return false; |
3977 | |
3978 | I.eraseFromParent(); |
3979 | return true; |
3980 | } |
3981 | |
3982 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( |
3983 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3984 | unsigned NumElts = I.getNumOperands() - 1; |
3985 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
3986 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3987 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3988 | |
3989 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors" ); |
3990 | if (SrcTy.getSizeInBits() > 128) { |
3991 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge" ); |
3992 | return false; |
3993 | } |
3994 | |
3995 | // We implement a split vector operation by treating the sub-vectors as |
3996 | // scalars and extracting them. |
3997 | const RegisterBank &DstRB = |
3998 | *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI); |
3999 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { |
4000 | Register Dst = I.getOperand(i: OpIdx).getReg(); |
MachineInstr *Extract =
4002 | emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB); |
4003 | if (!Extract) |
4004 | return false; |
4005 | } |
4006 | I.eraseFromParent(); |
4007 | return true; |
4008 | } |
4009 | |
4010 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, |
4011 | MachineRegisterInfo &MRI) { |
4012 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && |
4013 | "unexpected opcode" ); |
4014 | |
4015 | // TODO: Handle unmerging into GPRs and from scalars to scalars. |
4016 | if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() != |
4017 | AArch64::FPRRegBankID || |
4018 | RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() != |
4019 | AArch64::FPRRegBankID) { |
4020 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " |
4021 | "currently unsupported.\n" ); |
4022 | return false; |
4023 | } |
4024 | |
4025 | // The last operand is the vector source register, and every other operand is |
4026 | // a register to unpack into. |
4027 | unsigned NumElts = I.getNumOperands() - 1; |
4028 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4029 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4030 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4031 | (void)WideTy; |
4032 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && |
4033 | "can only unmerge from vector or s128 types!" ); |
4034 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && |
4035 | "source register size too small!" ); |
4036 | |
4037 | if (!NarrowTy.isScalar()) |
4038 | return selectSplitVectorUnmerge(I, MRI); |
4039 | |
// Choose a lane copy opcode and subregister based on the size of the
4041 | // vector's elements. |
4042 | unsigned CopyOpc = 0; |
unsigned ExtractSubReg = 0;
4044 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits())) |
4045 | return false; |
4046 | |
4047 | // Set up for the lane copies. |
4048 | MachineBasicBlock &MBB = *I.getParent(); |
4049 | |
4050 | // Stores the registers we'll be copying from. |
4051 | SmallVector<Register, 4> InsertRegs; |
4052 | |
4053 | // We'll use the first register twice, so we only need NumElts-1 registers. |
4054 | unsigned NumInsertRegs = NumElts - 1; |
4055 | |
4056 | // If our elements fit into exactly 128 bits, then we can copy from the source |
4057 | // directly. Otherwise, we need to do a bit of setup with some subregister |
4058 | // inserts. |
4059 | if (NarrowTy.getSizeInBits() * NumElts == 128) { |
4060 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); |
4061 | } else { |
4062 | // No. We have to perform subregister inserts. For each insert, create an |
4063 | // implicit def and a subregister insert, and save the register we create. |
4064 | const TargetRegisterClass *RC = getRegClassForTypeOnBank( |
4065 | Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()), |
4066 | RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI)); |
4067 | unsigned SubReg = 0; |
4068 | bool Found = getSubRegForClass(RC, TRI, SubReg); |
4069 | (void)Found; |
assert(Found && "expected to find last operand's subreg idx" );
4071 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { |
4072 | Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
4073 | MachineInstr &ImpDefMI = |
4074 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF), |
4075 | DestReg: ImpDefReg); |
4076 | |
4077 | // Now, create the subregister insert from SrcReg. |
4078 | Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
4079 | MachineInstr &InsMI = |
4080 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), |
4081 | MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg) |
4082 | .addUse(RegNo: ImpDefReg) |
4083 | .addUse(RegNo: SrcReg) |
4084 | .addImm(Val: SubReg); |
4085 | |
4086 | constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI); |
4087 | constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI); |
4088 | |
4089 | // Save the register so that we can copy from it after. |
4090 | InsertRegs.push_back(Elt: InsertReg); |
4091 | } |
4092 | } |
4093 | |
4094 | // Now that we've created any necessary subregister inserts, we can |
4095 | // create the copies. |
4096 | // |
4097 | // Perform the first copy separately as a subregister copy. |
4098 | Register CopyTo = I.getOperand(i: 0).getReg(); |
4099 | auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {}) |
4100 | .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg); |
4101 | constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI); |
4102 | |
4103 | // Now, perform the remaining copies as vector lane copies. |
4104 | unsigned LaneIdx = 1; |
4105 | for (Register InsReg : InsertRegs) { |
4106 | Register CopyTo = I.getOperand(i: LaneIdx).getReg(); |
4107 | MachineInstr &CopyInst = |
4108 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo) |
4109 | .addUse(RegNo: InsReg) |
4110 | .addImm(Val: LaneIdx); |
4111 | constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI); |
4112 | ++LaneIdx; |
4113 | } |
4114 | |
4115 | // Separately constrain the first copy's destination. Because of the |
4116 | // limitation in constrainOperandRegClass, we can't guarantee that this will |
4117 | // actually be constrained. So, do it ourselves using the second operand. |
4118 | const TargetRegisterClass *RC = |
4119 | MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg()); |
4120 | if (!RC) { |
4121 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n" ); |
4122 | return false; |
4123 | } |
4124 | |
4125 | RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI); |
4126 | I.eraseFromParent(); |
4127 | return true; |
4128 | } |
4129 | |
4130 | bool AArch64InstructionSelector::selectConcatVectors( |
4131 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4132 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && |
4133 | "Unexpected opcode" ); |
4134 | Register Dst = I.getOperand(i: 0).getReg(); |
4135 | Register Op1 = I.getOperand(i: 1).getReg(); |
4136 | Register Op2 = I.getOperand(i: 2).getReg(); |
4137 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB); |
4138 | if (!ConcatMI) |
4139 | return false; |
4140 | I.eraseFromParent(); |
4141 | return true; |
4142 | } |
4143 | |
4144 | unsigned |
4145 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, |
4146 | MachineFunction &MF) const { |
4147 | Type *CPTy = CPVal->getType(); |
4148 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy); |
4149 | |
4150 | MachineConstantPool *MCP = MF.getConstantPool(); |
4151 | return MCP->getConstantPoolIndex(C: CPVal, Alignment); |
4152 | } |
4153 | |
4154 | MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( |
4155 | const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { |
4156 | const TargetRegisterClass *RC; |
4157 | unsigned Opc; |
4158 | bool IsTiny = TM.getCodeModel() == CodeModel::Tiny; |
4159 | unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType()); |
4160 | switch (Size) { |
4161 | case 16: |
4162 | RC = &AArch64::FPR128RegClass; |
4163 | Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui; |
4164 | break; |
4165 | case 8: |
4166 | RC = &AArch64::FPR64RegClass; |
4167 | Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui; |
4168 | break; |
4169 | case 4: |
4170 | RC = &AArch64::FPR32RegClass; |
4171 | Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui; |
4172 | break; |
4173 | case 2: |
4174 | RC = &AArch64::FPR16RegClass; |
4175 | Opc = AArch64::LDRHui; |
4176 | break; |
4177 | default: |
4178 | LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " |
4179 | << *CPVal->getType()); |
4180 | return nullptr; |
4181 | } |
4182 | |
4183 | MachineInstr *LoadMI = nullptr; |
4184 | auto &MF = MIRBuilder.getMF(); |
4185 | unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); |
4186 | if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) { |
4187 | // Use load(literal) for tiny code model. |
4188 | LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx); |
4189 | } else { |
4190 | auto Adrp = |
4191 | MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}) |
4192 | .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE); |
4193 | |
4194 | LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp}) |
4195 | .addConstantPoolIndex( |
4196 | Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
4197 | |
4198 | constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI); |
4199 | } |
4200 | |
4201 | MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); |
4202 | LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo, |
4203 | F: MachineMemOperand::MOLoad, |
4204 | Size, BaseAlignment: Align(Size))); |
4205 | constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI); |
4206 | return LoadMI; |
4207 | } |
4208 | |
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4210 | /// size and RB. |
4211 | static std::pair<unsigned, unsigned> |
4212 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { |
4213 | unsigned Opc, SubregIdx; |
4214 | if (RB.getID() == AArch64::GPRRegBankID) { |
4215 | if (EltSize == 8) { |
4216 | Opc = AArch64::INSvi8gpr; |
4217 | SubregIdx = AArch64::bsub; |
4218 | } else if (EltSize == 16) { |
4219 | Opc = AArch64::INSvi16gpr; |
4220 | SubregIdx = AArch64::ssub; |
4221 | } else if (EltSize == 32) { |
4222 | Opc = AArch64::INSvi32gpr; |
4223 | SubregIdx = AArch64::ssub; |
4224 | } else if (EltSize == 64) { |
4225 | Opc = AArch64::INSvi64gpr; |
4226 | SubregIdx = AArch64::dsub; |
4227 | } else { |
4228 | llvm_unreachable("invalid elt size!" ); |
4229 | } |
4230 | } else { |
4231 | if (EltSize == 8) { |
4232 | Opc = AArch64::INSvi8lane; |
4233 | SubregIdx = AArch64::bsub; |
4234 | } else if (EltSize == 16) { |
4235 | Opc = AArch64::INSvi16lane; |
4236 | SubregIdx = AArch64::hsub; |
4237 | } else if (EltSize == 32) { |
4238 | Opc = AArch64::INSvi32lane; |
4239 | SubregIdx = AArch64::ssub; |
4240 | } else if (EltSize == 64) { |
4241 | Opc = AArch64::INSvi64lane; |
4242 | SubregIdx = AArch64::dsub; |
4243 | } else { |
4244 | llvm_unreachable("invalid elt size!" ); |
4245 | } |
4246 | } |
4247 | return std::make_pair(x&: Opc, y&: SubregIdx); |
4248 | } |
4249 | |
4250 | MachineInstr *AArch64InstructionSelector::emitInstr( |
4251 | unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
4252 | std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, |
4253 | const ComplexRendererFns &RenderFns) const { |
4254 | assert(Opcode && "Expected an opcode?" ); |
4255 | assert(!isPreISelGenericOpcode(Opcode) && |
4256 | "Function should only be used to produce selected instructions!" ); |
4257 | auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps); |
4258 | if (RenderFns) |
4259 | for (auto &Fn : *RenderFns) |
4260 | Fn(MI); |
4261 | constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
4262 | return &*MI; |
4263 | } |
4264 | |
4265 | MachineInstr *AArch64InstructionSelector::emitAddSub( |
4266 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
4267 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
4268 | MachineIRBuilder &MIRBuilder) const { |
4269 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4270 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4271 | auto Ty = MRI.getType(Reg: LHS.getReg()); |
4272 | assert(!Ty.isVector() && "Expected a scalar or pointer?" ); |
4273 | unsigned Size = Ty.getSizeInBits(); |
4274 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only" ); |
4275 | bool Is32Bit = Size == 32; |
4276 | |
4277 | // INSTRri form with positive arithmetic immediate. |
4278 | if (auto Fns = selectArithImmed(Root&: RHS)) |
4279 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4280 | MIRBuilder, RenderFns: Fns); |
4281 | |
4282 | // INSTRri form with negative arithmetic immediate. |
4283 | if (auto Fns = selectNegArithImmed(Root&: RHS)) |
4284 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4285 | MIRBuilder, RenderFns: Fns); |
4286 | |
4287 | // INSTRrx form. |
4288 | if (auto Fns = selectArithExtendedRegister(Root&: RHS)) |
4289 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4290 | MIRBuilder, RenderFns: Fns); |
4291 | |
4292 | // INSTRrs form. |
4293 | if (auto Fns = selectShiftedRegister(Root&: RHS)) |
4294 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4295 | MIRBuilder, RenderFns: Fns); |
4296 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, |
4297 | MIRBuilder); |
4298 | } |
4299 | |
4300 | MachineInstr * |
4301 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, |
4302 | MachineOperand &RHS, |
4303 | MachineIRBuilder &MIRBuilder) const { |
4304 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4305 | ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri}, |
4306 | {AArch64::ADDXrs, AArch64::ADDWrs}, |
4307 | {AArch64::ADDXrr, AArch64::ADDWrr}, |
4308 | {AArch64::SUBXri, AArch64::SUBWri}, |
4309 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
4310 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder); |
4311 | } |
4312 | |
4313 | MachineInstr * |
4314 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, |
4315 | MachineOperand &RHS, |
4316 | MachineIRBuilder &MIRBuilder) const { |
4317 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4318 | ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri}, |
4319 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, |
4320 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, |
4321 | {AArch64::SUBSXri, AArch64::SUBSWri}, |
4322 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; |
4323 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4324 | } |
4325 | |
4326 | MachineInstr * |
4327 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, |
4328 | MachineOperand &RHS, |
4329 | MachineIRBuilder &MIRBuilder) const { |
4330 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4331 | ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri}, |
4332 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, |
4333 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, |
4334 | {AArch64::ADDSXri, AArch64::ADDSWri}, |
4335 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; |
4336 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4337 | } |
4338 | |
4339 | MachineInstr * |
4340 | AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, |
4341 | MachineOperand &RHS, |
4342 | MachineIRBuilder &MIRBuilder) const { |
4343 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4344 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4345 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4346 | static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; |
4347 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4348 | } |
4349 | |
4350 | MachineInstr * |
4351 | AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, |
4352 | MachineOperand &RHS, |
4353 | MachineIRBuilder &MIRBuilder) const { |
4354 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4355 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4356 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4357 | static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; |
4358 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4359 | } |
4360 | |
4361 | MachineInstr * |
4362 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
4363 | MachineIRBuilder &MIRBuilder) const { |
4364 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4365 | bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4366 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; |
4367 | return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder); |
4368 | } |
4369 | |
4370 | MachineInstr * |
4371 | AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, |
4372 | MachineIRBuilder &MIRBuilder) const { |
4373 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4374 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4375 | LLT Ty = MRI.getType(Reg: LHS.getReg()); |
4376 | unsigned RegSize = Ty.getSizeInBits(); |
4377 | bool Is32Bit = (RegSize == 32); |
4378 | const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, |
4379 | {AArch64::ANDSXrs, AArch64::ANDSWrs}, |
4380 | {AArch64::ANDSXrr, AArch64::ANDSWrr}}; |
4381 | // ANDS needs a logical immediate for its immediate form. Check if we can |
4382 | // fold one in. |
4383 | if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) { |
4384 | int64_t Imm = ValAndVReg->Value.getSExtValue(); |
4385 | |
4386 | if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) { |
4387 | auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}); |
4388 | TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize)); |
4389 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
4390 | return &*TstMI; |
4391 | } |
4392 | } |
4393 | |
4394 | if (auto Fns = selectLogicalShiftedRegister(Root&: RHS)) |
4395 | return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns); |
4396 | return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder); |
4397 | } |
4398 | |
4399 | MachineInstr *AArch64InstructionSelector::emitIntegerCompare( |
4400 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, |
4401 | MachineIRBuilder &MIRBuilder) const { |
4402 | assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!" ); |
4403 | assert(Predicate.isPredicate() && "Expected predicate?" ); |
4404 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4405 | LLT CmpTy = MRI.getType(Reg: LHS.getReg()); |
4406 | assert(!CmpTy.isVector() && "Expected scalar or pointer" ); |
4407 | unsigned Size = CmpTy.getSizeInBits(); |
4408 | (void)Size; |
4409 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?" ); |
4410 | // Fold the compare into a cmn or tst if possible. |
4411 | if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) |
4412 | return FoldCmp; |
4413 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg()); |
4414 | return emitSUBS(Dst, LHS, RHS, MIRBuilder); |
4415 | } |
4416 | |
4417 | MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( |
4418 | Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { |
4419 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
4420 | #ifndef NDEBUG |
4421 | LLT Ty = MRI.getType(Dst); |
4422 | assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && |
4423 | "Expected a 32-bit scalar register?" ); |
4424 | #endif |
4425 | const Register ZReg = AArch64::WZR; |
4426 | AArch64CC::CondCode CC1, CC2; |
4427 | changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2); |
4428 | auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1); |
4429 | if (CC2 == AArch64CC::AL) |
4430 | return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, |
4431 | MIRBuilder); |
4432 | const TargetRegisterClass *RC = &AArch64::GPR32RegClass; |
4433 | Register Def1Reg = MRI.createVirtualRegister(RegClass: RC); |
4434 | Register Def2Reg = MRI.createVirtualRegister(RegClass: RC); |
4435 | auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2); |
4436 | emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder); |
4437 | emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder); |
4438 | auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg}); |
4439 | constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI); |
4440 | return &*OrMI; |
4441 | } |
4442 | |
4443 | MachineInstr *AArch64InstructionSelector::emitFPCompare( |
4444 | Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
4445 | std::optional<CmpInst::Predicate> Pred) const { |
4446 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
4447 | LLT Ty = MRI.getType(Reg: LHS); |
4448 | if (Ty.isVector()) |
4449 | return nullptr; |
4450 | unsigned OpSize = Ty.getSizeInBits(); |
4451 | assert(OpSize == 16 || OpSize == 32 || OpSize == 64); |
4452 | |
4453 | // If this is a compare against +0.0, then we don't have |
4454 | // to explicitly materialize a constant. |
4455 | const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI); |
4456 | bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); |
4457 | |
4458 | auto IsEqualityPred = [](CmpInst::Predicate P) { |
4459 | return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || |
4460 | P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; |
4461 | }; |
4462 | if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { |
4463 | // Try commutating the operands. |
4464 | const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI); |
4465 | if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { |
4466 | ShouldUseImm = true; |
4467 | std::swap(a&: LHS, b&: RHS); |
4468 | } |
4469 | } |
4470 | unsigned CmpOpcTbl[2][3] = { |
4471 | {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr}, |
4472 | {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}}; |
4473 | unsigned CmpOpc = |
4474 | CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)]; |
4475 | |
4476 | // Partially build the compare. Decide if we need to add a use for the |
4477 | // third operand based off whether or not we're comparing against 0.0. |
4478 | auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS); |
4479 | CmpMI.setMIFlags(MachineInstr::NoFPExcept); |
4480 | if (!ShouldUseImm) |
4481 | CmpMI.addUse(RegNo: RHS); |
4482 | constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI); |
4483 | return &*CmpMI; |
4484 | } |
4485 | |
4486 | MachineInstr *AArch64InstructionSelector::emitVectorConcat( |
4487 | std::optional<Register> Dst, Register Op1, Register Op2, |
4488 | MachineIRBuilder &MIRBuilder) const { |
4489 | // We implement a vector concat by: |
4490 | // 1. Use scalar_to_vector to insert the lower vector into the larger dest |
4491 | // 2. Insert the upper vector into the destination's upper element |
4492 | // TODO: some of this code is common with G_BUILD_VECTOR handling. |
4493 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4494 | |
4495 | const LLT Op1Ty = MRI.getType(Reg: Op1); |
4496 | const LLT Op2Ty = MRI.getType(Reg: Op2); |
4497 | |
4498 | if (Op1Ty != Op2Ty) { |
4499 | LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys" ); |
4500 | return nullptr; |
4501 | } |
4502 | assert(Op1Ty.isVector() && "Expected a vector for vector concat" ); |
4503 | |
4504 | if (Op1Ty.getSizeInBits() >= 128) { |
4505 | LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors" ); |
4506 | return nullptr; |
4507 | } |
4508 | |
4509 | // At the moment we just support 64 bit vector concats. |
4510 | if (Op1Ty.getSizeInBits() != 64) { |
4511 | LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors" ); |
4512 | return nullptr; |
4513 | } |
4514 | |
4515 | const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits()); |
4516 | const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI); |
4517 | const TargetRegisterClass *DstRC = |
4518 | getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank); |
4519 | |
4520 | MachineInstr *WidenedOp1 = |
4521 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder); |
4522 | MachineInstr *WidenedOp2 = |
4523 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder); |
4524 | if (!WidenedOp1 || !WidenedOp2) { |
4525 | LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value" ); |
4526 | return nullptr; |
4527 | } |
4528 | |
4529 | // Now do the insert of the upper element. |
4530 | unsigned InsertOpc, InsSubRegIdx; |
4531 | std::tie(args&: InsertOpc, args&: InsSubRegIdx) = |
4532 | getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits()); |
4533 | |
4534 | if (!Dst) |
4535 | Dst = MRI.createVirtualRegister(RegClass: DstRC); |
4536 | auto InsElt = |
4537 | MIRBuilder |
4538 | .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()}) |
4539 | .addImm(Val: 1) /* Lane index */ |
4540 | .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg()) |
4541 | .addImm(Val: 0); |
4542 | constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI); |
4543 | return &*InsElt; |
4544 | } |
4545 | |
4546 | MachineInstr * |
4547 | AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, |
4548 | Register Src2, AArch64CC::CondCode Pred, |
4549 | MachineIRBuilder &MIRBuilder) const { |
4550 | auto &MRI = *MIRBuilder.getMRI(); |
4551 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst); |
4552 | // If we used a register class, then this won't necessarily have an LLT. |
4553 | // Compute the size based off whether or not we have a class or bank. |
4554 | unsigned Size; |
4555 | if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) |
4556 | Size = TRI.getRegSizeInBits(RC: *RC); |
4557 | else |
4558 | Size = MRI.getType(Reg: Dst).getSizeInBits(); |
4559 | // Some opcodes use s1. |
4560 | assert(Size <= 64 && "Expected 64 bits or less only!" ); |
4561 | static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; |
4562 | unsigned Opc = OpcTable[Size == 64]; |
4563 | auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred); |
4564 | constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI); |
4565 | return &*CSINC; |
4566 | } |
4567 | |
4568 | MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I, |
4569 | Register CarryReg) { |
4570 | MachineRegisterInfo *MRI = MIB.getMRI(); |
4571 | unsigned Opcode = I.getOpcode(); |
4572 | |
4573 | // If the instruction is a SUB, we need to negate the carry, |
4574 | // because borrowing is indicated by carry-flag == 0. |
4575 | bool NeedsNegatedCarry = |
4576 | (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE); |
4577 | |
4578 | // If the previous instruction will already produce the correct carry, do not |
4579 | // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences |
4580 | // generated during legalization of wide add/sub. This optimization depends on |
4581 | // these sequences not being interrupted by other instructions. |
4582 | // We have to select the previous instruction before the carry-using |
4583 | // instruction is deleted by the calling function, otherwise the previous |
4584 | // instruction might become dead and would get deleted. |
4585 | MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg); |
4586 | if (SrcMI == I.getPrevNode()) { |
4587 | if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) { |
4588 | bool ProducesNegatedCarry = CarrySrcMI->isSub(); |
4589 | if (NeedsNegatedCarry == ProducesNegatedCarry && |
4590 | CarrySrcMI->isUnsigned() && |
4591 | CarrySrcMI->getCarryOutReg() == CarryReg && |
4592 | selectAndRestoreState(I&: *SrcMI)) |
4593 | return nullptr; |
4594 | } |
4595 | } |
4596 | |
4597 | Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
4598 | |
4599 | if (NeedsNegatedCarry) { |
4600 | // (0 - Carry) sets !C in NZCV when Carry == 1 |
4601 | Register ZReg = AArch64::WZR; |
4602 | return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB); |
4603 | } |
4604 | |
4605 | // (Carry - 1) sets !C in NZCV when Carry == 0 |
4606 | auto Fns = select12BitValueWithLeftShift(Immed: 1); |
4607 | return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns); |
4608 | } |
4609 | |
4610 | bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I, |
4611 | MachineRegisterInfo &MRI) { |
4612 | auto &CarryMI = cast<GAddSubCarryOut>(Val&: I); |
4613 | |
4614 | if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) { |
4615 | // Set NZCV carry according to carry-in VReg |
4616 | emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg()); |
4617 | } |
4618 | |
4619 | // Emit the operation and get the correct condition code. |
4620 | auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(), |
4621 | LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB); |
4622 | |
4623 | Register CarryOutReg = CarryMI.getCarryOutReg(); |
4624 | |
4625 | // Don't convert carry-out to VReg if it is never used |
4626 | if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) { |
4627 | // Now, put the overflow result in the register given by the first operand |
4628 | // to the overflow op. CSINC increments the result when the predicate is |
4629 | // false, so to get the increment when it's true, we need to use the |
4630 | // inverse. In this case, we want to increment when carry is set. |
4631 | Register ZReg = AArch64::WZR; |
4632 | emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg, |
4633 | Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB); |
4634 | } |
4635 | |
4636 | I.eraseFromParent(); |
4637 | return true; |
4638 | } |
4639 | |
4640 | std::pair<MachineInstr *, AArch64CC::CondCode> |
4641 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, |
4642 | MachineOperand &LHS, |
4643 | MachineOperand &RHS, |
4644 | MachineIRBuilder &MIRBuilder) const { |
4645 | switch (Opcode) { |
4646 | default: |
4647 | llvm_unreachable("Unexpected opcode!" ); |
4648 | case TargetOpcode::G_SADDO: |
4649 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4650 | case TargetOpcode::G_UADDO: |
4651 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4652 | case TargetOpcode::G_SSUBO: |
4653 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4654 | case TargetOpcode::G_USUBO: |
4655 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4656 | case TargetOpcode::G_SADDE: |
4657 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4658 | case TargetOpcode::G_UADDE: |
4659 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4660 | case TargetOpcode::G_SSUBE: |
4661 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4662 | case TargetOpcode::G_USUBE: |
4663 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4664 | } |
4665 | } |
4666 | |
4667 | /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be |
4668 | /// expressed as a conjunction. |
4669 | /// \param CanNegate Set to true if we can negate the whole sub-tree just by |
4670 | /// changing the conditions on the CMP tests. |
4671 | /// (this means we can call emitConjunctionRec() with |
4672 | /// Negate==true on this sub-tree) |
4673 | /// \param MustBeFirst Set to true if this subtree needs to be negated and we |
4674 | /// cannot do the negation naturally. We are required to |
4675 | /// emit the subtree first in this case. |
/// \param WillNegate Is true if we are called when the result of this
4677 | /// subexpression must be negated. This happens when the |
4678 | /// outer expression is an OR. We can use this fact to know |
4679 | /// that we have a double negation (or (or ...) ...) that |
4680 | /// can be implemented for free. |
4681 | static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, |
4682 | bool WillNegate, MachineRegisterInfo &MRI, |
4683 | unsigned Depth = 0) { |
4684 | if (!MRI.hasOneNonDBGUse(RegNo: Val)) |
4685 | return false; |
4686 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4687 | unsigned Opcode = ValDef->getOpcode(); |
4688 | if (isa<GAnyCmp>(Val: ValDef)) { |
4689 | CanNegate = true; |
4690 | MustBeFirst = false; |
4691 | return true; |
4692 | } |
4693 | // Protect against exponential runtime and stack overflow. |
4694 | if (Depth > 6) |
4695 | return false; |
4696 | if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) { |
4697 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4698 | Register O0 = ValDef->getOperand(i: 1).getReg(); |
4699 | Register O1 = ValDef->getOperand(i: 2).getReg(); |
4700 | bool CanNegateL; |
4701 | bool MustBeFirstL; |
4702 | if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1)) |
4703 | return false; |
4704 | bool CanNegateR; |
4705 | bool MustBeFirstR; |
4706 | if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1)) |
4707 | return false; |
4708 | |
4709 | if (MustBeFirstL && MustBeFirstR) |
4710 | return false; |
4711 | |
4712 | if (IsOR) { |
4713 | // For an OR expression we need to be able to naturally negate at least |
4714 | // one side or we cannot do the transformation at all. |
4715 | if (!CanNegateL && !CanNegateR) |
4716 | return false; |
// If the result of the OR will be negated and we can naturally negate
4718 | // the leaves, then this sub-tree as a whole negates naturally. |
4719 | CanNegate = WillNegate && CanNegateL && CanNegateR; |
4720 | // If we cannot naturally negate the whole sub-tree, then this must be |
4721 | // emitted first. |
4722 | MustBeFirst = !CanNegate; |
4723 | } else { |
4724 | assert(Opcode == TargetOpcode::G_AND && "Must be G_AND" ); |
4725 | // We cannot naturally negate an AND operation. |
4726 | CanNegate = false; |
4727 | MustBeFirst = MustBeFirstL || MustBeFirstR; |
4728 | } |
4729 | return true; |
4730 | } |
4731 | return false; |
4732 | } |
4733 | |
4734 | MachineInstr *AArch64InstructionSelector::emitConditionalComparison( |
4735 | Register LHS, Register RHS, CmpInst::Predicate CC, |
4736 | AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, |
4737 | MachineIRBuilder &MIB) const { |
4738 | auto &MRI = *MIB.getMRI(); |
4739 | LLT OpTy = MRI.getType(Reg: LHS); |
4740 | unsigned CCmpOpc; |
4741 | std::optional<ValueAndVReg> C; |
4742 | if (CmpInst::isIntPredicate(P: CC)) { |
4743 | assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64); |
4744 | C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
4745 | if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31)) |
4746 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr; |
4747 | else if (C->Value.ule(RHS: 31)) |
4748 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi; |
4749 | else |
4750 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi; |
4751 | } else { |
4752 | assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 || |
4753 | OpTy.getSizeInBits() == 64); |
4754 | switch (OpTy.getSizeInBits()) { |
4755 | case 16: |
4756 | assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons" ); |
4757 | CCmpOpc = AArch64::FCCMPHrr; |
4758 | break; |
4759 | case 32: |
4760 | CCmpOpc = AArch64::FCCMPSrr; |
4761 | break; |
4762 | case 64: |
4763 | CCmpOpc = AArch64::FCCMPDrr; |
4764 | break; |
4765 | default: |
4766 | return nullptr; |
4767 | } |
4768 | } |
4769 | AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
4770 | unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC); |
4771 | auto CCmp = |
4772 | MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS}); |
4773 | if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi) |
4774 | CCmp.addImm(Val: C->Value.getZExtValue()); |
4775 | else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi) |
4776 | CCmp.addImm(Val: C->Value.abs().getZExtValue()); |
4777 | else |
4778 | CCmp.addReg(RegNo: RHS); |
4779 | CCmp.addImm(Val: NZCV).addImm(Val: Predicate); |
4780 | constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI); |
4781 | return &*CCmp; |
4782 | } |
4783 | |
4784 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( |
4785 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, |
4786 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { |
4787 | // We're at a tree leaf, produce a conditional comparison operation. |
4788 | auto &MRI = *MIB.getMRI(); |
4789 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4790 | unsigned Opcode = ValDef->getOpcode(); |
4791 | if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) { |
4792 | Register LHS = Cmp->getLHSReg(); |
4793 | Register RHS = Cmp->getRHSReg(); |
4794 | CmpInst::Predicate CC = Cmp->getCond(); |
4795 | if (Negate) |
4796 | CC = CmpInst::getInversePredicate(pred: CC); |
4797 | if (isa<GICmp>(Val: Cmp)) { |
4798 | OutCC = changeICMPPredToAArch64CC(P: CC); |
4799 | } else { |
4800 | // Handle special FP cases. |
AArch64CC::CondCode ExtraCC;
4802 | changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC); |
4803 | // Some floating point conditions can't be tested with a single condition |
4804 | // code. Construct an additional comparison in this case. |
4805 | if (ExtraCC != AArch64CC::AL) { |
MachineInstr *ExtraCmp;
4807 | if (!CCOp) |
4808 | ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC); |
4809 | else |
4810 | ExtraCmp = |
4811 | emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB); |
4812 | CCOp = ExtraCmp->getOperand(i: 0).getReg(); |
4813 | Predicate = ExtraCC; |
4814 | } |
4815 | } |
4816 | |
4817 | // Produce a normal comparison if we are first in the chain |
4818 | if (!CCOp) { |
4819 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS); |
4820 | if (isa<GICmp>(Val: Cmp)) |
4821 | return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB); |
4822 | return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(), |
4823 | RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB); |
4824 | } |
4825 | // Otherwise produce a ccmp. |
4826 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); |
4827 | } |
4828 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree" ); |
4829 | |
4830 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4831 | |
4832 | Register LHS = ValDef->getOperand(i: 1).getReg(); |
4833 | bool CanNegateL; |
4834 | bool MustBeFirstL; |
4835 | bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI); |
4836 | assert(ValidL && "Valid conjunction/disjunction tree" ); |
4837 | (void)ValidL; |
4838 | |
4839 | Register RHS = ValDef->getOperand(i: 2).getReg(); |
4840 | bool CanNegateR; |
4841 | bool MustBeFirstR; |
4842 | bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI); |
4843 | assert(ValidR && "Valid conjunction/disjunction tree" ); |
4844 | (void)ValidR; |
4845 | |
4846 | // Swap sub-tree that must come first to the right side. |
4847 | if (MustBeFirstL) { |
4848 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree" ); |
4849 | std::swap(a&: LHS, b&: RHS); |
4850 | std::swap(a&: CanNegateL, b&: CanNegateR); |
4851 | std::swap(a&: MustBeFirstL, b&: MustBeFirstR); |
4852 | } |
4853 | |
4854 | bool NegateR; |
4855 | bool NegateAfterR; |
4856 | bool NegateL; |
4857 | bool NegateAfterAll; |
4858 | if (Opcode == TargetOpcode::G_OR) { |
4859 | // Swap the sub-tree that we can negate naturally to the left. |
4860 | if (!CanNegateL) { |
4861 | assert(CanNegateR && "at least one side must be negatable" ); |
4862 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree" ); |
4863 | assert(!Negate); |
4864 | std::swap(a&: LHS, b&: RHS); |
4865 | NegateR = false; |
4866 | NegateAfterR = true; |
4867 | } else { |
4868 | // Negate the left sub-tree if possible, otherwise negate the result. |
4869 | NegateR = CanNegateR; |
4870 | NegateAfterR = !CanNegateR; |
4871 | } |
4872 | NegateL = true; |
4873 | NegateAfterAll = !Negate; |
4874 | } else { |
4875 | assert(Opcode == TargetOpcode::G_AND && |
4876 | "Valid conjunction/disjunction tree" ); |
4877 | assert(!Negate && "Valid conjunction/disjunction tree" ); |
4878 | |
4879 | NegateL = false; |
4880 | NegateR = false; |
4881 | NegateAfterR = false; |
4882 | NegateAfterAll = false; |
4883 | } |
4884 | |
4885 | // Emit sub-trees. |
4886 | AArch64CC::CondCode RHSCC; |
4887 | MachineInstr *CmpR = |
4888 | emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB); |
4889 | if (NegateAfterR) |
4890 | RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC); |
4891 | MachineInstr *CmpL = emitConjunctionRec( |
4892 | Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB); |
4893 | if (NegateAfterAll) |
4894 | OutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
4895 | return CmpL; |
4896 | } |
4897 | |
4898 | MachineInstr *AArch64InstructionSelector::emitConjunction( |
4899 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { |
4900 | bool DummyCanNegate; |
4901 | bool DummyMustBeFirst; |
4902 | if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false, |
4903 | MRI&: *MIB.getMRI())) |
4904 | return nullptr; |
4905 | return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB); |
4906 | } |
4907 | |
4908 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, |
4909 | MachineInstr &CondMI) { |
4910 | AArch64CC::CondCode AArch64CC; |
4911 | MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB); |
4912 | if (!ConjMI) |
4913 | return false; |
4914 | |
4915 | emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB); |
4916 | SelI.eraseFromParent(); |
4917 | return true; |
4918 | } |
4919 | |
4920 | bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { |
4921 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
4922 | // We want to recognize this pattern: |
4923 | // |
4924 | // $z = G_FCMP pred, $x, $y |
4925 | // ... |
4926 | // $w = G_SELECT $z, $a, $b |
4927 | // |
4928 | // Where the value of $z is *only* ever used by the G_SELECT (possibly with |
4929 | // some copies/truncs in between.) |
4930 | // |
4931 | // If we see this, then we can emit something like this: |
4932 | // |
4933 | // fcmp $x, $y |
4934 | // fcsel $w, $a, $b, pred |
4935 | // |
4936 | // Rather than emitting both of the rather long sequences in the standard |
4937 | // G_FCMP/G_SELECT select methods. |
4938 | |
4939 | // First, check if the condition is defined by a compare. |
4940 | MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
4941 | |
4942 | // We can only fold if all of the defs have one use. |
4943 | Register CondDefReg = CondDef->getOperand(i: 0).getReg(); |
4944 | if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) { |
4945 | // Unless it's another select. |
4946 | for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) { |
4947 | if (CondDef == &UI) |
4948 | continue; |
4949 | if (UI.getOpcode() != TargetOpcode::G_SELECT) |
4950 | return false; |
4951 | } |
4952 | } |
4953 | |
4954 | // Is the condition defined by a compare? |
4955 | unsigned CondOpc = CondDef->getOpcode(); |
4956 | if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { |
4957 | if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef)) |
4958 | return true; |
4959 | return false; |
4960 | } |
4961 | |
4962 | AArch64CC::CondCode CondCode; |
4963 | if (CondOpc == TargetOpcode::G_ICMP) { |
4964 | auto Pred = |
4965 | static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate()); |
4966 | CondCode = changeICMPPredToAArch64CC(P: Pred); |
4967 | emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3), |
4968 | Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB); |
4969 | } else { |
4970 | // Get the condition code for the select. |
4971 | auto Pred = |
4972 | static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate()); |
4973 | AArch64CC::CondCode CondCode2; |
4974 | changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2); |
4975 | |
4976 | // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two |
4977 | // instructions to emit the comparison. |
4978 | // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be |
4979 | // unnecessary. |
4980 | if (CondCode2 != AArch64CC::AL) |
4981 | return false; |
4982 | |
4983 | if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(), |
4984 | RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) { |
4985 | LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n" ); |
4986 | return false; |
4987 | } |
4988 | } |
4989 | |
4990 | // Emit the select. |
4991 | emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(), |
4992 | False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB); |
4993 | I.eraseFromParent(); |
4994 | return true; |
4995 | } |
4996 | |
4997 | MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( |
4998 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, |
4999 | MachineIRBuilder &MIRBuilder) const { |
5000 | assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && |
5001 | "Unexpected MachineOperand" ); |
5002 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
5003 | // We want to find this sort of thing: |
5004 | // x = G_SUB 0, y |
5005 | // G_ICMP z, x |
5006 | // |
5007 | // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. |
5008 | // e.g: |
5009 | // |
5010 | // cmn z, y |
5011 | |
5012 | // Check if the RHS or LHS of the G_ICMP is defined by a SUB |
5013 | MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI); |
5014 | MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI); |
5015 | auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); |
5016 | // Given this: |
5017 | // |
5018 | // x = G_SUB 0, y |
5019 | // G_ICMP x, z |
5020 | // |
5021 | // Produce this: |
5022 | // |
5023 | // cmn y, z |
5024 | if (isCMN(MaybeSub: LHSDef, Pred: P, MRI)) |
5025 | return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder); |
5026 | |
5027 | // Same idea here, but with the RHS of the compare instead: |
5028 | // |
5029 | // Given this: |
5030 | // |
5031 | // x = G_SUB 0, y |
5032 | // G_ICMP z, x |
5033 | // |
5034 | // Produce this: |
5035 | // |
5036 | // cmn z, y |
5037 | if (isCMN(MaybeSub: RHSDef, Pred: P, MRI)) |
5038 | return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder); |
5039 | |
5040 | // Given this: |
5041 | // |
5042 | // z = G_AND x, y |
5043 | // G_ICMP z, 0 |
5044 | // |
5045 | // Produce this if the compare is signed: |
5046 | // |
5047 | // tst x, y |
5048 | if (!CmpInst::isUnsigned(predicate: P) && LHSDef && |
5049 | LHSDef->getOpcode() == TargetOpcode::G_AND) { |
5050 | // Make sure that the RHS is 0. |
5051 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI); |
5052 | if (!ValAndVReg || ValAndVReg->Value != 0) |
5053 | return nullptr; |
5054 | |
5055 | return emitTST(LHS&: LHSDef->getOperand(i: 1), |
5056 | RHS&: LHSDef->getOperand(i: 2), MIRBuilder); |
5057 | } |
5058 | |
5059 | return nullptr; |
5060 | } |
5061 | |
5062 | bool AArch64InstructionSelector::selectShuffleVector( |
5063 | MachineInstr &I, MachineRegisterInfo &MRI) { |
5064 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5065 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
5066 | const LLT Src1Ty = MRI.getType(Reg: Src1Reg); |
5067 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
5068 | const LLT Src2Ty = MRI.getType(Reg: Src2Reg); |
5069 | ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask(); |
5070 | |
5071 | MachineBasicBlock &MBB = *I.getParent(); |
5072 | MachineFunction &MF = *MBB.getParent(); |
5073 | LLVMContext &Ctx = MF.getFunction().getContext(); |
5074 | |
5075 | // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if |
5076 | // it's originated from a <1 x T> type. Those should have been lowered into |
5077 | // G_BUILD_VECTOR earlier. |
5078 | if (!Src1Ty.isVector() || !Src2Ty.isVector()) { |
5079 | LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n" ); |
5080 | return false; |
5081 | } |
5082 | |
5083 | unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; |
5084 | |
5085 | SmallVector<Constant *, 64> CstIdxs; |
5086 | for (int Val : Mask) { |
5087 | // For now, any undef indexes we'll just assume to be 0. This should be |
5088 | // optimized in future, e.g. to select DUP etc. |
5089 | Val = Val < 0 ? 0 : Val; |
5090 | for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { |
5091 | unsigned Offset = Byte + Val * BytesPerElt; |
5092 | CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset)); |
5093 | } |
5094 | } |
5095 | |
5096 | // Use a constant pool to load the index vector for TBL. |
5097 | Constant *CPVal = ConstantVector::get(V: CstIdxs); |
5098 | MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB); |
5099 | if (!IndexLoad) { |
5100 | LLVM_DEBUG(dbgs() << "Could not load from a constant pool" ); |
5101 | return false; |
5102 | } |
5103 | |
5104 | if (DstTy.getSizeInBits() != 128) { |
5105 | assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty" ); |
5106 | // This case can be done with TBL1. |
5107 | MachineInstr *Concat = |
5108 | emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB); |
5109 | if (!Concat) { |
5110 | LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1" ); |
5111 | return false; |
5112 | } |
5113 | |
5114 | // The constant pool load will be 64 bits, so need to convert to FPR128 reg. |
5115 | IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, |
5116 | Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB); |
5117 | |
5118 | auto TBL1 = MIB.buildInstr( |
5119 | Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass}, |
5120 | SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()}); |
5121 | constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI); |
5122 | |
5123 | auto Copy = |
5124 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {}) |
5125 | .addReg(RegNo: TBL1.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub); |
5126 | RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI); |
5127 | I.eraseFromParent(); |
5128 | return true; |
5129 | } |
5130 | |
5131 | // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive |
5132 | // Q registers for regalloc. |
5133 | SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; |
5134 | auto RegSeq = createQTuple(Regs, MIB); |
5135 | auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)}, |
5136 | SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)}); |
5137 | constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI); |
5138 | I.eraseFromParent(); |
5139 | return true; |
5140 | } |
5141 | |
5142 | MachineInstr *AArch64InstructionSelector::emitLaneInsert( |
5143 | std::optional<Register> DstReg, Register SrcReg, Register EltReg, |
5144 | unsigned LaneIdx, const RegisterBank &RB, |
5145 | MachineIRBuilder &MIRBuilder) const { |
5146 | MachineInstr *InsElt = nullptr; |
5147 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; |
5148 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
5149 | |
5150 | // Create a register to define with the insert if one wasn't passed in. |
5151 | if (!DstReg) |
5152 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
5153 | |
5154 | unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits(); |
5155 | unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; |
5156 | |
5157 | if (RB.getID() == AArch64::FPRRegBankID) { |
5158 | auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder); |
5159 | InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg}) |
5160 | .addImm(Val: LaneIdx) |
5161 | .addUse(RegNo: InsSub->getOperand(i: 0).getReg()) |
5162 | .addImm(Val: 0); |
5163 | } else { |
5164 | InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg}) |
5165 | .addImm(Val: LaneIdx) |
5166 | .addUse(RegNo: EltReg); |
5167 | } |
5168 | |
5169 | constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI); |
5170 | return InsElt; |
5171 | } |
5172 | |
5173 | bool AArch64InstructionSelector::selectUSMovFromExtend( |
5174 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5175 | if (MI.getOpcode() != TargetOpcode::G_SEXT && |
5176 | MI.getOpcode() != TargetOpcode::G_ZEXT && |
5177 | MI.getOpcode() != TargetOpcode::G_ANYEXT) |
5178 | return false; |
5179 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; |
5180 | const Register DefReg = MI.getOperand(i: 0).getReg(); |
5181 | const LLT DstTy = MRI.getType(Reg: DefReg); |
5182 | unsigned DstSize = DstTy.getSizeInBits(); |
5183 | |
5184 | if (DstSize != 32 && DstSize != 64) |
5185 | return false; |
5186 | |
MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5188 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
5189 | int64_t Lane; |
5190 | if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane))) |
5191 | return false; |
5192 | Register Src0 = Extract->getOperand(i: 1).getReg(); |
5193 | |
5194 | const LLT &VecTy = MRI.getType(Reg: Src0); |
5195 | |
5196 | if (VecTy.getSizeInBits() != 128) { |
5197 | const MachineInstr *ScalarToVector = emitScalarToVector( |
5198 | EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB); |
5199 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!" ); |
5200 | Src0 = ScalarToVector->getOperand(i: 0).getReg(); |
5201 | } |
5202 | |
5203 | unsigned Opcode; |
5204 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) |
5205 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; |
5206 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) |
5207 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; |
5208 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) |
5209 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; |
5210 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) |
5211 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; |
5212 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) |
5213 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; |
5214 | else |
5215 | llvm_unreachable("Unexpected type combo for S/UMov!" ); |
5216 | |
5217 | // We may need to generate one of these, depending on the type and sign of the |
5218 | // input: |
5219 | // DstReg = SMOV Src0, Lane; |
5220 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; |
5221 | MachineInstr *ExtI = nullptr; |
5222 | if (DstSize == 64 && !IsSigned) { |
5223 | Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
5224 | MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5225 | ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {}) |
5226 | .addImm(Val: 0) |
5227 | .addUse(RegNo: NewReg) |
5228 | .addImm(Val: AArch64::sub_32); |
5229 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
5230 | } else |
5231 | ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5232 | |
5233 | constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI); |
5234 | MI.eraseFromParent(); |
5235 | return true; |
5236 | } |
5237 | |
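// The tryAdvSIMDModImm* helpers below attempt to materialize a splatted vector
// constant with a single MOVI/MVNI (or FMOV) using the AdvSIMD modified
// immediate encodings. Each returns the new instruction on success, or nullptr
// if the bit pattern does not fit the encoding it handles. For 128-bit
// destinations the two 64-bit halves of Bits must be identical.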
5238 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8( |
5239 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5240 | unsigned int Op; |
5241 | if (DstSize == 128) { |
5242 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5243 | return nullptr; |
5244 | Op = AArch64::MOVIv16b_ns; |
5245 | } else { |
5246 | Op = AArch64::MOVIv8b_ns; |
5247 | } |
5248 | |
5249 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5250 | |
5251 | if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) { |
5252 | Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val); |
5253 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5254 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5255 | return &*Mov; |
5256 | } |
5257 | return nullptr; |
5258 | } |
5259 | |
5260 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16( |
5261 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5262 | bool Inv) { |
5263 | |
5264 | unsigned int Op; |
5265 | if (DstSize == 128) { |
5266 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5267 | return nullptr; |
5268 | Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16; |
5269 | } else { |
5270 | Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16; |
5271 | } |
5272 | |
5273 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5274 | uint64_t Shift; |
5275 | |
5276 | if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) { |
5277 | Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val); |
5278 | Shift = 0; |
5279 | } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) { |
5280 | Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val); |
5281 | Shift = 8; |
5282 | } else |
5283 | return nullptr; |
5284 | |
5285 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5286 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5287 | return &*Mov; |
5288 | } |
5289 | |
5290 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32( |
5291 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5292 | bool Inv) { |
5293 | |
5294 | unsigned int Op; |
5295 | if (DstSize == 128) { |
5296 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5297 | return nullptr; |
5298 | Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32; |
5299 | } else { |
5300 | Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32; |
5301 | } |
5302 | |
5303 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5304 | uint64_t Shift; |
5305 | |
5306 | if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) { |
5307 | Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val); |
5308 | Shift = 0; |
5309 | } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) { |
5310 | Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val); |
5311 | Shift = 8; |
5312 | } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) { |
5313 | Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val); |
5314 | Shift = 16; |
5315 | } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) { |
5316 | Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val); |
5317 | Shift = 24; |
5318 | } else |
5319 | return nullptr; |
5320 | |
5321 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5322 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5323 | return &*Mov; |
5324 | } |
5325 | |
5326 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64( |
5327 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5328 | |
5329 | unsigned int Op; |
5330 | if (DstSize == 128) { |
5331 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5332 | return nullptr; |
5333 | Op = AArch64::MOVIv2d_ns; |
5334 | } else { |
5335 | Op = AArch64::MOVID; |
5336 | } |
5337 | |
5338 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5339 | if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) { |
5340 | Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val); |
5341 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5342 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5343 | return &*Mov; |
5344 | } |
5345 | return nullptr; |
5346 | } |
5347 | |
5348 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s( |
5349 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5350 | bool Inv) { |
5351 | |
5352 | unsigned int Op; |
5353 | if (DstSize == 128) { |
5354 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5355 | return nullptr; |
5356 | Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl; |
5357 | } else { |
5358 | Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl; |
5359 | } |
5360 | |
5361 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5362 | uint64_t Shift; |
5363 | |
5364 | if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) { |
5365 | Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val); |
5366 | Shift = 264; |
5367 | } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) { |
5368 | Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val); |
5369 | Shift = 272; |
5370 | } else |
5371 | return nullptr; |
5372 | |
5373 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5374 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5375 | return &*Mov; |
5376 | } |
5377 | |
5378 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP( |
5379 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5380 | |
5381 | unsigned int Op; |
5382 | bool IsWide = false; |
5383 | if (DstSize == 128) { |
5384 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5385 | return nullptr; |
5386 | Op = AArch64::FMOVv4f32_ns; |
5387 | IsWide = true; |
5388 | } else { |
5389 | Op = AArch64::FMOVv2f32_ns; |
5390 | } |
5391 | |
5392 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5393 | |
5394 | if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) { |
5395 | Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val); |
5396 | } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) { |
5397 | Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val); |
5398 | Op = AArch64::FMOVv2f64_ns; |
5399 | } else |
5400 | return nullptr; |
5401 | |
5402 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5403 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5404 | return &*Mov; |
5405 | } |
5406 | |
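// Select a pre/post-indexed extending load into the corresponding
// LDRS[BHW]/LDR[BHW] pre/post opcode. The selected instruction defines
// {writeback, loaded value}; zero/any-extends to 64 bits load into a W
// register and are completed with a SUBREG_TO_REG, since the 32-bit load
// already zeroes the upper 32 bits.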
5407 | bool AArch64InstructionSelector::selectIndexedExtLoad( |
5408 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5409 | auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI); |
5410 | Register Dst = ExtLd.getDstReg(); |
5411 | Register WriteBack = ExtLd.getWritebackReg(); |
5412 | Register Base = ExtLd.getBaseReg(); |
5413 | Register Offset = ExtLd.getOffsetReg(); |
5414 | LLT Ty = MRI.getType(Reg: Dst); |
5415 | assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs. |
5416 | unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits(); |
5417 | bool IsPre = ExtLd.isPre(); |
5418 | bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd); |
5419 | bool InsertIntoXReg = false; |
5420 | bool IsDst64 = Ty.getSizeInBits() == 64; |
5421 | |
5422 | unsigned Opc = 0; |
5423 | LLT NewLdDstTy; |
5424 | LLT s32 = LLT::scalar(SizeInBits: 32); |
5425 | LLT s64 = LLT::scalar(SizeInBits: 64); |
5426 | |
5427 | if (MemSizeBits == 8) { |
5428 | if (IsSExt) { |
5429 | if (IsDst64) |
5430 | Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
5431 | else |
5432 | Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
5433 | NewLdDstTy = IsDst64 ? s64 : s32; |
5434 | } else { |
5435 | Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
5436 | InsertIntoXReg = IsDst64; |
5437 | NewLdDstTy = s32; |
5438 | } |
5439 | } else if (MemSizeBits == 16) { |
5440 | if (IsSExt) { |
5441 | if (IsDst64) |
5442 | Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
5443 | else |
5444 | Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
5445 | NewLdDstTy = IsDst64 ? s64 : s32; |
5446 | } else { |
5447 | Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
5448 | InsertIntoXReg = IsDst64; |
5449 | NewLdDstTy = s32; |
5450 | } |
5451 | } else if (MemSizeBits == 32) { |
5452 | if (IsSExt) { |
5453 | Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
5454 | NewLdDstTy = s64; |
5455 | } else { |
5456 | Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
5457 | InsertIntoXReg = IsDst64; |
5458 | NewLdDstTy = s32; |
5459 | } |
5460 | } else { |
5461 | llvm_unreachable("Unexpected size for indexed load" ); |
5462 | } |
5463 | |
5464 | if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5465 | return false; // We should be on gpr. |
5466 | |
5467 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5468 | if (!Cst) |
5469 | return false; // Shouldn't happen, but just in case. |
5470 | |
5471 | auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base}) |
5472 | .addImm(Val: Cst->getSExtValue()); |
5473 | LdMI.cloneMemRefs(OtherMI: ExtLd); |
5474 | constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI); |
5475 | // Make sure to select the load with the MemTy as the dest type, and then |
5476 | // insert into X reg if needed. |
5477 | if (InsertIntoXReg) { |
5478 | // Generate a SUBREG_TO_REG. |
5479 | auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {}) |
5480 | .addImm(Val: 0) |
5481 | .addUse(RegNo: LdMI.getReg(Idx: 1)) |
5482 | .addImm(Val: AArch64::sub_32); |
5483 | RBI.constrainGenericRegister(Reg: SubToReg.getReg(Idx: 0), RC: AArch64::GPR64RegClass, |
5484 | MRI); |
5485 | } else { |
5486 | auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1)); |
5487 | selectCopy(I&: *Copy, TII, MRI, TRI, RBI); |
5488 | } |
5489 | MI.eraseFromParent(); |
5490 | |
5491 | return true; |
5492 | } |
5493 | |
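// Select non-extending pre/post-indexed loads. The opcode is looked up by
// log2 of the memory size in bytes from a GPR or FPR table, depending on the
// destination's register bank; extending cases are handed off to
// selectIndexedExtLoad.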
5494 | bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI, |
5495 | MachineRegisterInfo &MRI) { |
5496 | auto &Ld = cast<GIndexedLoad>(Val&: MI); |
5497 | Register Dst = Ld.getDstReg(); |
5498 | Register WriteBack = Ld.getWritebackReg(); |
5499 | Register Base = Ld.getBaseReg(); |
5500 | Register Offset = Ld.getOffsetReg(); |
5501 | assert(MRI.getType(Dst).getSizeInBits() <= 128 && |
5502 | "Unexpected type for indexed load" ); |
5503 | unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes(); |
5504 | |
5505 | if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes()) |
5506 | return selectIndexedExtLoad(MI, MRI); |
5507 | |
5508 | unsigned Opc = 0; |
5509 | if (Ld.isPre()) { |
5510 | static constexpr unsigned GPROpcodes[] = { |
5511 | AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre, |
5512 | AArch64::LDRXpre}; |
5513 | static constexpr unsigned FPROpcodes[] = { |
5514 | AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre, |
5515 | AArch64::LDRQpre}; |
5516 | if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5517 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5518 | else |
5519 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5520 | } else { |
5521 | static constexpr unsigned GPROpcodes[] = { |
5522 | AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost, |
5523 | AArch64::LDRXpost}; |
5524 | static constexpr unsigned FPROpcodes[] = { |
5525 | AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost, |
5526 | AArch64::LDRDpost, AArch64::LDRQpost}; |
5527 | if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5528 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5529 | else |
5530 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5531 | } |
5532 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5533 | if (!Cst) |
5534 | return false; // Shouldn't happen, but just in case. |
5535 | auto LdMI = |
5536 | MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue()); |
5537 | LdMI.cloneMemRefs(OtherMI: Ld); |
5538 | constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI); |
5539 | MI.eraseFromParent(); |
5540 | return true; |
5541 | } |
5542 | |
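// Select pre/post-indexed stores, picking the opcode by log2 of the stored
// value's size in bytes from a GPR or FPR table; the writeback register is the
// only def of the selected instruction.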
5543 | bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I, |
5544 | MachineRegisterInfo &MRI) { |
5545 | Register Dst = I.getWritebackReg(); |
5546 | Register Val = I.getValueReg(); |
5547 | Register Base = I.getBaseReg(); |
5548 | Register Offset = I.getOffsetReg(); |
5549 | LLT ValTy = MRI.getType(Reg: Val); |
5550 | assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store" ); |
5551 | |
5552 | unsigned Opc = 0; |
5553 | if (I.isPre()) { |
5554 | static constexpr unsigned GPROpcodes[] = { |
5555 | AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre, |
5556 | AArch64::STRXpre}; |
5557 | static constexpr unsigned FPROpcodes[] = { |
5558 | AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre, |
5559 | AArch64::STRQpre}; |
5560 | |
5561 | if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5562 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5563 | else |
5564 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5565 | } else { |
5566 | static constexpr unsigned GPROpcodes[] = { |
5567 | AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost, |
5568 | AArch64::STRXpost}; |
5569 | static constexpr unsigned FPROpcodes[] = { |
5570 | AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost, |
5571 | AArch64::STRDpost, AArch64::STRQpost}; |
5572 | |
5573 | if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5574 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5575 | else |
5576 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5577 | } |
5578 | |
5579 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5580 | if (!Cst) |
5581 | return false; // Shouldn't happen, but just in case. |
5582 | auto Str = |
5583 | MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue()); |
5584 | Str.cloneMemRefs(OtherMI: I); |
5585 | constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI); |
5586 | I.eraseFromParent(); |
5587 | return true; |
5588 | } |
5589 | |
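// Materialize a constant vector. All-zero vectors use MOVIv2d_ns; splatted
// constants are tried against the AdvSIMD modified-immediate forms (directly,
// inverted for MVNI, and by flipping each element's sign bit and FNEG-ing the
// resulting MOVI); anything else falls back to a constant pool load.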
5590 | MachineInstr * |
5591 | AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, |
5592 | MachineIRBuilder &MIRBuilder, |
5593 | MachineRegisterInfo &MRI) { |
5594 | LLT DstTy = MRI.getType(Reg: Dst); |
5595 | unsigned DstSize = DstTy.getSizeInBits(); |
5596 | if (CV->isNullValue()) { |
5597 | if (DstSize == 128) { |
5598 | auto Mov = |
5599 | MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0); |
5600 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5601 | return &*Mov; |
5602 | } |
5603 | |
5604 | if (DstSize == 64) { |
5605 | auto Mov = |
5606 | MIRBuilder |
5607 | .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {}) |
5608 | .addImm(Val: 0); |
5609 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {}) |
5610 | .addReg(RegNo: Mov.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub); |
5611 | RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI); |
5612 | return &*Copy; |
5613 | } |
5614 | } |
5615 | |
5616 | if (CV->getSplatValue()) { |
5617 | APInt DefBits = APInt::getSplat(NewLen: DstSize, V: CV->getUniqueInteger()); |
5618 | auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { |
5619 | MachineInstr *NewOp; |
5620 | bool Inv = false; |
5621 | if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) || |
5622 | (NewOp = |
5623 | tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5624 | (NewOp = |
5625 | tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5626 | (NewOp = |
5627 | tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5628 | (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) || |
5629 | (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder))) |
5630 | return NewOp; |
5631 | |
5632 | DefBits = ~DefBits; |
5633 | Inv = true; |
5634 | if ((NewOp = |
5635 | tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5636 | (NewOp = |
5637 | tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5638 | (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv))) |
5639 | return NewOp; |
5640 | return nullptr; |
5641 | }; |
5642 | |
5643 | if (auto *NewOp = TryMOVIWithBits(DefBits)) |
5644 | return NewOp; |
5645 | |
5646 | // See if a fneg of the constant can be materialized with a MOVI, etc |
5647 | auto TryWithFNeg = [&](APInt DefBits, int NumBits, |
5648 | unsigned NegOpc) -> MachineInstr * { |
5649 | // FNegate each sub-element of the constant |
5650 | APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize); |
5651 | APInt NegBits(DstSize, 0); |
5652 | unsigned NumElts = DstSize / NumBits; |
5653 | for (unsigned i = 0; i < NumElts; i++) |
5654 | NegBits |= Neg << (NumBits * i); |
5655 | NegBits = DefBits ^ NegBits; |
5656 | |
5657 | // Try to create the new constants with MOVI, and if so generate a fneg |
5658 | // for it. |
5659 | if (auto *NewOp = TryMOVIWithBits(NegBits)) { |
5660 | Register NewDst = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
5661 | NewOp->getOperand(i: 0).setReg(NewDst); |
5662 | return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst}); |
5663 | } |
5664 | return nullptr; |
5665 | }; |
5666 | MachineInstr *R; |
5667 | if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) || |
5668 | (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) || |
5669 | (STI.hasFullFP16() && |
5670 | (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16)))) |
5671 | return R; |
5672 | } |
5673 | |
5674 | auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder); |
5675 | if (!CPLoad) { |
5676 | LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!" ); |
5677 | return nullptr; |
5678 | } |
5679 | |
5680 | auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0)); |
5681 | RBI.constrainGenericRegister( |
5682 | Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI); |
5683 | return &*Copy; |
5684 | } |
5685 | |
5686 | bool AArch64InstructionSelector::tryOptConstantBuildVec( |
5687 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { |
5688 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
5689 | unsigned DstSize = DstTy.getSizeInBits(); |
5690 | assert(DstSize <= 128 && "Unexpected build_vec type!" ); |
5691 | if (DstSize < 32) |
5692 | return false; |
5693 | // Check if we're building a constant vector, in which case we want to |
5694 | // generate a constant pool load instead of a vector insert sequence. |
5695 | SmallVector<Constant *, 16> Csts; |
5696 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { |
5697 | // Try to find G_CONSTANT or G_FCONSTANT |
5698 | auto *OpMI = |
5699 | getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI); |
5700 | if (OpMI) |
5701 | Csts.emplace_back( |
5702 | Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm())); |
5703 | else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT, |
5704 | Reg: I.getOperand(i: Idx).getReg(), MRI))) |
5705 | Csts.emplace_back( |
5706 | Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm())); |
5707 | else |
5708 | return false; |
5709 | } |
5710 | Constant *CV = ConstantVector::get(V: Csts); |
5711 | if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI)) |
5712 | return false; |
5713 | I.eraseFromParent(); |
5714 | return true; |
5715 | } |
5716 | |
5717 | bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( |
5718 | MachineInstr &I, MachineRegisterInfo &MRI) { |
5719 | // Given: |
5720 | // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef |
5721 | // |
5722 | // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. |
5723 | Register Dst = I.getOperand(i: 0).getReg(); |
5724 | Register EltReg = I.getOperand(i: 1).getReg(); |
5725 | LLT EltTy = MRI.getType(Reg: EltReg); |
5726 | // If the index isn't on the same bank as its elements, then this can't be a |
5727 | // SUBREG_TO_REG. |
5728 | const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI); |
5729 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI); |
5730 | if (EltRB != DstRB) |
5731 | return false; |
5732 | if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) { |
5733 | return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI); |
5734 | })) |
5735 | return false; |
5736 | unsigned SubReg; |
5737 | const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB); |
5738 | if (!EltRC) |
5739 | return false; |
5740 | const TargetRegisterClass *DstRC = |
5741 | getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB); |
5742 | if (!DstRC) |
5743 | return false; |
5744 | if (!getSubRegForClass(RC: EltRC, TRI, SubReg)) |
5745 | return false; |
5746 | auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {}) |
5747 | .addImm(Val: 0) |
5748 | .addUse(RegNo: EltReg) |
5749 | .addImm(Val: SubReg); |
5750 | I.eraseFromParent(); |
5751 | constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI); |
5752 | return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI); |
5753 | } |
5754 | |
5755 | bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, |
5756 | MachineRegisterInfo &MRI) { |
5757 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
5758 | // Until we port more of the optimized selections, for now just use a vector |
5759 | // insert sequence. |
5760 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5761 | const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
5762 | unsigned EltSize = EltTy.getSizeInBits(); |
5763 | |
5764 | if (tryOptConstantBuildVec(I, DstTy, MRI)) |
5765 | return true; |
5766 | if (tryOptBuildVecToSubregToReg(I, MRI)) |
5767 | return true; |
5768 | |
5769 | if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64) |
5770 | return false; // Don't support all element types yet. |
5771 | const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI); |
5772 | |
5773 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; |
5774 | MachineInstr *ScalarToVec = |
5775 | emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC, |
5776 | Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB); |
5777 | if (!ScalarToVec) |
5778 | return false; |
5779 | |
5780 | Register DstVec = ScalarToVec->getOperand(i: 0).getReg(); |
5781 | unsigned DstSize = DstTy.getSizeInBits(); |
5782 | |
5783 | // Keep track of the last MI we inserted. Later on, we might be able to save |
5784 | // a copy using it. |
5785 | MachineInstr *PrevMI = ScalarToVec; |
5786 | for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { |
5787 | // Note that if we don't do a subregister copy, we can end up making an |
5788 | // extra register. |
5789 | Register OpReg = I.getOperand(i).getReg(); |
5790 | // Do not emit inserts for undefs |
5791 | if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) { |
5792 | PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB); |
5793 | DstVec = PrevMI->getOperand(i: 0).getReg(); |
5794 | } |
5795 | } |
5796 | |
5797 | // If DstTy's size in bits is less than 128, then emit a subregister copy |
5798 | // from DstVec to the last register we've defined. |
5799 | if (DstSize < 128) { |
5800 | // Force this to be FPR using the destination vector. |
5801 | const TargetRegisterClass *RC = |
5802 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI)); |
5803 | if (!RC) |
5804 | return false; |
5805 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { |
5806 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n" ); |
5807 | return false; |
5808 | } |
5809 | |
5810 | unsigned SubReg = 0; |
5811 | if (!getSubRegForClass(RC, TRI, SubReg)) |
5812 | return false; |
5813 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { |
5814 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize |
5815 | << "\n" ); |
5816 | return false; |
5817 | } |
5818 | |
5819 | Register Reg = MRI.createVirtualRegister(RegClass: RC); |
5820 | Register DstReg = I.getOperand(i: 0).getReg(); |
5821 | |
5822 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg); |
5823 | MachineOperand &RegOp = I.getOperand(i: 1); |
5824 | RegOp.setReg(Reg); |
5825 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
5826 | } else { |
5827 | // We either have a vector with all elements (except the first one) undef or |
5828 | // at least one non-undef non-first element. In the first case, we need to |
5829 | // constrain the output register ourselves as we may have generated an |
5830 | // INSERT_SUBREG operation which is a generic operation for which the |
5831 | // output regclass cannot be automatically chosen. |
5832 | // |
5833 | // In the second case, there is no need to do this as it may generate an |
5834 | // instruction like INSvi32gpr where the regclass can be automatically |
5835 | // chosen. |
5836 | // |
5837 | // Also, we save a copy by re-using the destination register on the final |
5838 | // insert. |
5839 | PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg()); |
5840 | constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI); |
5841 | |
5842 | Register DstReg = PrevMI->getOperand(i: 0).getReg(); |
5843 | if (PrevMI == ScalarToVec && DstReg.isVirtual()) { |
5844 | const TargetRegisterClass *RC = |
5845 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI)); |
5846 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
5847 | } |
5848 | } |
5849 | |
5850 | I.eraseFromParent(); |
5851 | return true; |
5852 | } |
5853 | |
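// Select a NEON structured load (ld1xN/ldN/ldNr) intrinsic: emit the LDn
// instruction, which defines a D- or Q-register tuple, then copy each
// dsub0+Idx / qsub0+Idx subregister out to the intrinsic's destination
// operands.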
5854 | bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, |
5855 | unsigned NumVecs, |
5856 | MachineInstr &I) { |
5857 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
5858 | assert(Opc && "Expected an opcode?" ); |
5859 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" ); |
5860 | auto &MRI = *MIB.getMRI(); |
5861 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5862 | unsigned Size = Ty.getSizeInBits(); |
5863 | assert((Size == 64 || Size == 128) && |
5864 | "Destination must be 64 bits or 128 bits?" ); |
5865 | unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; |
5866 | auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg(); |
5867 | assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?" ); |
5868 | auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr}); |
5869 | Load.cloneMemRefs(OtherMI: I); |
5870 | constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI); |
5871 | Register SelectedLoadDst = Load->getOperand(i: 0).getReg(); |
5872 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
5873 | auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {}) |
5874 | .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx); |
5875 | // Emit the subreg copies and immediately select them. |
5876 | // FIXME: We should refactor our copy code into an emitCopy helper and |
5877 | // clean up uses of this pattern elsewhere in the selector. |
5878 | selectCopy(I&: *Vec, TII, MRI, TRI, RBI); |
5879 | } |
5880 | return true; |
5881 | } |
5882 | |
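// Select a NEON ldNlane (load single structure to one lane) intrinsic. The
// source vectors are gathered into a Q-register tuple (64-bit vectors are
// widened to 128 bits first, as the lane instructions only take Q tuples), the
// lane number must be a compile-time constant, and each result is copied back
// out of qsub0+Idx, narrowed again when needed.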
5883 | bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic( |
5884 | unsigned Opc, unsigned NumVecs, MachineInstr &I) { |
5885 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
5886 | assert(Opc && "Expected an opcode?" ); |
5887 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" ); |
5888 | auto &MRI = *MIB.getMRI(); |
5889 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5890 | bool Narrow = Ty.getSizeInBits() == 64; |
5891 | |
5892 | auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1; |
5893 | SmallVector<Register, 4> Regs(NumVecs); |
5894 | std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(), |
5895 | unary_op: [](auto MO) { return MO.getReg(); }); |
5896 | |
5897 | if (Narrow) { |
5898 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
5899 | return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB) |
5900 | ->getOperand(i: 0) |
5901 | .getReg(); |
5902 | }); |
5903 | Ty = Ty.multiplyElements(Factor: 2); |
5904 | } |
5905 | |
5906 | Register Tuple = createQTuple(Regs, MIB); |
5907 | auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI); |
5908 | if (!LaneNo) |
5909 | return false; |
5910 | |
5911 | Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg(); |
5912 | auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {}) |
5913 | .addReg(RegNo: Tuple) |
5914 | .addImm(Val: LaneNo->getZExtValue()) |
5915 | .addReg(RegNo: Ptr); |
5916 | Load.cloneMemRefs(OtherMI: I); |
5917 | constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI); |
5918 | Register SelectedLoadDst = Load->getOperand(i: 0).getReg(); |
5919 | unsigned SubReg = AArch64::qsub0; |
5920 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
5921 | auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, |
5922 | DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass) |
5923 | : DstOp(I.getOperand(i: Idx).getReg())}, |
5924 | SrcOps: {}) |
5925 | .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx); |
5926 | Register WideReg = Vec.getReg(Idx: 0); |
5927 | // Emit the subreg copies and immediately select them. |
5928 | selectCopy(I&: *Vec, TII, MRI, TRI, RBI); |
5929 | if (Narrow && |
5930 | !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI)) |
5931 | return false; |
5932 | } |
5933 | return true; |
5934 | } |
5935 | |
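// Select a NEON structured store (st1xN/stN) intrinsic: tie the source vectors
// together into a D- or Q-register tuple and emit the STn instruction against
// the pointer operand.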
5936 | void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I, |
5937 | unsigned NumVecs, |
5938 | unsigned Opc) { |
5939 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
5940 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
5941 | Register Ptr = I.getOperand(i: 1 + NumVecs).getReg(); |
5942 | |
5943 | SmallVector<Register, 2> Regs(NumVecs); |
5944 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
5945 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
5946 | |
5947 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) |
5948 | : createDTuple(Regs, MIB); |
5949 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr}); |
5950 | Store.cloneMemRefs(OtherMI: I); |
5951 | constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI); |
5952 | } |
5953 | |
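// Select a NEON stNlane (store single structure from one lane) intrinsic.
// Lane stores always operate on a Q-register tuple, so 64-bit sources are
// widened first; the lane number must be a compile-time constant.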
5954 | bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic( |
5955 | MachineInstr &I, unsigned NumVecs, unsigned Opc) { |
5956 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
5957 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
5958 | bool Narrow = Ty.getSizeInBits() == 64; |
5959 | |
5960 | SmallVector<Register, 2> Regs(NumVecs); |
5961 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
5962 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
5963 | |
5964 | if (Narrow) |
5965 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
5966 | return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB) |
5967 | ->getOperand(i: 0) |
5968 | .getReg(); |
5969 | }); |
5970 | |
5971 | Register Tuple = createQTuple(Regs, MIB); |
5972 | |
5973 | auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI); |
5974 | if (!LaneNo) |
5975 | return false; |
5976 | Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg(); |
5977 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {}) |
5978 | .addReg(RegNo: Tuple) |
5979 | .addImm(Val: LaneNo->getZExtValue()) |
5980 | .addReg(RegNo: Ptr); |
5981 | Store.cloneMemRefs(OtherMI: I); |
5982 | constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI); |
5983 | return true; |
5984 | } |
5985 | |
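// Handle side-effecting intrinsics. For the NEON ldN/stN family the target
// opcode is chosen purely from the LLT of the vector operand; 64-bit scalar
// and pointer payloads map onto the LD1/ST1 multi-vector ".1d" forms.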
5986 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( |
5987 | MachineInstr &I, MachineRegisterInfo &MRI) { |
5988 | // Find the intrinsic ID. |
5989 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
5990 | |
5991 | const LLT S8 = LLT::scalar(SizeInBits: 8); |
5992 | const LLT S16 = LLT::scalar(SizeInBits: 16); |
5993 | const LLT S32 = LLT::scalar(SizeInBits: 32); |
5994 | const LLT S64 = LLT::scalar(SizeInBits: 64); |
5995 | const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
5996 | // Select the instruction. |
5997 | switch (IntrinID) { |
5998 | default: |
5999 | return false; |
6000 | case Intrinsic::aarch64_ldxp: |
6001 | case Intrinsic::aarch64_ldaxp: { |
6002 | auto NewI = MIB.buildInstr( |
6003 | Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, |
6004 | DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()}, |
6005 | SrcOps: {I.getOperand(i: 3)}); |
6006 | NewI.cloneMemRefs(OtherMI: I); |
6007 | constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI); |
6008 | break; |
6009 | } |
6010 | case Intrinsic::aarch64_neon_ld1x2: { |
6011 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6012 | unsigned Opc = 0; |
6013 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6014 | Opc = AArch64::LD1Twov8b; |
6015 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6016 | Opc = AArch64::LD1Twov16b; |
6017 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6018 | Opc = AArch64::LD1Twov4h; |
6019 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6020 | Opc = AArch64::LD1Twov8h; |
6021 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6022 | Opc = AArch64::LD1Twov2s; |
6023 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6024 | Opc = AArch64::LD1Twov4s; |
6025 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6026 | Opc = AArch64::LD1Twov2d; |
6027 | else if (Ty == S64 || Ty == P0) |
6028 | Opc = AArch64::LD1Twov1d; |
6029 | else |
6030 | llvm_unreachable("Unexpected type for ld1x2!" ); |
6031 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6032 | break; |
6033 | } |
6034 | case Intrinsic::aarch64_neon_ld1x3: { |
6035 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6036 | unsigned Opc = 0; |
6037 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6038 | Opc = AArch64::LD1Threev8b; |
6039 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6040 | Opc = AArch64::LD1Threev16b; |
6041 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6042 | Opc = AArch64::LD1Threev4h; |
6043 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6044 | Opc = AArch64::LD1Threev8h; |
6045 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6046 | Opc = AArch64::LD1Threev2s; |
6047 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6048 | Opc = AArch64::LD1Threev4s; |
6049 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6050 | Opc = AArch64::LD1Threev2d; |
6051 | else if (Ty == S64 || Ty == P0) |
6052 | Opc = AArch64::LD1Threev1d; |
6053 | else |
6054 | llvm_unreachable("Unexpected type for ld1x3!" ); |
6055 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6056 | break; |
6057 | } |
6058 | case Intrinsic::aarch64_neon_ld1x4: { |
6059 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6060 | unsigned Opc = 0; |
6061 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6062 | Opc = AArch64::LD1Fourv8b; |
6063 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6064 | Opc = AArch64::LD1Fourv16b; |
6065 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6066 | Opc = AArch64::LD1Fourv4h; |
6067 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6068 | Opc = AArch64::LD1Fourv8h; |
6069 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6070 | Opc = AArch64::LD1Fourv2s; |
6071 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6072 | Opc = AArch64::LD1Fourv4s; |
6073 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6074 | Opc = AArch64::LD1Fourv2d; |
6075 | else if (Ty == S64 || Ty == P0) |
6076 | Opc = AArch64::LD1Fourv1d; |
6077 | else |
6078 | llvm_unreachable("Unexpected type for ld1x4!" ); |
6079 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6080 | break; |
6081 | } |
6082 | case Intrinsic::aarch64_neon_ld2: { |
6083 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6084 | unsigned Opc = 0; |
6085 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6086 | Opc = AArch64::LD2Twov8b; |
6087 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6088 | Opc = AArch64::LD2Twov16b; |
6089 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6090 | Opc = AArch64::LD2Twov4h; |
6091 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6092 | Opc = AArch64::LD2Twov8h; |
6093 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6094 | Opc = AArch64::LD2Twov2s; |
6095 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6096 | Opc = AArch64::LD2Twov4s; |
6097 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6098 | Opc = AArch64::LD2Twov2d; |
6099 | else if (Ty == S64 || Ty == P0) |
6100 | Opc = AArch64::LD1Twov1d; |
6101 | else |
6102 | llvm_unreachable("Unexpected type for ld2!" ); |
6103 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6104 | break; |
6105 | } |
6106 | case Intrinsic::aarch64_neon_ld2lane: { |
6107 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6108 | unsigned Opc; |
6109 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6110 | Opc = AArch64::LD2i8; |
6111 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6112 | Opc = AArch64::LD2i16; |
6113 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6114 | Opc = AArch64::LD2i32; |
6115 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6116 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6117 | Opc = AArch64::LD2i64; |
6118 | else |
6119 | llvm_unreachable("Unexpected type for st2lane!" ); |
6120 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I)) |
6121 | return false; |
6122 | break; |
6123 | } |
6124 | case Intrinsic::aarch64_neon_ld2r: { |
6125 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6126 | unsigned Opc = 0; |
6127 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6128 | Opc = AArch64::LD2Rv8b; |
6129 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6130 | Opc = AArch64::LD2Rv16b; |
6131 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6132 | Opc = AArch64::LD2Rv4h; |
6133 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6134 | Opc = AArch64::LD2Rv8h; |
6135 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6136 | Opc = AArch64::LD2Rv2s; |
6137 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6138 | Opc = AArch64::LD2Rv4s; |
6139 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6140 | Opc = AArch64::LD2Rv2d; |
6141 | else if (Ty == S64 || Ty == P0) |
6142 | Opc = AArch64::LD2Rv1d; |
6143 | else |
6144 | llvm_unreachable("Unexpected type for ld2r!" ); |
6145 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6146 | break; |
6147 | } |
6148 | case Intrinsic::aarch64_neon_ld3: { |
6149 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6150 | unsigned Opc = 0; |
6151 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6152 | Opc = AArch64::LD3Threev8b; |
6153 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6154 | Opc = AArch64::LD3Threev16b; |
6155 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6156 | Opc = AArch64::LD3Threev4h; |
6157 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6158 | Opc = AArch64::LD3Threev8h; |
6159 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6160 | Opc = AArch64::LD3Threev2s; |
6161 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6162 | Opc = AArch64::LD3Threev4s; |
6163 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6164 | Opc = AArch64::LD3Threev2d; |
6165 | else if (Ty == S64 || Ty == P0) |
6166 | Opc = AArch64::LD1Threev1d; |
6167 | else |
6168 | llvm_unreachable("Unexpected type for ld3!" ); |
6169 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6170 | break; |
6171 | } |
6172 | case Intrinsic::aarch64_neon_ld3lane: { |
6173 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6174 | unsigned Opc; |
6175 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6176 | Opc = AArch64::LD3i8; |
6177 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6178 | Opc = AArch64::LD3i16; |
6179 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6180 | Opc = AArch64::LD3i32; |
6181 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6182 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6183 | Opc = AArch64::LD3i64; |
6184 | else |
6185 | llvm_unreachable("Unexpected type for st3lane!" ); |
6186 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I)) |
6187 | return false; |
6188 | break; |
6189 | } |
6190 | case Intrinsic::aarch64_neon_ld3r: { |
6191 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6192 | unsigned Opc = 0; |
6193 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6194 | Opc = AArch64::LD3Rv8b; |
6195 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6196 | Opc = AArch64::LD3Rv16b; |
6197 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6198 | Opc = AArch64::LD3Rv4h; |
6199 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6200 | Opc = AArch64::LD3Rv8h; |
6201 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6202 | Opc = AArch64::LD3Rv2s; |
6203 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6204 | Opc = AArch64::LD3Rv4s; |
6205 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6206 | Opc = AArch64::LD3Rv2d; |
6207 | else if (Ty == S64 || Ty == P0) |
6208 | Opc = AArch64::LD3Rv1d; |
6209 | else |
6210 | llvm_unreachable("Unexpected type for ld3r!" ); |
6211 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6212 | break; |
6213 | } |
6214 | case Intrinsic::aarch64_neon_ld4: { |
6215 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6216 | unsigned Opc = 0; |
6217 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6218 | Opc = AArch64::LD4Fourv8b; |
6219 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6220 | Opc = AArch64::LD4Fourv16b; |
6221 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6222 | Opc = AArch64::LD4Fourv4h; |
6223 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6224 | Opc = AArch64::LD4Fourv8h; |
6225 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6226 | Opc = AArch64::LD4Fourv2s; |
6227 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6228 | Opc = AArch64::LD4Fourv4s; |
6229 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6230 | Opc = AArch64::LD4Fourv2d; |
6231 | else if (Ty == S64 || Ty == P0) |
6232 | Opc = AArch64::LD1Fourv1d; |
6233 | else |
6234 | llvm_unreachable("Unexpected type for ld4!" ); |
6235 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6236 | break; |
6237 | } |
6238 | case Intrinsic::aarch64_neon_ld4lane: { |
6239 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6240 | unsigned Opc; |
6241 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6242 | Opc = AArch64::LD4i8; |
6243 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6244 | Opc = AArch64::LD4i16; |
6245 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6246 | Opc = AArch64::LD4i32; |
6247 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6248 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6249 | Opc = AArch64::LD4i64; |
6250 | else |
6251 | llvm_unreachable("Unexpected type for st4lane!" ); |
6252 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I)) |
6253 | return false; |
6254 | break; |
6255 | } |
6256 | case Intrinsic::aarch64_neon_ld4r: { |
6257 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6258 | unsigned Opc = 0; |
6259 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6260 | Opc = AArch64::LD4Rv8b; |
6261 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6262 | Opc = AArch64::LD4Rv16b; |
6263 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6264 | Opc = AArch64::LD4Rv4h; |
6265 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6266 | Opc = AArch64::LD4Rv8h; |
6267 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6268 | Opc = AArch64::LD4Rv2s; |
6269 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6270 | Opc = AArch64::LD4Rv4s; |
6271 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6272 | Opc = AArch64::LD4Rv2d; |
6273 | else if (Ty == S64 || Ty == P0) |
6274 | Opc = AArch64::LD4Rv1d; |
6275 | else |
6276 | llvm_unreachable("Unexpected type for ld4r!" ); |
6277 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6278 | break; |
6279 | } |
6280 | case Intrinsic::aarch64_neon_st1x2: { |
6281 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6282 | unsigned Opc; |
6283 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6284 | Opc = AArch64::ST1Twov8b; |
6285 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6286 | Opc = AArch64::ST1Twov16b; |
6287 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6288 | Opc = AArch64::ST1Twov4h; |
6289 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6290 | Opc = AArch64::ST1Twov8h; |
6291 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6292 | Opc = AArch64::ST1Twov2s; |
6293 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6294 | Opc = AArch64::ST1Twov4s; |
6295 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6296 | Opc = AArch64::ST1Twov2d; |
6297 | else if (Ty == S64 || Ty == P0) |
6298 | Opc = AArch64::ST1Twov1d; |
6299 | else |
6300 | llvm_unreachable("Unexpected type for st1x2!" ); |
6301 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6302 | break; |
6303 | } |
6304 | case Intrinsic::aarch64_neon_st1x3: { |
6305 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6306 | unsigned Opc; |
6307 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6308 | Opc = AArch64::ST1Threev8b; |
6309 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6310 | Opc = AArch64::ST1Threev16b; |
6311 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6312 | Opc = AArch64::ST1Threev4h; |
6313 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6314 | Opc = AArch64::ST1Threev8h; |
6315 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6316 | Opc = AArch64::ST1Threev2s; |
6317 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6318 | Opc = AArch64::ST1Threev4s; |
6319 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6320 | Opc = AArch64::ST1Threev2d; |
6321 | else if (Ty == S64 || Ty == P0) |
6322 | Opc = AArch64::ST1Threev1d; |
6323 | else |
6324 | llvm_unreachable("Unexpected type for st1x3!" ); |
6325 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6326 | break; |
6327 | } |
6328 | case Intrinsic::aarch64_neon_st1x4: { |
6329 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6330 | unsigned Opc; |
6331 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6332 | Opc = AArch64::ST1Fourv8b; |
6333 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6334 | Opc = AArch64::ST1Fourv16b; |
6335 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6336 | Opc = AArch64::ST1Fourv4h; |
6337 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6338 | Opc = AArch64::ST1Fourv8h; |
6339 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6340 | Opc = AArch64::ST1Fourv2s; |
6341 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6342 | Opc = AArch64::ST1Fourv4s; |
6343 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6344 | Opc = AArch64::ST1Fourv2d; |
6345 | else if (Ty == S64 || Ty == P0) |
6346 | Opc = AArch64::ST1Fourv1d; |
6347 | else |
6348 | llvm_unreachable("Unexpected type for st1x4!" ); |
6349 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6350 | break; |
6351 | } |
6352 | case Intrinsic::aarch64_neon_st2: { |
6353 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6354 | unsigned Opc; |
6355 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6356 | Opc = AArch64::ST2Twov8b; |
6357 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6358 | Opc = AArch64::ST2Twov16b; |
6359 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6360 | Opc = AArch64::ST2Twov4h; |
6361 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6362 | Opc = AArch64::ST2Twov8h; |
6363 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6364 | Opc = AArch64::ST2Twov2s; |
6365 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6366 | Opc = AArch64::ST2Twov4s; |
6367 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6368 | Opc = AArch64::ST2Twov2d; |
6369 | else if (Ty == S64 || Ty == P0) |
6370 | Opc = AArch64::ST1Twov1d; |
6371 | else |
6372 | llvm_unreachable("Unexpected type for st2!" ); |
6373 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6374 | break; |
6375 | } |
6376 | case Intrinsic::aarch64_neon_st3: { |
6377 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6378 | unsigned Opc; |
6379 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6380 | Opc = AArch64::ST3Threev8b; |
6381 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6382 | Opc = AArch64::ST3Threev16b; |
6383 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6384 | Opc = AArch64::ST3Threev4h; |
6385 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6386 | Opc = AArch64::ST3Threev8h; |
6387 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6388 | Opc = AArch64::ST3Threev2s; |
6389 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6390 | Opc = AArch64::ST3Threev4s; |
6391 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6392 | Opc = AArch64::ST3Threev2d; |
6393 | else if (Ty == S64 || Ty == P0) |
6394 | Opc = AArch64::ST1Threev1d; |
6395 | else |
6396 | llvm_unreachable("Unexpected type for st3!" ); |
6397 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6398 | break; |
6399 | } |
6400 | case Intrinsic::aarch64_neon_st4: { |
6401 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6402 | unsigned Opc; |
6403 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6404 | Opc = AArch64::ST4Fourv8b; |
6405 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6406 | Opc = AArch64::ST4Fourv16b; |
6407 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6408 | Opc = AArch64::ST4Fourv4h; |
6409 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6410 | Opc = AArch64::ST4Fourv8h; |
6411 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6412 | Opc = AArch64::ST4Fourv2s; |
6413 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6414 | Opc = AArch64::ST4Fourv4s; |
6415 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6416 | Opc = AArch64::ST4Fourv2d; |
6417 | else if (Ty == S64 || Ty == P0) |
6418 | Opc = AArch64::ST1Fourv1d; |
6419 | else |
6420 | llvm_unreachable("Unexpected type for st4!" ); |
6421 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6422 | break; |
6423 | } |
6424 | case Intrinsic::aarch64_neon_st2lane: { |
6425 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6426 | unsigned Opc; |
6427 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6428 | Opc = AArch64::ST2i8; |
6429 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6430 | Opc = AArch64::ST2i16; |
6431 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6432 | Opc = AArch64::ST2i32; |
6433 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6434 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6435 | Opc = AArch64::ST2i64; |
6436 | else |
6437 | llvm_unreachable("Unexpected type for st2lane!" ); |
6438 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc)) |
6439 | return false; |
6440 | break; |
6441 | } |
6442 | case Intrinsic::aarch64_neon_st3lane: { |
6443 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6444 | unsigned Opc; |
6445 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6446 | Opc = AArch64::ST3i8; |
6447 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6448 | Opc = AArch64::ST3i16; |
6449 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6450 | Opc = AArch64::ST3i32; |
6451 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6452 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6453 | Opc = AArch64::ST3i64; |
6454 | else |
6455 | llvm_unreachable("Unexpected type for st3lane!" ); |
6456 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc)) |
6457 | return false; |
6458 | break; |
6459 | } |
6460 | case Intrinsic::aarch64_neon_st4lane: { |
6461 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6462 | unsigned Opc; |
6463 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6464 | Opc = AArch64::ST4i8; |
6465 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6466 | Opc = AArch64::ST4i16; |
6467 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6468 | Opc = AArch64::ST4i32; |
6469 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6470 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6471 | Opc = AArch64::ST4i64; |
6472 | else |
6473 | llvm_unreachable("Unexpected type for st4lane!" ); |
6474 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc)) |
6475 | return false; |
6476 | break; |
6477 | } |
6478 | case Intrinsic::aarch64_mops_memset_tag: { |
// Transform
//   %dst:gpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS
//       intrinsic(@llvm.aarch64.mops.memset.tag),
//       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
// where %dst is updated, into
//   %Rd:GPR64common, %Rn:GPR64 = MOPSMemorySetTaggingPseudo
//       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
// where Rd and Rn are tied.
// It is expected that %val has been extended to s64 in legalization.
// Note that the order of the size/value operands is swapped.
6490 | |
6491 | Register DstDef = I.getOperand(i: 0).getReg(); |
6492 | // I.getOperand(1) is the intrinsic function |
6493 | Register DstUse = I.getOperand(i: 2).getReg(); |
6494 | Register ValUse = I.getOperand(i: 3).getReg(); |
6495 | Register SizeUse = I.getOperand(i: 4).getReg(); |
6496 | |
6497 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. |
// Therefore an additional virtual register is required for the updated size
6499 | // operand. This value is not accessible via the semantics of the intrinsic. |
6500 | Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
6501 | |
6502 | auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo, |
6503 | DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse}); |
6504 | Memset.cloneMemRefs(OtherMI: I); |
6505 | constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI); |
6506 | break; |
6507 | } |
6508 | } |
6509 | |
6510 | I.eraseFromParent(); |
6511 | return true; |
6512 | } |
6513 | |
6514 | bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, |
6515 | MachineRegisterInfo &MRI) { |
6516 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
6517 | |
6518 | switch (IntrinID) { |
6519 | default: |
6520 | break; |
6521 | case Intrinsic::aarch64_crypto_sha1h: { |
6522 | Register DstReg = I.getOperand(i: 0).getReg(); |
6523 | Register SrcReg = I.getOperand(i: 2).getReg(); |
6524 | |
6525 | // FIXME: Should this be an assert? |
6526 | if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 || |
6527 | MRI.getType(Reg: SrcReg).getSizeInBits() != 32) |
6528 | return false; |
6529 | |
6530 | // The operation has to happen on FPRs. Set up some new FPR registers for |
6531 | // the source and destination if they are on GPRs. |
6532 | if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
6533 | SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass); |
6534 | MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)}); |
6535 | |
6536 | // Make sure the copy ends up getting constrained properly. |
6537 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(), |
6538 | RC: AArch64::GPR32RegClass, MRI); |
6539 | } |
6540 | |
6541 | if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) |
6542 | DstReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass); |
6543 | |
6544 | // Actually insert the instruction. |
6545 | auto SHA1Inst = MIB.buildInstr(Opc: AArch64::SHA1Hrr, DstOps: {DstReg}, SrcOps: {SrcReg}); |
6546 | constrainSelectedInstRegOperands(I&: *SHA1Inst, TII, TRI, RBI); |
6547 | |
6548 | // Did we create a new register for the destination? |
6549 | if (DstReg != I.getOperand(i: 0).getReg()) { |
6550 | // Yep. Copy the result of the instruction back into the original |
6551 | // destination. |
6552 | MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg}); |
6553 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
6554 | RC: AArch64::GPR32RegClass, MRI); |
6555 | } |
6556 | |
6557 | I.eraseFromParent(); |
6558 | return true; |
6559 | } |
6560 | case Intrinsic::ptrauth_resign: { |
6561 | Register DstReg = I.getOperand(i: 0).getReg(); |
6562 | Register ValReg = I.getOperand(i: 2).getReg(); |
6563 | uint64_t AUTKey = I.getOperand(i: 3).getImm(); |
6564 | Register AUTDisc = I.getOperand(i: 4).getReg(); |
6565 | uint64_t PACKey = I.getOperand(i: 5).getImm(); |
6566 | Register PACDisc = I.getOperand(i: 6).getReg(); |
6567 | |
6568 | Register AUTAddrDisc = AUTDisc; |
6569 | uint16_t AUTConstDiscC = 0; |
6570 | std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) = |
6571 | extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI); |
6572 | |
6573 | Register PACAddrDisc = PACDisc; |
6574 | uint16_t PACConstDiscC = 0; |
6575 | std::tie(args&: PACConstDiscC, args&: PACAddrDisc) = |
6576 | extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI); |
6577 | |
6578 | MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg}); |
6579 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6580 | MIB.buildInstr(Opcode: AArch64::AUTPAC) |
6581 | .addImm(Val: AUTKey) |
6582 | .addImm(Val: AUTConstDiscC) |
6583 | .addUse(RegNo: AUTAddrDisc) |
6584 | .addImm(Val: PACKey) |
6585 | .addImm(Val: PACConstDiscC) |
6586 | .addUse(RegNo: PACAddrDisc) |
6587 | .constrainAllUses(TII, TRI, RBI); |
6588 | MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16)); |
6589 | |
6590 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6591 | I.eraseFromParent(); |
6592 | return true; |
6593 | } |
6594 | case Intrinsic::ptrauth_auth: { |
6595 | Register DstReg = I.getOperand(i: 0).getReg(); |
6596 | Register ValReg = I.getOperand(i: 2).getReg(); |
6597 | uint64_t AUTKey = I.getOperand(i: 3).getImm(); |
6598 | Register AUTDisc = I.getOperand(i: 4).getReg(); |
6599 | |
6600 | Register AUTAddrDisc = AUTDisc; |
6601 | uint16_t AUTConstDiscC = 0; |
6602 | std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) = |
6603 | extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI); |
6604 | |
6605 | MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg}); |
6606 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6607 | MIB.buildInstr(Opcode: AArch64::AUT) |
6608 | .addImm(Val: AUTKey) |
6609 | .addImm(Val: AUTConstDiscC) |
6610 | .addUse(RegNo: AUTAddrDisc) |
6611 | .constrainAllUses(TII, TRI, RBI); |
6612 | MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16)); |
6613 | |
6614 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6615 | I.eraseFromParent(); |
6616 | return true; |
6617 | } |
6618 | case Intrinsic::frameaddress: |
6619 | case Intrinsic::returnaddress: { |
6620 | MachineFunction &MF = *I.getParent()->getParent(); |
6621 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6622 | |
6623 | unsigned Depth = I.getOperand(i: 2).getImm(); |
6624 | Register DstReg = I.getOperand(i: 0).getReg(); |
6625 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6626 | |
6627 | if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { |
6628 | if (!MFReturnAddr) { |
6629 | // Insert the copy from LR/X30 into the entry block, before it can be |
6630 | // clobbered by anything. |
6631 | MFI.setReturnAddressIsTaken(true); |
6632 | MFReturnAddr = getFunctionLiveInPhysReg( |
6633 | MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc()); |
6634 | } |
6635 | |
6636 | if (STI.hasPAuth()) { |
6637 | MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr}); |
6638 | } else { |
6639 | MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr}); |
6640 | MIB.buildInstr(Opcode: AArch64::XPACLRI); |
6641 | MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)}); |
6642 | } |
6643 | |
6644 | I.eraseFromParent(); |
6645 | return true; |
6646 | } |
6647 | |
6648 | MFI.setFrameAddressIsTaken(true); |
6649 | Register FrameAddr(AArch64::FP); |
6650 | while (Depth--) { |
6651 | Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass); |
6652 | auto Ldr = |
6653 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0); |
6654 | constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI); |
6655 | FrameAddr = NextFrame; |
6656 | } |
6657 | |
6658 | if (IntrinID == Intrinsic::frameaddress) |
6659 | MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr}); |
6660 | else { |
6661 | MFI.setReturnAddressIsTaken(true); |
6662 | |
6663 | if (STI.hasPAuth()) { |
6664 | Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
6665 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1); |
6666 | MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg}); |
6667 | } else { |
6668 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr}) |
6669 | .addImm(Val: 1); |
6670 | MIB.buildInstr(Opcode: AArch64::XPACLRI); |
6671 | MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)}); |
6672 | } |
6673 | } |
6674 | |
6675 | I.eraseFromParent(); |
6676 | return true; |
6677 | } |
6678 | case Intrinsic::aarch64_neon_tbl2: |
6679 | SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false); |
6680 | return true; |
6681 | case Intrinsic::aarch64_neon_tbl3: |
6682 | SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three, |
6683 | isExt: false); |
6684 | return true; |
6685 | case Intrinsic::aarch64_neon_tbl4: |
6686 | SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false); |
6687 | return true; |
6688 | case Intrinsic::aarch64_neon_tbx2: |
6689 | SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true); |
6690 | return true; |
6691 | case Intrinsic::aarch64_neon_tbx3: |
6692 | SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true); |
6693 | return true; |
6694 | case Intrinsic::aarch64_neon_tbx4: |
6695 | SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true); |
6696 | return true; |
6697 | case Intrinsic::swift_async_context_addr: |
6698 | auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()}, |
6699 | SrcOps: {Register(AArch64::FP)}) |
6700 | .addImm(Val: 8) |
6701 | .addImm(Val: 0); |
6702 | constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI); |
6703 | |
6704 | MF->getFrameInfo().setFrameAddressIsTaken(true); |
6705 | MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
6706 | I.eraseFromParent(); |
6707 | return true; |
6708 | } |
6709 | return false; |
6710 | } |
6711 | |
6712 | // G_PTRAUTH_GLOBAL_VALUE lowering |
6713 | // |
6714 | // We have 3 lowering alternatives to choose from: |
6715 | // - MOVaddrPAC: similar to MOVaddr, with added PAC. |
6716 | // If the GV doesn't need a GOT load (i.e., is locally defined) |
6717 | // materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC. |
6718 | // |
6719 | // - LOADgotPAC: similar to LOADgot, with added PAC. |
6720 | // If the GV needs a GOT load, materialize the pointer using the usual |
//   GOT adrp+ldr, followed by a pac. Pointers in the GOT are assumed to be
//   unsigned, and the GOT section is assumed to be read-only (for example,
//   via the relro mechanism). See LowerMOVaddrPAC.
6724 | // |
// - LOADauthptrstatic: similar to LOADgot, but uses a
//   special stub slot instead of a GOT slot.
//   Load a signed pointer for symbol 'sym' from a stub slot named
//   'sym$auth_ptr$key$disc' filled by the dynamic linker during relocation
//   resolving. This usually lowers to adrp+ldr, but also emits an entry into
//   .data with an @AUTH relocation. See LowerLOADauthptrstatic.
6732 | // |
// All 3 are pseudos that are expanded late into longer sequences: this lets us
6734 | // provide integrity guarantees on the to-be-signed intermediate values. |
6735 | // |
6736 | // LOADauthptrstatic is undesirable because it requires a large section filled |
6737 | // with often similarly-signed pointers, making it a good harvesting target. |
// Thus, it's only used for ptrauth references to extern_weak symbols, where
// it avoids the need for null checks.
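//
// As an illustrative sketch (operand details elided), the non-extern_weak
// path below is selected roughly as:
//
//   %g:gpr(p0) = G_PTRAUTH_GLOBAL_VALUE %addr(p0), key, %addrdisc, disc
// -->
//   $x16 = IMPLICIT_DEF
//   $x17 = IMPLICIT_DEF
//   MOVaddrPAC @sym + offset, key, $addrdisc-or-$xzr, disc
//     (LOADgotPAC instead when a GOT load is needed)
//   %g:gpr64 = COPY $x16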
6740 | |
6741 | bool AArch64InstructionSelector::selectPtrAuthGlobalValue( |
6742 | MachineInstr &I, MachineRegisterInfo &MRI) const { |
6743 | Register DefReg = I.getOperand(i: 0).getReg(); |
6744 | Register Addr = I.getOperand(i: 1).getReg(); |
6745 | uint64_t Key = I.getOperand(i: 2).getImm(); |
6746 | Register AddrDisc = I.getOperand(i: 3).getReg(); |
6747 | uint64_t Disc = I.getOperand(i: 4).getImm(); |
6748 | int64_t Offset = 0; |
6749 | |
6750 | if (Key > AArch64PACKey::LAST) |
6751 | report_fatal_error(reason: "key in ptrauth global out of range [0, " + |
6752 | Twine((int)AArch64PACKey::LAST) + "]" ); |
6753 | |
// Blend only works if the integer discriminator is 16 bits wide.
6755 | if (!isUInt<16>(x: Disc)) |
6756 | report_fatal_error( |
6757 | reason: "constant discriminator in ptrauth global out of range [0, 0xffff]" ); |
6758 | |
6759 | // Choosing between 3 lowering alternatives is target-specific. |
6760 | if (!STI.isTargetELF() && !STI.isTargetMachO()) |
6761 | report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF" ); |
6762 | |
6763 | if (!MRI.hasOneDef(RegNo: Addr)) |
6764 | return false; |
6765 | |
6766 | // First match any offset we take from the real global. |
6767 | const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr); |
6768 | if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) { |
6769 | Register OffsetReg = DefMI->getOperand(i: 2).getReg(); |
6770 | if (!MRI.hasOneDef(RegNo: OffsetReg)) |
6771 | return false; |
6772 | const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg); |
6773 | if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT) |
6774 | return false; |
6775 | |
6776 | Addr = DefMI->getOperand(i: 1).getReg(); |
6777 | if (!MRI.hasOneDef(RegNo: Addr)) |
6778 | return false; |
6779 | |
6780 | DefMI = &*MRI.def_instr_begin(RegNo: Addr); |
6781 | Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue(); |
6782 | } |
6783 | |
6784 | // We should be left with a genuine unauthenticated GlobalValue. |
6785 | const GlobalValue *GV; |
6786 | if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { |
6787 | GV = DefMI->getOperand(i: 1).getGlobal(); |
6788 | Offset += DefMI->getOperand(i: 1).getOffset(); |
6789 | } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) { |
6790 | GV = DefMI->getOperand(i: 2).getGlobal(); |
6791 | Offset += DefMI->getOperand(i: 2).getOffset(); |
6792 | } else { |
6793 | return false; |
6794 | } |
6795 | |
6796 | MachineIRBuilder MIB(I); |
6797 | |
6798 | // Classify the reference to determine whether it needs a GOT load. |
6799 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); |
6800 | const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0); |
6801 | assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) && |
6802 | "unsupported non-GOT op flags on ptrauth global reference" ); |
6803 | assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) && |
6804 | "unsupported non-GOT reference to weak ptrauth global" ); |
6805 | |
6806 | std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI); |
6807 | bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0; |
6808 | |
6809 | // Non-extern_weak: |
6810 | // - No GOT load needed -> MOVaddrPAC |
6811 | // - GOT load for non-extern_weak -> LOADgotPAC |
6812 | // Note that we disallow extern_weak refs to avoid null checks later. |
6813 | if (!GV->hasExternalWeakLinkage()) { |
6814 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {}); |
6815 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6816 | MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC) |
6817 | .addGlobalAddress(GV, Offset) |
6818 | .addImm(Val: Key) |
6819 | .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR) |
6820 | .addImm(Val: Disc) |
6821 | .constrainAllUses(TII, TRI, RBI); |
6822 | MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16)); |
6823 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
6824 | I.eraseFromParent(); |
6825 | return true; |
6826 | } |
6827 | |
6828 | // extern_weak -> LOADauthptrstatic |
6829 | |
6830 | // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the |
6831 | // offset alone as a pointer if the symbol wasn't available, which would |
6832 | // probably break null checks in users. Ptrauth complicates things further: |
6833 | // error out. |
6834 | if (Offset != 0) |
6835 | report_fatal_error( |
6836 | reason: "unsupported non-zero offset in weak ptrauth global reference" ); |
6837 | |
6838 | if (HasAddrDisc) |
6839 | report_fatal_error(reason: "unsupported weak addr-div ptrauth global" ); |
6840 | |
6841 | MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {}) |
6842 | .addGlobalAddress(GV, Offset) |
6843 | .addImm(Val: Key) |
6844 | .addImm(Val: Disc); |
6845 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
6846 | |
6847 | I.eraseFromParent(); |
6848 | return true; |
6849 | } |
6850 | |
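/// Select a TBL/TBX table-lookup intrinsic (descriptive sketch of the code
/// below): build a Q-register tuple (REG_SEQUENCE) from the NumVec table
/// vectors and emit Opc1 when the destination is a v8i8, Opc2 when it is a
/// v16i8. For TBX (isExt), the leading vector operand is passed through as an
/// extra source, providing the result lanes for out-of-range indices.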
6851 | void AArch64InstructionSelector::SelectTable(MachineInstr &I, |
6852 | MachineRegisterInfo &MRI, |
6853 | unsigned NumVec, unsigned Opc1, |
6854 | unsigned Opc2, bool isExt) { |
6855 | Register DstReg = I.getOperand(i: 0).getReg(); |
6856 | unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2; |
6857 | |
6858 | // Create the REG_SEQUENCE |
6859 | SmallVector<Register, 4> Regs; |
6860 | for (unsigned i = 0; i < NumVec; i++) |
6861 | Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg()); |
6862 | Register RegSeq = createQTuple(Regs, MIB); |
6863 | |
6864 | Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg(); |
6865 | MachineInstrBuilder Instr; |
6866 | if (isExt) { |
6867 | Register Reg = I.getOperand(i: 2).getReg(); |
6868 | Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg}); |
6869 | } else |
6870 | Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg}); |
6871 | constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI); |
6872 | I.eraseFromParent(); |
6873 | } |
6874 | |
6875 | InstructionSelector::ComplexRendererFns |
6876 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { |
6877 | auto MaybeImmed = getImmedFromMO(Root); |
6878 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6879 | return std::nullopt; |
6880 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; |
6881 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6882 | } |
6883 | |
6884 | InstructionSelector::ComplexRendererFns |
6885 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { |
6886 | auto MaybeImmed = getImmedFromMO(Root); |
6887 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
6888 | return std::nullopt; |
6889 | uint64_t Enc = 31 - *MaybeImmed; |
6890 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6891 | } |
6892 | |
6893 | InstructionSelector::ComplexRendererFns |
6894 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { |
6895 | auto MaybeImmed = getImmedFromMO(Root); |
6896 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6897 | return std::nullopt; |
6898 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; |
6899 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6900 | } |
6901 | |
6902 | InstructionSelector::ComplexRendererFns |
6903 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { |
6904 | auto MaybeImmed = getImmedFromMO(Root); |
6905 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
6906 | return std::nullopt; |
6907 | uint64_t Enc = 63 - *MaybeImmed; |
6908 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
6909 | } |
6910 | |
6911 | /// Helper to select an immediate value that can be represented as a 12-bit |
6912 | /// value shifted left by either 0 or 12. If it is possible to do so, return |
6913 | /// the immediate and shift value. If not, return std::nullopt. |
6914 | /// |
6915 | /// Used by selectArithImmed and selectNegArithImmed. |
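///
/// For example (illustrative): 0xabc yields {0xabc, LSL #0}, 0x123000 yields
/// {0x123, LSL #12}, and 0x1234 yields std::nullopt because its low 12 bits
/// are not clear.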
6916 | InstructionSelector::ComplexRendererFns |
6917 | AArch64InstructionSelector::select12BitValueWithLeftShift( |
6918 | uint64_t Immed) const { |
6919 | unsigned ShiftAmt; |
6920 | if (Immed >> 12 == 0) { |
6921 | ShiftAmt = 0; |
6922 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
6923 | ShiftAmt = 12; |
6924 | Immed = Immed >> 12; |
6925 | } else |
6926 | return std::nullopt; |
6927 | |
6928 | unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt); |
6929 | return {{ |
6930 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); }, |
6931 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); }, |
6932 | }}; |
6933 | } |
6934 | |
6935 | /// SelectArithImmed - Select an immediate value that can be represented as |
6936 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with |
6937 | /// Val set to the 12-bit value and Shift set to the shifter operand. |
6938 | InstructionSelector::ComplexRendererFns |
6939 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { |
6940 | // This function is called from the addsub_shifted_imm ComplexPattern, |
// which lists [imm] as the list of opcodes it's interested in; however,
6942 | // we still need to check whether the operand is actually an immediate |
6943 | // here because the ComplexPattern opcode list is only used in |
6944 | // root-level opcode matching. |
6945 | auto MaybeImmed = getImmedFromMO(Root); |
6946 | if (MaybeImmed == std::nullopt) |
6947 | return std::nullopt; |
6948 | return select12BitValueWithLeftShift(Immed: *MaybeImmed); |
6949 | } |
6950 | |
6951 | /// SelectNegArithImmed - As above, but negates the value before trying to |
6952 | /// select it. |
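///
/// For example (illustrative), an immediate of -5 fails selectArithImmed, but
/// its negation, 5, fits the 12-bit form; this is how a compare against -5
/// can be selected as "cmn wN, #5" rather than "cmp wN, #-5".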
6953 | InstructionSelector::ComplexRendererFns |
6954 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { |
// We need a register here, because we need to know whether we have a 64-bit
// or a 32-bit immediate.
6957 | if (!Root.isReg()) |
6958 | return std::nullopt; |
6959 | auto MaybeImmed = getImmedFromMO(Root); |
6960 | if (MaybeImmed == std::nullopt) |
6961 | return std::nullopt; |
6962 | uint64_t Immed = *MaybeImmed; |
6963 | |
6964 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
6965 | // have the opposite effect on the C flag, so this pattern mustn't match under |
6966 | // those circumstances. |
6967 | if (Immed == 0) |
6968 | return std::nullopt; |
6969 | |
6970 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on |
6971 | // the root. |
6972 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
6973 | if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32) |
6974 | Immed = ~((uint32_t)Immed) + 1; |
6975 | else |
6976 | Immed = ~Immed + 1ULL; |
6977 | |
6978 | if (Immed & 0xFFFFFFFFFF000000ULL) |
6979 | return std::nullopt; |
6980 | |
6981 | Immed &= 0xFFFFFFULL; |
6982 | return select12BitValueWithLeftShift(Immed); |
6983 | } |
6984 | |
6985 | /// Checks if we are sure that folding MI into load/store addressing mode is |
6986 | /// beneficial or not. |
6987 | /// |
6988 | /// Returns: |
6989 | /// - true if folding MI would be beneficial. |
6990 | /// - false if folding MI would be bad. |
6991 | /// - std::nullopt if it is not sure whether folding MI is beneficial. |
6992 | /// |
6993 | /// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example: |
6994 | /// |
6995 | /// %13:gpr(s64) = G_CONSTANT i64 1 |
6996 | /// %8:gpr(s64) = G_SHL %6, %13(s64) |
6997 | /// %9:gpr(p0) = G_PTR_ADD %0, %8(s64) |
6998 | /// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16)) |
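///
/// For the G_SHL above (shift amount 1), this returns false on subtargets
/// with AddrLSLSlow14 and true otherwise; for non-constant shift amounts (or
/// non-G_SHL defs) it returns std::nullopt.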
6999 | std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode( |
7000 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { |
7001 | if (MI.getOpcode() == AArch64::G_SHL) { |
// Address operands with shifts are free, except when running on subtargets
7003 | // with AddrLSLSlow14. |
if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
VReg: MI.getOperand(i: 2).getReg(), MRI)) {
const APInt ShiftVal = ValAndVReg->Value;
7007 | |
7008 | // Don't fold if we know this will be slow. |
7009 | return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4)); |
7010 | } |
7011 | } |
7012 | return std::nullopt; |
7013 | } |
7014 | |
7015 | /// Return true if it is worth folding MI into an extended register. That is, |
7016 | /// if it's safe to pull it into the addressing mode of a load or store as a |
7017 | /// shift. |
7018 | /// \p IsAddrOperand whether the def of MI is used as an address operand |
7019 | /// (e.g. feeding into an LDR/STR). |
7020 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( |
7021 | MachineInstr &MI, const MachineRegisterInfo &MRI, |
7022 | bool IsAddrOperand) const { |
7023 | |
7024 | // Always fold if there is one use, or if we're optimizing for size. |
7025 | Register DefReg = MI.getOperand(i: 0).getReg(); |
7026 | if (MRI.hasOneNonDBGUse(RegNo: DefReg) || |
7027 | MI.getParent()->getParent()->getFunction().hasOptSize()) |
7028 | return true; |
7029 | |
7030 | if (IsAddrOperand) { |
7031 | // If we are already sure that folding MI is good or bad, return the result. |
7032 | if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI)) |
7033 | return *Worth; |
7034 | |
7035 | // Fold G_PTR_ADD if its offset operand can be folded |
7036 | if (MI.getOpcode() == AArch64::G_PTR_ADD) { |
7037 | MachineInstr *OffsetInst = |
7038 | getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
7039 | |
7040 | // Note, we already know G_PTR_ADD is used by at least two instructions. |
7041 | // If we are also sure about whether folding is beneficial or not, |
7042 | // return the result. |
7043 | if (const auto Worth = isWorthFoldingIntoAddrMode(MI&: *OffsetInst, MRI)) |
7044 | return *Worth; |
7045 | } |
7046 | } |
7047 | |
7048 | // FIXME: Consider checking HasALULSLFast as appropriate. |
7049 | |
7050 | // We have a fastpath, so folding a shift in and potentially computing it |
7051 | // many times may be beneficial. Check if this is only used in memory ops. |
7052 | // If it is, then we should fold. |
7053 | return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg), |
7054 | P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); |
7055 | } |
7056 | |
7057 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { |
7058 | switch (Type) { |
7059 | case AArch64_AM::SXTB: |
7060 | case AArch64_AM::SXTH: |
7061 | case AArch64_AM::SXTW: |
7062 | return true; |
7063 | default: |
7064 | return false; |
7065 | } |
7066 | } |
7067 | |
7068 | InstructionSelector::ComplexRendererFns |
7069 | AArch64InstructionSelector::selectExtendedSHL( |
7070 | MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, |
7071 | unsigned SizeInBytes, bool WantsExt) const { |
7072 | assert(Base.isReg() && "Expected base to be a register operand" ); |
7073 | assert(Offset.isReg() && "Expected offset to be a register operand" ); |
7074 | |
7075 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7076 | MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg()); |
7077 | |
7078 | unsigned OffsetOpc = OffsetInst->getOpcode(); |
7079 | bool LookedThroughZExt = false; |
7080 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { |
7081 | // Try to look through a ZEXT. |
7082 | if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) |
7083 | return std::nullopt; |
7084 | |
7085 | OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg()); |
7086 | OffsetOpc = OffsetInst->getOpcode(); |
7087 | LookedThroughZExt = true; |
7088 | |
7089 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) |
7090 | return std::nullopt; |
7091 | } |
7092 | // Make sure that the memory op is a valid size. |
7093 | int64_t LegalShiftVal = Log2_32(Value: SizeInBytes); |
7094 | if (LegalShiftVal == 0) |
7095 | return std::nullopt; |
7096 | if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true)) |
7097 | return std::nullopt; |
7098 | |
7099 | // Now, try to find the specific G_CONSTANT. Start by assuming that the |
7100 | // register we will offset is the LHS, and the register containing the |
7101 | // constant is the RHS. |
7102 | Register OffsetReg = OffsetInst->getOperand(i: 1).getReg(); |
7103 | Register ConstantReg = OffsetInst->getOperand(i: 2).getReg(); |
7104 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
7105 | if (!ValAndVReg) { |
7106 | // We didn't get a constant on the RHS. If the opcode is a shift, then |
7107 | // we're done. |
7108 | if (OffsetOpc == TargetOpcode::G_SHL) |
7109 | return std::nullopt; |
7110 | |
7111 | // If we have a G_MUL, we can use either register. Try looking at the RHS. |
7112 | std::swap(a&: OffsetReg, b&: ConstantReg); |
7113 | ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
7114 | if (!ValAndVReg) |
7115 | return std::nullopt; |
7116 | } |
7117 | |
7118 | // The value must fit into 3 bits, and must be positive. Make sure that is |
7119 | // true. |
7120 | int64_t ImmVal = ValAndVReg->Value.getSExtValue(); |
7121 | |
7122 | // Since we're going to pull this into a shift, the constant value must be |
7123 | // a power of 2. If we got a multiply, then we need to check this. |
7124 | if (OffsetOpc == TargetOpcode::G_MUL) { |
7125 | if (!llvm::has_single_bit<uint32_t>(Value: ImmVal)) |
7126 | return std::nullopt; |
7127 | |
7128 | // Got a power of 2. So, the amount we'll shift is the log base-2 of that. |
7129 | ImmVal = Log2_32(Value: ImmVal); |
7130 | } |
7131 | |
7132 | if ((ImmVal & 0x7) != ImmVal) |
7133 | return std::nullopt; |
7134 | |
7135 | // We are only allowed to shift by LegalShiftVal. This shift value is built |
7136 | // into the instruction, so we can't just use whatever we want. |
7137 | if (ImmVal != LegalShiftVal) |
7138 | return std::nullopt; |
7139 | |
7140 | unsigned SignExtend = 0; |
7141 | if (WantsExt) { |
7142 | // Check if the offset is defined by an extend, unless we looked through a |
7143 | // G_ZEXT earlier. |
7144 | if (!LookedThroughZExt) { |
7145 | MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI); |
7146 | auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true); |
7147 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7148 | return std::nullopt; |
7149 | |
7150 | SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0; |
7151 | // We only support SXTW for signed extension here. |
7152 | if (SignExtend && Ext != AArch64_AM::SXTW) |
7153 | return std::nullopt; |
7154 | OffsetReg = ExtInst->getOperand(i: 1).getReg(); |
7155 | } |
7156 | |
7157 | // Need a 32-bit wide register here. |
7158 | MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg())); |
7159 | OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB); |
7160 | } |
7161 | |
7162 | // We can use the LHS of the GEP as the base, and the LHS of the shift as an |
7163 | // offset. Signify that we are shifting by setting the shift flag to 1. |
7164 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); }, |
7165 | [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); }, |
7166 | [=](MachineInstrBuilder &MIB) { |
7167 | // Need to add both immediates here to make sure that they are both |
7168 | // added to the instruction. |
7169 | MIB.addImm(Val: SignExtend); |
7170 | MIB.addImm(Val: 1); |
7171 | }}}; |
7172 | } |
7173 | |
7174 | /// This is used for computing addresses like this: |
7175 | /// |
7176 | /// ldr x1, [x2, x3, lsl #3] |
7177 | /// |
7178 | /// Where x2 is the base register, and x3 is an offset register. The shift-left |
7179 | /// is a constant value specific to this load instruction. That is, we'll never |
7180 | /// see anything other than a 3 here (which corresponds to the size of the |
7181 | /// element being loaded.) |
7182 | InstructionSelector::ComplexRendererFns |
7183 | AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( |
7184 | MachineOperand &Root, unsigned SizeInBytes) const { |
7185 | if (!Root.isReg()) |
7186 | return std::nullopt; |
7187 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7188 | |
7189 | // We want to find something like this: |
7190 | // |
7191 | // val = G_CONSTANT LegalShiftVal |
7192 | // shift = G_SHL off_reg val |
7193 | // ptr = G_PTR_ADD base_reg shift |
7194 | // x = G_LOAD ptr |
7195 | // |
7196 | // And fold it into this addressing mode: |
7197 | // |
7198 | // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] |
7199 | |
7200 | // Check if we can find the G_PTR_ADD. |
7201 | MachineInstr *PtrAdd = |
7202 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7203 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true)) |
7204 | return std::nullopt; |
7205 | |
7206 | // Now, try to match an opcode which will match our specific offset. |
7207 | // We want a G_SHL or a G_MUL. |
7208 | MachineInstr *OffsetInst = |
7209 | getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI); |
7210 | return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1), |
7211 | Offset&: OffsetInst->getOperand(i: 0), SizeInBytes, |
7212 | /*WantsExt=*/false); |
7213 | } |
7214 | |
7215 | /// This is used for computing addresses like this: |
7216 | /// |
7217 | /// ldr x1, [x2, x3] |
7218 | /// |
7219 | /// Where x2 is the base register, and x3 is an offset register. |
7220 | /// |
7221 | /// When possible (or profitable) to fold a G_PTR_ADD into the address |
7222 | /// calculation, this will do so. Otherwise, it will return std::nullopt. |
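///
/// For example (sketch), given
///   %ptr:gpr(p0) = G_PTR_ADD %base, %off
/// with a single (non-debug) user, the renderers below add %base, %off, and
/// two zero immediates (no extend, no shift), matching "ldr x1, [x2, x3]".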
7223 | InstructionSelector::ComplexRendererFns |
7224 | AArch64InstructionSelector::selectAddrModeRegisterOffset( |
7225 | MachineOperand &Root) const { |
7226 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7227 | |
7228 | // We need a GEP. |
7229 | MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg()); |
7230 | if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD) |
7231 | return std::nullopt; |
7232 | |
7233 | // If this is used more than once, let's not bother folding. |
7234 | // TODO: Check if they are memory ops. If they are, then we can still fold |
7235 | // without having to recompute anything. |
7236 | if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg())) |
7237 | return std::nullopt; |
7238 | |
7239 | // Base is the GEP's LHS, offset is its RHS. |
7240 | return {{[=](MachineInstrBuilder &MIB) { |
7241 | MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg()); |
7242 | }, |
7243 | [=](MachineInstrBuilder &MIB) { |
7244 | MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg()); |
7245 | }, |
7246 | [=](MachineInstrBuilder &MIB) { |
7247 | // Need to add both immediates here to make sure that they are both |
7248 | // added to the instruction. |
7249 | MIB.addImm(Val: 0); |
7250 | MIB.addImm(Val: 0); |
7251 | }}}; |
7252 | } |
7253 | |
7254 | /// This is intended to be equivalent to selectAddrModeXRO in |
/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
7256 | InstructionSelector::ComplexRendererFns |
7257 | AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, |
7258 | unsigned SizeInBytes) const { |
7259 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7260 | if (!Root.isReg()) |
7261 | return std::nullopt; |
7262 | MachineInstr *PtrAdd = |
7263 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7264 | if (!PtrAdd) |
7265 | return std::nullopt; |
7266 | |
// Check for immediates which cannot be encoded in the [base + imm]
7268 | // addressing mode, and can't be encoded in an add/sub. If this happens, we'll |
7269 | // end up with code like: |
7270 | // |
7271 | // mov x0, wide |
// add x1, base, x0
7273 | // ldr x2, [x1, x0] |
7274 | // |
7275 | // In this situation, we can use the [base, xreg] addressing mode to save an |
7276 | // add/sub: |
7277 | // |
7278 | // mov x0, wide |
7279 | // ldr x2, [base, x0] |
7280 | auto ValAndVReg = |
7281 | getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI); |
7282 | if (ValAndVReg) { |
7283 | unsigned Scale = Log2_32(Value: SizeInBytes); |
7284 | int64_t ImmOff = ValAndVReg->Value.getSExtValue(); |
7285 | |
// Skip immediates that can be selected in the load/store addressing
7287 | // mode. |
7288 | if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && |
7289 | ImmOff < (0x1000 << Scale)) |
7290 | return std::nullopt; |
7291 | |
7292 | // Helper lambda to decide whether or not it is preferable to emit an add. |
7293 | auto isPreferredADD = [](int64_t ImmOff) { |
7294 | // Constants in [0x0, 0xfff] can be encoded in an add. |
7295 | if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) |
7296 | return true; |
7297 | |
7298 | // Can it be encoded in an add lsl #12? |
7299 | if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) |
7300 | return false; |
7301 | |
7302 | // It can be encoded in an add lsl #12, but we may not want to. If it is |
7303 | // possible to select this as a single movz, then prefer that. A single |
7304 | // movz is faster than an add with a shift. |
7305 | return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && |
7306 | (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; |
7307 | }; |
7308 | |
7309 | // If the immediate can be encoded in a single add/sub, then bail out. |
7310 | if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) |
7311 | return std::nullopt; |
7312 | } |
7313 | |
7314 | // Try to fold shifts into the addressing mode. |
7315 | auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); |
7316 | if (AddrModeFns) |
7317 | return AddrModeFns; |
7318 | |
7319 | // If that doesn't work, see if it's possible to fold in registers from |
7320 | // a GEP. |
7321 | return selectAddrModeRegisterOffset(Root); |
7322 | } |
7323 | |
7324 | /// This is used for computing addresses like this: |
7325 | /// |
7326 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] |
7327 | /// |
7328 | /// Where we have a 64-bit base register, a 32-bit offset register, and an |
7329 | /// extend (which may or may not be signed). |
7330 | InstructionSelector::ComplexRendererFns |
7331 | AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, |
7332 | unsigned SizeInBytes) const { |
7333 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7334 | |
7335 | MachineInstr *PtrAdd = |
7336 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7337 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true)) |
7338 | return std::nullopt; |
7339 | |
7340 | MachineOperand &LHS = PtrAdd->getOperand(i: 1); |
7341 | MachineOperand &RHS = PtrAdd->getOperand(i: 2); |
7342 | MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI); |
7343 | |
7344 | // The first case is the same as selectAddrModeXRO, except we need an extend. |
7345 | // In this case, we try to find a shift and extend, and fold them into the |
7346 | // addressing mode. |
7347 | // |
7348 | // E.g. |
7349 | // |
7350 | // off_reg = G_Z/S/ANYEXT ext_reg |
7351 | // val = G_CONSTANT LegalShiftVal |
7352 | // shift = G_SHL off_reg val |
7353 | // ptr = G_PTR_ADD base_reg shift |
7354 | // x = G_LOAD ptr |
7355 | // |
7356 | // In this case we can get a load like this: |
7357 | // |
7358 | // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] |
7359 | auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0), |
7360 | SizeInBytes, /*WantsExt=*/true); |
7361 | if (ExtendedShl) |
7362 | return ExtendedShl; |
7363 | |
// There was no shift. We can still try to fold in a G_Z/S/ANYEXT on its own.
7365 | // |
7366 | // e.g. |
7367 | // ldr something, [base_reg, ext_reg, sxtw] |
7368 | if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true)) |
7369 | return std::nullopt; |
7370 | |
7371 | // Check if this is an extend. We'll get an extend type if it is. |
7372 | AArch64_AM::ShiftExtendType Ext = |
7373 | getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true); |
7374 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7375 | return std::nullopt; |
7376 | |
7377 | // Need a 32-bit wide register. |
7378 | MachineIRBuilder MIB(*PtrAdd); |
7379 | Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(), |
7380 | RC: AArch64::GPR32RegClass, MIB); |
7381 | unsigned SignExtend = Ext == AArch64_AM::SXTW; |
7382 | |
7383 | // Base is LHS, offset is ExtReg. |
7384 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); }, |
7385 | [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }, |
7386 | [=](MachineInstrBuilder &MIB) { |
7387 | MIB.addImm(Val: SignExtend); |
7388 | MIB.addImm(Val: 0); |
7389 | }}}; |
7390 | } |
7391 | |
7392 | /// Select a "register plus unscaled signed 9-bit immediate" address. This |
7393 | /// should only match when there is an offset that is not valid for a scaled |
7394 | /// immediate addressing mode. The "Size" argument is the size in bytes of the |
7395 | /// memory reference, which is needed here to know what is valid for a scaled |
7396 | /// immediate. |
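///
/// For example (illustrative), a (load (s64)) at offset -8 from its base
/// cannot use the scaled form (which needs a non-negative multiple of 8), but
/// -8 is within the unscaled [-256, 255] range, so it can be selected as
/// something like "ldur x1, [x0, #-8]".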
7397 | InstructionSelector::ComplexRendererFns |
7398 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, |
7399 | unsigned Size) const { |
7400 | MachineRegisterInfo &MRI = |
7401 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7402 | |
7403 | if (!Root.isReg()) |
7404 | return std::nullopt; |
7405 | |
7406 | if (!isBaseWithConstantOffset(Root, MRI)) |
7407 | return std::nullopt; |
7408 | |
7409 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7410 | |
7411 | MachineOperand &OffImm = RootDef->getOperand(i: 2); |
7412 | if (!OffImm.isReg()) |
7413 | return std::nullopt; |
7414 | MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg()); |
7415 | if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) |
7416 | return std::nullopt; |
7417 | int64_t RHSC; |
7418 | MachineOperand &RHSOp1 = RHS->getOperand(i: 1); |
7419 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) |
7420 | return std::nullopt; |
7421 | RHSC = RHSOp1.getCImm()->getSExtValue(); |
7422 | |
7423 | if (RHSC >= -256 && RHSC < 256) { |
7424 | MachineOperand &Base = RootDef->getOperand(i: 1); |
7425 | return {{ |
7426 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); }, |
7427 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); }, |
7428 | }}; |
7429 | } |
7430 | return std::nullopt; |
7431 | } |
7432 | |
7433 | InstructionSelector::ComplexRendererFns |
7434 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, |
7435 | unsigned Size, |
7436 | MachineRegisterInfo &MRI) const { |
7437 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) |
7438 | return std::nullopt; |
7439 | MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg()); |
7440 | if (Adrp.getOpcode() != AArch64::ADRP) |
7441 | return std::nullopt; |
7442 | |
7443 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. |
7444 | auto Offset = Adrp.getOperand(i: 1).getOffset(); |
7445 | if (Offset % Size != 0) |
7446 | return std::nullopt; |
7447 | |
7448 | auto GV = Adrp.getOperand(i: 1).getGlobal(); |
7449 | if (GV->isThreadLocal()) |
7450 | return std::nullopt; |
7451 | |
7452 | auto &MF = *RootDef.getParent()->getParent(); |
7453 | if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size) |
7454 | return std::nullopt; |
7455 | |
7456 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget()); |
7457 | MachineIRBuilder MIRBuilder(RootDef); |
7458 | Register AdrpReg = Adrp.getOperand(i: 0).getReg(); |
7459 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); }, |
7460 | [=](MachineInstrBuilder &MIB) { |
7461 | MIB.addGlobalAddress(GV, Offset, |
7462 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | |
7463 | AArch64II::MO_NC); |
7464 | }}}; |
7465 | } |
7466 | |
7467 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The |
7468 | /// "Size" argument is the size in bytes of the memory reference, which |
7469 | /// determines the scale. |
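///
/// For example (illustrative), with Size == 8 a constant offset of 16 is
/// rendered as the base plus the scaled immediate 2 (16 >> Log2_32(8)), i.e.
/// the form used by "ldr x0, [x1, #16]".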
7470 | InstructionSelector::ComplexRendererFns |
7471 | AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, |
7472 | unsigned Size) const { |
7473 | MachineFunction &MF = *Root.getParent()->getParent()->getParent(); |
7474 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
7475 | |
7476 | if (!Root.isReg()) |
7477 | return std::nullopt; |
7478 | |
7479 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7480 | if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { |
7481 | return {{ |
7482 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); }, |
7483 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); }, |
7484 | }}; |
7485 | } |
7486 | |
7487 | CodeModel::Model CM = MF.getTarget().getCodeModel(); |
// Check if we can fold in the ADD of a small code model ADRP + ADD address.
7489 | if (CM == CodeModel::Small) { |
7490 | auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI); |
7491 | if (OpFns) |
7492 | return OpFns; |
7493 | } |
7494 | |
7495 | if (isBaseWithConstantOffset(Root, MRI)) { |
7496 | MachineOperand &LHS = RootDef->getOperand(i: 1); |
7497 | MachineOperand &RHS = RootDef->getOperand(i: 2); |
7498 | MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg()); |
7499 | MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg()); |
7500 | |
7501 | int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue(); |
7502 | unsigned Scale = Log2_32(Value: Size); |
7503 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { |
7504 | if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) |
7505 | return {{ |
7506 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); }, |
7507 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); }, |
7508 | }}; |
7509 | |
7510 | return {{ |
7511 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); }, |
7512 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); }, |
7513 | }}; |
7514 | } |
7515 | } |
7516 | |
7517 | // Before falling back to our general case, check if the unscaled |
7518 | // instructions can handle this. If so, that's preferable. |
7519 | if (selectAddrModeUnscaled(Root, Size)) |
7520 | return std::nullopt; |
7521 | |
7522 | return {{ |
7523 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); }, |
7524 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); }, |
7525 | }}; |
7526 | } |
7527 | |
7528 | /// Given a shift instruction, return the correct shift type for that |
7529 | /// instruction. |
7530 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { |
7531 | switch (MI.getOpcode()) { |
7532 | default: |
7533 | return AArch64_AM::InvalidShiftExtend; |
7534 | case TargetOpcode::G_SHL: |
7535 | return AArch64_AM::LSL; |
7536 | case TargetOpcode::G_LSHR: |
7537 | return AArch64_AM::LSR; |
7538 | case TargetOpcode::G_ASHR: |
7539 | return AArch64_AM::ASR; |
7540 | case TargetOpcode::G_ROTR: |
7541 | return AArch64_AM::ROR; |
7542 | } |
7543 | } |
7544 | |
7545 | /// Select a "shifted register" operand. If the value is not shifted, set the |
7546 | /// shift operand to a default value of "lsl 0". |
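///
/// For example (illustrative), if Root is defined by
///   %c:gpr(s64) = G_CONSTANT i64 3
///   %sh:gpr(s64) = G_SHL %x, %c(s64)
/// this renders %x plus the shifter immediate for "LSL #3", as used in e.g.
/// "add x0, x1, x2, lsl #3".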
7547 | InstructionSelector::ComplexRendererFns |
7548 | AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root, |
7549 | bool AllowROR) const { |
7550 | if (!Root.isReg()) |
7551 | return std::nullopt; |
7552 | MachineRegisterInfo &MRI = |
7553 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7554 | |
7555 | // Check if the operand is defined by an instruction which corresponds to |
7556 | // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. |
7557 | MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg()); |
7558 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst); |
7559 | if (ShType == AArch64_AM::InvalidShiftExtend) |
7560 | return std::nullopt; |
7561 | if (ShType == AArch64_AM::ROR && !AllowROR) |
7562 | return std::nullopt; |
7563 | if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI, IsAddrOperand: false)) |
7564 | return std::nullopt; |
7565 | |
7566 | // Need an immediate on the RHS. |
7567 | MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2); |
7568 | auto Immed = getImmedFromMO(Root: ShiftRHS); |
7569 | if (!Immed) |
7570 | return std::nullopt; |
7571 | |
7572 | // We have something that we can fold. Fold in the shift's LHS and RHS into |
7573 | // the instruction. |
7574 | MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1); |
7575 | Register ShiftReg = ShiftLHS.getReg(); |
7576 | |
7577 | unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits(); |
7578 | unsigned Val = *Immed & (NumBits - 1); |
7579 | unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val); |
7580 | |
7581 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); }, |
7582 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}}; |
7583 | } |
7584 | |
7585 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( |
7586 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { |
7587 | unsigned Opc = MI.getOpcode(); |
7588 | |
7589 | // Handle explicit extend instructions first. |
7590 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { |
7591 | unsigned Size; |
7592 | if (Opc == TargetOpcode::G_SEXT) |
7593 | Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7594 | else |
7595 | Size = MI.getOperand(i: 2).getImm(); |
7596 | assert(Size != 64 && "Extend from 64 bits?" ); |
7597 | switch (Size) { |
7598 | case 8: |
7599 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; |
7600 | case 16: |
7601 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; |
7602 | case 32: |
7603 | return AArch64_AM::SXTW; |
7604 | default: |
7605 | return AArch64_AM::InvalidShiftExtend; |
7606 | } |
7607 | } |
7608 | |
7609 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { |
7610 | unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
7611 | assert(Size != 64 && "Extend from 64 bits?" ); |
7612 | switch (Size) { |
7613 | case 8: |
7614 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; |
7615 | case 16: |
7616 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; |
7617 | case 32: |
7618 | return AArch64_AM::UXTW; |
7619 | default: |
7620 | return AArch64_AM::InvalidShiftExtend; |
7621 | } |
7622 | } |
7623 | |
7624 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask |
7625 | // on the RHS. |
7626 | if (Opc != TargetOpcode::G_AND) |
7627 | return AArch64_AM::InvalidShiftExtend; |
7628 | |
7629 | std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2)); |
7630 | if (!MaybeAndMask) |
7631 | return AArch64_AM::InvalidShiftExtend; |
7632 | uint64_t AndMask = *MaybeAndMask; |
7633 | switch (AndMask) { |
7634 | default: |
7635 | return AArch64_AM::InvalidShiftExtend; |
7636 | case 0xFF: |
7637 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; |
7638 | case 0xFFFF: |
7639 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; |
7640 | case 0xFFFFFFFF: |
7641 | return AArch64_AM::UXTW; |
7642 | } |
7643 | } |
7644 | |
7645 | Register AArch64InstructionSelector::moveScalarRegClass( |
7646 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { |
7647 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
7648 | auto Ty = MRI.getType(Reg); |
7649 | assert(!Ty.isVector() && "Expected scalars only!" ); |
7650 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) |
7651 | return Reg; |
7652 | |
7653 | // Create a copy and immediately select it. |
7654 | // FIXME: We should have an emitCopy function? |
7655 | auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg}); |
7656 | selectCopy(I&: *Copy, TII, MRI, TRI, RBI); |
7657 | return Copy.getReg(Idx: 0); |
7658 | } |
7659 | |
7660 | /// Select an "extended register" operand. This operand folds in an extend |
7661 | /// followed by an optional left shift. |
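///
/// For example (illustrative):
///   %ext:gpr(s64) = G_SEXT %w(s32)
///   %sh:gpr(s64) = G_SHL %ext, %c   (%c = G_CONSTANT i64 2)
/// folds to an extended-register operand equivalent to "SXTW #2", as used in
/// e.g. "add x0, x1, w2, sxtw #2".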
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, /*IsAddrOperand=*/false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

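/// Select an operand which is the high (lane 1) half of a 128-bit vector,
/// i.e. the second result of a G_UNMERGE_VALUES or a G_EXTRACT_VECTOR_ELT of
/// lane 1 from a <2 x s64> source. Bitcasts are looked through on
/// little-endian targets; on a match, the underlying vector register is
/// rendered.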
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    Extract =
        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
  if (!Extract)
    return std::nullopt;

  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
      Register ExtReg = Extract->MI->getOperand(2).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
    auto LaneIdx = getIConstantVRegValWithLookThrough(
        Extract->MI->getOperand(2).getReg(), MRI);
    if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
        LaneIdx->Value.getSExtValue() == 1) {
      Register ExtReg = Extract->MI->getOperand(1).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }

  return std::nullopt;
}

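/// Custom renderer: add the value of a G_CONSTANT as a plain immediate
/// operand.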
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(*CstVal);
}

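/// Custom renderer: add a G_CONSTANT as a 32-bit logical immediate, i.e. in
/// the encoded bitmask-immediate form used by the logical instructions
/// (e.g. AND, ORR, EOR).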
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

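/// Custom renderer: add a G_CONSTANT as a 64-bit logical (bitmask) immediate.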
void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

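/// Custom renderer for G_UBSANTRAP: fold the check kind together with a 'U'
/// marker in the upper byte to form the immediate of the emitted trap
/// instruction.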
void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
         "Expected G_UBSANTRAP");
  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
}

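/// Custom renderer: add a half-precision G_FCONSTANT as its 8-bit encoded
/// floating-point immediate. The 32- and 64-bit variants below do the same
/// for their respective widths.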
void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

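/// Custom renderer: add the bit pattern of a single-precision G_FCONSTANT in
/// the AdvSIMD modified-immediate (type 4) encoding.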
void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
                                                      .getFPImm()
                                                      ->getValueAPF()
                                                      .bitcastToAPInt()
                                                      .getZExtValue()));
}

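/// Return true if \p MI is a load or store of \p NumBytes bytes.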
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

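/// Return true if the 32-bit result of \p MI is known to zero out the upper
/// half of the corresponding 64-bit register, i.e. the write provides an
/// implicit zero-extension.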
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types < 32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    // =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks; we need to fix up the operands.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}


namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 const AArch64Subtarget &Subtarget,
                                 const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm