1 | //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements the targeting of the InstructionSelector class for |
10 | /// AArch64. |
11 | /// \todo This should be generated by TableGen. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AArch64GlobalISelUtils.h" |
15 | #include "AArch64InstrInfo.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64RegisterBankInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "AArch64TargetMachine.h" |
21 | #include "MCTargetDesc/AArch64AddressingModes.h" |
22 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
23 | #include "llvm/BinaryFormat/Dwarf.h" |
24 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
25 | #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" |
26 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
27 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
28 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
29 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
30 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
31 | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | #include "llvm/CodeGen/MachineConstantPool.h" |
33 | #include "llvm/CodeGen/MachineFrameInfo.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" |
35 | #include "llvm/CodeGen/MachineInstr.h" |
36 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
37 | #include "llvm/CodeGen/MachineMemOperand.h" |
38 | #include "llvm/CodeGen/MachineOperand.h" |
39 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | #include "llvm/CodeGen/TargetOpcodes.h" |
41 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
42 | #include "llvm/IR/Constants.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Instructions.h" |
45 | #include "llvm/IR/IntrinsicsAArch64.h" |
46 | #include "llvm/IR/Type.h" |
47 | #include "llvm/Pass.h" |
48 | #include "llvm/Support/Debug.h" |
49 | #include "llvm/Support/raw_ostream.h" |
50 | #include <optional> |
51 | |
52 | #define DEBUG_TYPE "aarch64-isel" |
53 | |
54 | using namespace llvm; |
55 | using namespace MIPatternMatch; |
56 | using namespace AArch64GISelUtils; |
57 | |
58 | namespace llvm { |
59 | class BlockFrequencyInfo; |
60 | class ProfileSummaryInfo; |
61 | } |
62 | |
63 | namespace { |
64 | |
65 | #define GET_GLOBALISEL_PREDICATE_BITSET |
66 | #include "AArch64GenGlobalISel.inc" |
67 | #undef GET_GLOBALISEL_PREDICATE_BITSET |
68 | |
69 | |
70 | class AArch64InstructionSelector : public InstructionSelector { |
71 | public: |
72 | AArch64InstructionSelector(const AArch64TargetMachine &TM, |
73 | const AArch64Subtarget &STI, |
74 | const AArch64RegisterBankInfo &RBI); |
75 | |
76 | bool select(MachineInstr &I) override; |
77 | static const char *getName() { return DEBUG_TYPE; } |
78 | |
79 | void setupMF(MachineFunction &MF, GISelValueTracking *VT, |
80 | CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, |
81 | BlockFrequencyInfo *BFI) override { |
InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 | MIB.setMF(MF); |
84 | |
85 | // hasFnAttribute() is expensive to call on every BRCOND selection, so |
86 | // cache it here for each run of the selector. |
87 | ProduceNonFlagSettingCondBr = |
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 | MFReturnAddr = Register(); |
90 | |
91 | processPHIs(MF); |
92 | } |
93 | |
94 | private: |
95 | /// tblgen-erated 'select' implementation, used as the initial selector for |
96 | /// the patterns that don't require complex C++. |
97 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
98 | |
99 | // A lowering phase that runs before any selection attempts. |
100 | // Returns true if the instruction was modified. |
101 | bool preISelLower(MachineInstr &I); |
102 | |
103 | // An early selection function that runs before the selectImpl() call. |
104 | bool earlySelect(MachineInstr &I); |
105 | |
106 | /// Save state that is shared between select calls, call select on \p I and |
107 | /// then restore the saved state. This can be used to recursively call select |
108 | /// within a select call. |
109 | bool selectAndRestoreState(MachineInstr &I); |
110 | |
111 | // Do some preprocessing of G_PHIs before we begin selection. |
112 | void processPHIs(MachineFunction &MF); |
113 | |
114 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
115 | |
116 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). |
117 | bool contractCrossBankCopyIntoStore(MachineInstr &I, |
118 | MachineRegisterInfo &MRI); |
119 | |
120 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); |
121 | |
122 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, |
123 | MachineRegisterInfo &MRI) const; |
124 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, |
125 | MachineRegisterInfo &MRI) const; |
126 | |
127 | ///@{ |
128 | /// Helper functions for selectCompareBranch. |
129 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, |
130 | MachineIRBuilder &MIB) const; |
131 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
132 | MachineIRBuilder &MIB) const; |
133 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, |
134 | MachineIRBuilder &MIB) const; |
135 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, |
136 | MachineBasicBlock *DstMBB, |
137 | MachineIRBuilder &MIB) const; |
138 | ///@} |
139 | |
140 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, |
141 | MachineRegisterInfo &MRI); |
142 | |
143 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); |
144 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); |
145 | |
// Helper to generate an equivalent of scalar_to_vector into a new register
// of class DstRC, returning the instruction that defines it.
148 | MachineInstr *emitScalarToVector(unsigned EltSize, |
149 | const TargetRegisterClass *DstRC, |
150 | Register Scalar, |
151 | MachineIRBuilder &MIRBuilder) const; |
/// Helper to narrow a vector that was widened by emitScalarToVector.
/// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
/// 32-bit vector, respectively.
155 | MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg, |
156 | MachineIRBuilder &MIRBuilder, |
157 | MachineRegisterInfo &MRI) const; |
158 | |
159 | /// Emit a lane insert into \p DstReg, or a new vector register if |
160 | /// std::nullopt is provided. |
161 | /// |
162 | /// The lane inserted into is defined by \p LaneIdx. The vector source |
163 | /// register is given by \p SrcReg. The register containing the element is |
164 | /// given by \p EltReg. |
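/// For example (illustrative), inserting a 32-bit element into lane 1 of a
/// 128-bit vector is typically selected to a vector INS (element insert)
/// instruction.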
165 | MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg, |
166 | Register EltReg, unsigned LaneIdx, |
167 | const RegisterBank &RB, |
168 | MachineIRBuilder &MIRBuilder) const; |
169 | |
170 | /// Emit a sequence of instructions representing a constant \p CV for a |
171 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) |
172 | /// |
173 | /// \returns the last instruction in the sequence on success, and nullptr |
174 | /// otherwise. |
175 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, |
176 | MachineIRBuilder &MIRBuilder, |
177 | MachineRegisterInfo &MRI); |
178 | |
179 | MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits, |
180 | MachineIRBuilder &MIRBuilder); |
181 | |
182 | MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits, |
183 | MachineIRBuilder &MIRBuilder, bool Inv); |
184 | |
185 | MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits, |
186 | MachineIRBuilder &MIRBuilder, bool Inv); |
187 | MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits, |
188 | MachineIRBuilder &MIRBuilder); |
189 | MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits, |
190 | MachineIRBuilder &MIRBuilder, bool Inv); |
191 | MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits, |
192 | MachineIRBuilder &MIRBuilder); |
193 | |
194 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, |
195 | MachineRegisterInfo &MRI); |
196 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a |
197 | /// SUBREG_TO_REG. |
198 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); |
199 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); |
200 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
201 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); |
202 | |
203 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); |
204 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); |
205 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); |
206 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); |
207 | |
208 | /// Helper function to select vector load intrinsics like |
209 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. |
210 | /// \p Opc is the opcode that the selected instruction should use. |
211 | /// \p NumVecs is the number of vector destinations for the instruction. |
212 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. |
213 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, |
214 | MachineInstr &I); |
215 | bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs, |
216 | MachineInstr &I); |
217 | void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs, |
218 | unsigned Opc); |
219 | bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs, |
220 | unsigned Opc); |
221 | bool selectIntrinsicWithSideEffects(MachineInstr &I, |
222 | MachineRegisterInfo &MRI); |
223 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); |
224 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); |
225 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); |
226 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); |
227 | bool selectPtrAuthGlobalValue(MachineInstr &I, |
228 | MachineRegisterInfo &MRI) const; |
229 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); |
230 | bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); |
231 | bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); |
232 | void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs, |
233 | unsigned Opc1, unsigned Opc2, bool isExt); |
234 | |
235 | bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
236 | bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI); |
237 | bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI); |
238 | |
239 | unsigned emitConstantPoolEntry(const Constant *CPVal, |
240 | MachineFunction &MF) const; |
241 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, |
242 | MachineIRBuilder &MIRBuilder) const; |
243 | |
244 | // Emit a vector concat operation. |
245 | MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1, |
246 | Register Op2, |
247 | MachineIRBuilder &MIRBuilder) const; |
248 | |
249 | // Emit an integer compare between LHS and RHS, which checks for Predicate. |
250 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
251 | MachineOperand &Predicate, |
252 | MachineIRBuilder &MIRBuilder) const; |
253 | |
254 | /// Emit a floating point comparison between \p LHS and \p RHS. |
/// \p Pred, if given, is the predicate to use.
256 | MachineInstr * |
257 | emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
258 | std::optional<CmpInst::Predicate> = std::nullopt) const; |
259 | |
260 | MachineInstr * |
261 | emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
262 | std::initializer_list<llvm::SrcOp> SrcOps, |
263 | MachineIRBuilder &MIRBuilder, |
264 | const ComplexRendererFns &RenderFns = std::nullopt) const; |
265 | /// Helper function to emit an add or sub instruction. |
266 | /// |
/// \p AddrModeAndSizeToOpcode must contain each of the opcode variants listed
/// below, in the order described in the \attention notes.
269 | /// |
270 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. |
271 | /// |
272 | /// \code |
/// const std::array<std::array<unsigned, 2>, 5> Table {
274 | /// {{AArch64::ADDXri, AArch64::ADDWri}, |
275 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, |
276 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, |
277 | /// {AArch64::SUBXri, AArch64::SUBWri}, |
278 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
279 | /// \endcode |
280 | /// |
281 | /// Each row in the table corresponds to a different addressing mode. Each |
282 | /// column corresponds to a different register size. |
283 | /// |
284 | /// \attention Rows must be structured as follows: |
285 | /// - Row 0: The ri opcode variants |
286 | /// - Row 1: The rs opcode variants |
287 | /// - Row 2: The rr opcode variants |
288 | /// - Row 3: The ri opcode variants for negative immediates |
289 | /// - Row 4: The rx opcode variants |
290 | /// |
291 | /// \attention Columns must be structured as follows: |
292 | /// - Column 0: The 64-bit opcode variants |
293 | /// - Column 1: The 32-bit opcode variants |
294 | /// |
295 | /// \p Dst is the destination register of the binop to emit. |
296 | /// \p LHS is the left-hand operand of the binop to emit. |
297 | /// \p RHS is the right-hand operand of the binop to emit. |
298 | MachineInstr *emitAddSub( |
299 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
300 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
301 | MachineIRBuilder &MIRBuilder) const; |
302 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, |
303 | MachineOperand &RHS, |
304 | MachineIRBuilder &MIRBuilder) const; |
305 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
306 | MachineIRBuilder &MIRBuilder) const; |
307 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
308 | MachineIRBuilder &MIRBuilder) const; |
309 | MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
310 | MachineIRBuilder &MIRBuilder) const; |
311 | MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
312 | MachineIRBuilder &MIRBuilder) const; |
313 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
314 | MachineIRBuilder &MIRBuilder) const; |
315 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, |
316 | MachineIRBuilder &MIRBuilder) const; |
317 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, |
318 | AArch64CC::CondCode CC, |
319 | MachineIRBuilder &MIRBuilder) const; |
320 | MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg, |
321 | const RegisterBank &DstRB, LLT ScalarTy, |
322 | Register VecReg, unsigned LaneIdx, |
323 | MachineIRBuilder &MIRBuilder) const; |
324 | MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, |
325 | AArch64CC::CondCode Pred, |
326 | MachineIRBuilder &MIRBuilder) const; |
327 | /// Emit a CSet for a FP compare. |
328 | /// |
329 | /// \p Dst is expected to be a 32-bit scalar register. |
330 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, |
331 | MachineIRBuilder &MIRBuilder) const; |
332 | |
333 | /// Emit an instruction that sets NZCV to the carry-in expected by \p I. |
334 | /// Might elide the instruction if the previous instruction already sets NZCV |
335 | /// correctly. |
336 | MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg); |
337 | |
338 | /// Emit the overflow op for \p Opcode. |
339 | /// |
340 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, |
341 | /// G_USUBO, etc. |
342 | std::pair<MachineInstr *, AArch64CC::CondCode> |
343 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, |
344 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; |
345 | |
346 | bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); |
347 | |
348 | /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). |
349 | /// In some cases this is even possible with OR operations in the expression. |
350 | MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, |
351 | MachineIRBuilder &MIB) const; |
352 | MachineInstr *emitConditionalComparison(Register LHS, Register RHS, |
353 | CmpInst::Predicate CC, |
354 | AArch64CC::CondCode Predicate, |
355 | AArch64CC::CondCode OutCC, |
356 | MachineIRBuilder &MIB) const; |
357 | MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, |
358 | bool Negate, Register CCOp, |
359 | AArch64CC::CondCode Predicate, |
360 | MachineIRBuilder &MIB) const; |
361 | |
362 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. |
363 | /// \p IsNegative is true if the test should be "not zero". |
364 | /// This will also optimize the test bit instruction when possible. |
365 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, |
366 | MachineBasicBlock *DstMBB, |
367 | MachineIRBuilder &MIB) const; |
368 | |
369 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. |
370 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, |
371 | MachineBasicBlock *DestMBB, |
372 | MachineIRBuilder &MIB) const; |
373 | |
374 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. |
375 | // We use these manually instead of using the importer since it doesn't |
376 | // support SDNodeXForm. |
377 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; |
378 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; |
379 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; |
380 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; |
381 | |
382 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; |
383 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; |
384 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; |
385 | |
386 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, |
387 | unsigned Size) const; |
388 | |
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 16);
}
404 | |
405 | /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used |
406 | /// from complex pattern matchers like selectAddrModeIndexed(). |
407 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, |
408 | MachineRegisterInfo &MRI) const; |
409 | |
410 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, |
411 | unsigned Size) const; |
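/// For example, selectAddrModeIndexed<64> matches a base register plus a
/// scaled unsigned immediate offset for an 8-byte access (Size = 64 / 8).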
412 | template <int Width> |
413 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { |
return selectAddrModeIndexed(Root, Width / 8);
415 | } |
416 | |
417 | std::optional<bool> |
418 | isWorthFoldingIntoAddrMode(MachineInstr &MI, |
419 | const MachineRegisterInfo &MRI) const; |
420 | |
421 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, |
422 | const MachineRegisterInfo &MRI, |
423 | bool IsAddrOperand) const; |
424 | ComplexRendererFns |
425 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, |
426 | unsigned SizeInBytes) const; |
427 | |
/// Returns a \p ComplexRendererFns which contains a base, an offset, and
/// whether or not a shift + extend should be folded into an addressing mode.
/// Returns std::nullopt when this is not profitable or possible.
431 | ComplexRendererFns |
432 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, |
433 | MachineOperand &Offset, unsigned SizeInBytes, |
434 | bool WantsExt) const; |
435 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; |
436 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, |
437 | unsigned SizeInBytes) const; |
438 | template <int Width> |
439 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { |
return selectAddrModeXRO(Root, Width / 8);
441 | } |
442 | |
443 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, |
444 | unsigned SizeInBytes) const; |
445 | template <int Width> |
446 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { |
return selectAddrModeWRO(Root, Width / 8);
448 | } |
449 | |
450 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root, |
451 | bool AllowROR = false) const; |
452 | |
453 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { |
454 | return selectShiftedRegister(Root); |
455 | } |
456 | |
457 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { |
return selectShiftedRegister(Root, /*AllowROR=*/true);
459 | } |
460 | |
461 | /// Given an extend instruction, determine the correct shift-extend type for |
462 | /// that instruction. |
463 | /// |
464 | /// If the instruction is going to be used in a load or store, pass |
465 | /// \p IsLoadStore = true. |
466 | AArch64_AM::ShiftExtendType |
467 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, |
468 | bool IsLoadStore = false) const; |
469 | |
470 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. |
471 | /// |
472 | /// \returns Either \p Reg if no change was necessary, or the new register |
473 | /// created by moving \p Reg. |
474 | /// |
475 | /// Note: This uses emitCopy right now. |
476 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, |
477 | MachineIRBuilder &MIB) const; |
478 | |
479 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; |
480 | |
481 | ComplexRendererFns selectExtractHigh(MachineOperand &Root) const; |
482 | |
483 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, |
484 | int OpIdx = -1) const; |
485 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, |
486 | int OpIdx = -1) const; |
487 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, |
488 | int OpIdx = -1) const; |
489 | void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI, |
490 | int OpIdx) const; |
491 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, |
492 | int OpIdx = -1) const; |
493 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, |
494 | int OpIdx = -1) const; |
495 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, |
496 | int OpIdx = -1) const; |
497 | void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, |
498 | const MachineInstr &MI, |
499 | int OpIdx = -1) const; |
500 | |
501 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. |
502 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); |
503 | |
504 | // Optimization methods. |
505 | bool tryOptSelect(GSelect &Sel); |
506 | bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); |
507 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, |
508 | MachineOperand &Predicate, |
509 | MachineIRBuilder &MIRBuilder) const; |
510 | |
511 | /// Return true if \p MI is a load or store of \p NumBytes bytes. |
512 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; |
513 | |
514 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit |
515 | /// register zeroed out. In other words, the result of MI has been explicitly |
516 | /// zero extended. |
517 | bool isDef32(const MachineInstr &MI) const; |
518 | |
519 | const AArch64TargetMachine &TM; |
520 | const AArch64Subtarget &STI; |
521 | const AArch64InstrInfo &TII; |
522 | const AArch64RegisterInfo &TRI; |
523 | const AArch64RegisterBankInfo &RBI; |
524 | |
525 | bool ProduceNonFlagSettingCondBr = false; |
526 | |
527 | // Some cached values used during selection. |
528 | // We use LR as a live-in register, and we keep track of it here as it can be |
529 | // clobbered by calls. |
530 | Register MFReturnAddr; |
531 | |
532 | MachineIRBuilder MIB; |
533 | |
534 | #define GET_GLOBALISEL_PREDICATES_DECL |
535 | #include "AArch64GenGlobalISel.inc" |
536 | #undef GET_GLOBALISEL_PREDICATES_DECL |
537 | |
538 | // We declare the temporaries used by selectImpl() in the class to minimize the |
539 | // cost of constructing placeholder values. |
540 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
541 | #include "AArch64GenGlobalISel.inc" |
542 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
543 | }; |
544 | |
545 | } // end anonymous namespace |
546 | |
547 | #define GET_GLOBALISEL_IMPL |
548 | #include "AArch64GenGlobalISel.inc" |
549 | #undef GET_GLOBALISEL_IMPL |
550 | |
551 | AArch64InstructionSelector::AArch64InstructionSelector( |
552 | const AArch64TargetMachine &TM, const AArch64Subtarget &STI, |
553 | const AArch64RegisterBankInfo &RBI) |
554 | : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), |
555 | RBI(RBI), |
556 | #define GET_GLOBALISEL_PREDICATES_INIT |
557 | #include "AArch64GenGlobalISel.inc" |
558 | #undef GET_GLOBALISEL_PREDICATES_INIT |
559 | #define GET_GLOBALISEL_TEMPORARIES_INIT |
560 | #include "AArch64GenGlobalISel.inc" |
561 | #undef GET_GLOBALISEL_TEMPORARIES_INIT |
562 | { |
563 | } |
564 | |
565 | // FIXME: This should be target-independent, inferred from the types declared |
566 | // for each class in the bank. |
567 | // |
568 | /// Given a register bank, and a type, return the smallest register class that |
569 | /// can represent that combination. |
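///
/// For example, a 64-bit scalar on the GPR bank maps to GPR64 (or GPR64all
/// when \p GetAllRegSet is set), while any 64-bit type on the FPR bank maps
/// to FPR64.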
570 | static const TargetRegisterClass * |
571 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, |
572 | bool GetAllRegSet = false) { |
573 | if (RB.getID() == AArch64::GPRRegBankID) { |
574 | if (Ty.getSizeInBits() <= 32) |
575 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
576 | : &AArch64::GPR32RegClass; |
577 | if (Ty.getSizeInBits() == 64) |
578 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
579 | : &AArch64::GPR64RegClass; |
580 | if (Ty.getSizeInBits() == 128) |
581 | return &AArch64::XSeqPairsClassRegClass; |
582 | return nullptr; |
583 | } |
584 | |
585 | if (RB.getID() == AArch64::FPRRegBankID) { |
586 | switch (Ty.getSizeInBits()) { |
587 | case 8: |
588 | return &AArch64::FPR8RegClass; |
589 | case 16: |
590 | return &AArch64::FPR16RegClass; |
591 | case 32: |
592 | return &AArch64::FPR32RegClass; |
593 | case 64: |
594 | return &AArch64::FPR64RegClass; |
595 | case 128: |
596 | return &AArch64::FPR128RegClass; |
597 | } |
598 | return nullptr; |
599 | } |
600 | |
601 | return nullptr; |
602 | } |
603 | |
604 | /// Given a register bank, and size in bits, return the smallest register class |
605 | /// that can represent that combination. |
606 | static const TargetRegisterClass * |
607 | getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, |
608 | bool GetAllRegSet = false) { |
609 | if (SizeInBits.isScalable()) { |
610 | assert(RB.getID() == AArch64::FPRRegBankID && |
611 | "Expected FPR regbank for scalable type size" ); |
612 | return &AArch64::ZPRRegClass; |
613 | } |
614 | |
615 | unsigned RegBankID = RB.getID(); |
616 | |
617 | if (RegBankID == AArch64::GPRRegBankID) { |
618 | assert(!SizeInBits.isScalable() && "Unexpected scalable register size" ); |
619 | if (SizeInBits <= 32) |
620 | return GetAllRegSet ? &AArch64::GPR32allRegClass |
621 | : &AArch64::GPR32RegClass; |
622 | if (SizeInBits == 64) |
623 | return GetAllRegSet ? &AArch64::GPR64allRegClass |
624 | : &AArch64::GPR64RegClass; |
625 | if (SizeInBits == 128) |
626 | return &AArch64::XSeqPairsClassRegClass; |
627 | } |
628 | |
629 | if (RegBankID == AArch64::FPRRegBankID) { |
630 | if (SizeInBits.isScalable()) { |
631 | assert(SizeInBits == TypeSize::getScalable(128) && |
632 | "Unexpected scalable register size" ); |
633 | return &AArch64::ZPRRegClass; |
634 | } |
635 | |
636 | switch (SizeInBits) { |
637 | default: |
638 | return nullptr; |
639 | case 8: |
640 | return &AArch64::FPR8RegClass; |
641 | case 16: |
642 | return &AArch64::FPR16RegClass; |
643 | case 32: |
644 | return &AArch64::FPR32RegClass; |
645 | case 64: |
646 | return &AArch64::FPR64RegClass; |
647 | case 128: |
648 | return &AArch64::FPR128RegClass; |
649 | } |
650 | } |
651 | |
652 | return nullptr; |
653 | } |
654 | |
655 | /// Returns the correct subregister to use for a given register class. |
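/// For example, FPR32 yields AArch64::ssub and GPR32 yields AArch64::sub_32;
/// classes whose size is not handled below cause this to return false.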
656 | static bool getSubRegForClass(const TargetRegisterClass *RC, |
657 | const TargetRegisterInfo &TRI, unsigned &SubReg) { |
switch (TRI.getRegSizeInBits(*RC)) {
659 | case 8: |
660 | SubReg = AArch64::bsub; |
661 | break; |
662 | case 16: |
663 | SubReg = AArch64::hsub; |
664 | break; |
665 | case 32: |
666 | if (RC != &AArch64::FPR32RegClass) |
667 | SubReg = AArch64::sub_32; |
668 | else |
669 | SubReg = AArch64::ssub; |
670 | break; |
671 | case 64: |
672 | SubReg = AArch64::dsub; |
673 | break; |
674 | default: |
675 | LLVM_DEBUG( |
676 | dbgs() << "Couldn't find appropriate subregister for register class." ); |
677 | return false; |
678 | } |
679 | |
680 | return true; |
681 | } |
682 | |
683 | /// Returns the minimum size the given register bank can hold. |
684 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { |
685 | switch (RB.getID()) { |
686 | case AArch64::GPRRegBankID: |
687 | return 32; |
688 | case AArch64::FPRRegBankID: |
689 | return 8; |
690 | default: |
691 | llvm_unreachable("Tried to get minimum size for unknown register bank." ); |
692 | } |
693 | } |
694 | |
695 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. |
696 | /// Helper function for functions like createDTuple and createQTuple. |
697 | /// |
698 | /// \p RegClassIDs - The list of register class IDs available for some tuple of |
699 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is |
700 | /// expected to contain between 2 and 4 tuple classes. |
701 | /// |
702 | /// \p SubRegs - The list of subregister classes associated with each register |
703 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 |
704 | /// subregister class. The index of each subregister class is expected to |
705 | /// correspond with the index of each register class. |
706 | /// |
707 | /// \returns Either the destination register of REG_SEQUENCE instruction that |
708 | /// was created, or the 0th element of \p Regs if \p Regs contains a single |
709 | /// element. |
710 | static Register createTuple(ArrayRef<Register> Regs, |
711 | const unsigned RegClassIDs[], |
712 | const unsigned SubRegs[], MachineIRBuilder &MIB) { |
713 | unsigned NumRegs = Regs.size(); |
714 | if (NumRegs == 1) |
715 | return Regs[0]; |
assert(NumRegs >= 2 && NumRegs <= 4 &&
"Only support between two and 4 registers in a tuple!");
const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
auto RegSequence =
MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
RegSequence.addUse(Regs[I]);
RegSequence.addImm(SubRegs[I]);
}
return RegSequence.getReg(0);
727 | } |
728 | |
729 | /// Create a tuple of D-registers using the registers in \p Regs. |
730 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
731 | static const unsigned RegClassIDs[] = { |
732 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; |
733 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, |
734 | AArch64::dsub2, AArch64::dsub3}; |
735 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
736 | } |
737 | |
738 | /// Create a tuple of Q-registers using the registers in \p Regs. |
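/// For example (illustrative), createQTuple({%a, %b}, MIB) with two FPR128
/// virtual registers builds a REG_SEQUENCE of the QQ class from %a/qsub0 and
/// %b/qsub1 and returns its destination register.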
739 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { |
740 | static const unsigned RegClassIDs[] = { |
741 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; |
742 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, |
743 | AArch64::qsub2, AArch64::qsub3}; |
744 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); |
745 | } |
746 | |
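/// Extract a constant from \p Root if possible. For example, an immediate
/// operand of 42, a ConstantInt operand of 42, or a register defined
/// (possibly through copies) by G_CONSTANT 42 all yield 42; anything else
/// yields std::nullopt.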
747 | static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { |
748 | auto &MI = *Root.getParent(); |
749 | auto &MBB = *MI.getParent(); |
750 | auto &MF = *MBB.getParent(); |
751 | auto &MRI = MF.getRegInfo(); |
752 | uint64_t Immed; |
753 | if (Root.isImm()) |
754 | Immed = Root.getImm(); |
755 | else if (Root.isCImm()) |
756 | Immed = Root.getCImm()->getZExtValue(); |
757 | else if (Root.isReg()) { |
auto ValAndVReg =
getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
760 | if (!ValAndVReg) |
761 | return std::nullopt; |
762 | Immed = ValAndVReg->Value.getSExtValue(); |
763 | } else |
764 | return std::nullopt; |
765 | return Immed; |
766 | } |
767 | |
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same register bank
/// These are checks that should someday live in the verifier, but right now
/// they are mostly limitations of the AArch64 selector.
774 | static bool unsupportedBinOp(const MachineInstr &I, |
775 | const AArch64RegisterBankInfo &RBI, |
776 | const MachineRegisterInfo &MRI, |
777 | const AArch64RegisterInfo &TRI) { |
LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 | if (!Ty.isValid()) { |
780 | LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n" ); |
781 | return true; |
782 | } |
783 | |
784 | const RegisterBank *PrevOpBank = nullptr; |
785 | for (auto &MO : I.operands()) { |
786 | // FIXME: Support non-register operands. |
787 | if (!MO.isReg()) { |
788 | LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n" ); |
789 | return true; |
790 | } |
791 | |
792 | // FIXME: Can generic operations have physical registers operands? If |
793 | // so, this will need to be taught about that, and we'll need to get the |
794 | // bank out of the minimal class for the register. |
795 | // Either way, this needs to be documented (and possibly verified). |
796 | if (!MO.getReg().isVirtual()) { |
797 | LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n" ); |
798 | return true; |
799 | } |
800 | |
const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 | if (!OpBank) { |
803 | LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n" ); |
804 | return true; |
805 | } |
806 | |
807 | if (PrevOpBank && OpBank != PrevOpBank) { |
808 | LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n" ); |
809 | return true; |
810 | } |
811 | PrevOpBank = OpBank; |
812 | } |
813 | return false; |
814 | } |
815 | |
816 | /// Select the AArch64 opcode for the basic binary operation \p GenericOpc |
817 | /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID |
818 | /// and of size \p OpSize. |
819 | /// \returns \p GenericOpc if the combination is unsupported. |
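/// For example, (G_SHL, GPRRegBankID, 64) maps to AArch64::LSLVXr, while an
/// unhandled combination returns \p GenericOpc unchanged.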
820 | static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, |
821 | unsigned OpSize) { |
822 | switch (RegBankID) { |
823 | case AArch64::GPRRegBankID: |
824 | if (OpSize == 32) { |
825 | switch (GenericOpc) { |
826 | case TargetOpcode::G_SHL: |
827 | return AArch64::LSLVWr; |
828 | case TargetOpcode::G_LSHR: |
829 | return AArch64::LSRVWr; |
830 | case TargetOpcode::G_ASHR: |
831 | return AArch64::ASRVWr; |
832 | default: |
833 | return GenericOpc; |
834 | } |
835 | } else if (OpSize == 64) { |
836 | switch (GenericOpc) { |
837 | case TargetOpcode::G_PTR_ADD: |
838 | return AArch64::ADDXrr; |
839 | case TargetOpcode::G_SHL: |
840 | return AArch64::LSLVXr; |
841 | case TargetOpcode::G_LSHR: |
842 | return AArch64::LSRVXr; |
843 | case TargetOpcode::G_ASHR: |
844 | return AArch64::ASRVXr; |
845 | default: |
846 | return GenericOpc; |
847 | } |
848 | } |
849 | break; |
850 | case AArch64::FPRRegBankID: |
851 | switch (OpSize) { |
852 | case 32: |
853 | switch (GenericOpc) { |
854 | case TargetOpcode::G_FADD: |
855 | return AArch64::FADDSrr; |
856 | case TargetOpcode::G_FSUB: |
857 | return AArch64::FSUBSrr; |
858 | case TargetOpcode::G_FMUL: |
859 | return AArch64::FMULSrr; |
860 | case TargetOpcode::G_FDIV: |
861 | return AArch64::FDIVSrr; |
862 | default: |
863 | return GenericOpc; |
864 | } |
865 | case 64: |
866 | switch (GenericOpc) { |
867 | case TargetOpcode::G_FADD: |
868 | return AArch64::FADDDrr; |
869 | case TargetOpcode::G_FSUB: |
870 | return AArch64::FSUBDrr; |
871 | case TargetOpcode::G_FMUL: |
872 | return AArch64::FMULDrr; |
873 | case TargetOpcode::G_FDIV: |
874 | return AArch64::FDIVDrr; |
875 | case TargetOpcode::G_OR: |
876 | return AArch64::ORRv8i8; |
877 | default: |
878 | return GenericOpc; |
879 | } |
880 | } |
881 | break; |
882 | } |
883 | return GenericOpc; |
884 | } |
885 | |
886 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, |
887 | /// appropriate for the (value) register bank \p RegBankID and of memory access |
888 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate |
889 | /// addressing mode (e.g., LDRXui). |
890 | /// \returns \p GenericOpc if the combination is unsupported. |
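/// For example, a 128-bit FPR-bank load maps to AArch64::LDRQui and a 32-bit
/// GPR-bank store maps to AArch64::STRWui.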
891 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, |
892 | unsigned OpSize) { |
893 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; |
894 | switch (RegBankID) { |
895 | case AArch64::GPRRegBankID: |
896 | switch (OpSize) { |
897 | case 8: |
898 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; |
899 | case 16: |
900 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; |
901 | case 32: |
902 | return isStore ? AArch64::STRWui : AArch64::LDRWui; |
903 | case 64: |
904 | return isStore ? AArch64::STRXui : AArch64::LDRXui; |
905 | } |
906 | break; |
907 | case AArch64::FPRRegBankID: |
908 | switch (OpSize) { |
909 | case 8: |
910 | return isStore ? AArch64::STRBui : AArch64::LDRBui; |
911 | case 16: |
912 | return isStore ? AArch64::STRHui : AArch64::LDRHui; |
913 | case 32: |
914 | return isStore ? AArch64::STRSui : AArch64::LDRSui; |
915 | case 64: |
916 | return isStore ? AArch64::STRDui : AArch64::LDRDui; |
917 | case 128: |
918 | return isStore ? AArch64::STRQui : AArch64::LDRQui; |
919 | } |
920 | break; |
921 | } |
922 | return GenericOpc; |
923 | } |
924 | |
925 | /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg |
926 | /// to \p *To. |
927 | /// |
/// E.g., "To = COPY SrcReg:SubReg"
929 | static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, |
930 | const RegisterBankInfo &RBI, Register SrcReg, |
931 | const TargetRegisterClass *To, unsigned SubReg) { |
932 | assert(SrcReg.isValid() && "Expected a valid source register?" ); |
933 | assert(To && "Destination register class cannot be null" ); |
934 | assert(SubReg && "Expected a valid subregister" ); |
935 | |
936 | MachineIRBuilder MIB(I); |
auto SubRegCopy =
MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
941 | |
// It's possible that the destination register won't be constrained. Make
// sure it is.
if (!I.getOperand(0).getReg().isPhysical())
RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946 | |
947 | return true; |
948 | } |
949 | |
950 | /// Helper function to get the source and destination register classes for a |
951 | /// copy. Returns a std::pair containing the source register class for the |
952 | /// copy, and the destination register class for the copy. If a register class |
953 | /// cannot be determined, then it will be nullptr. |
954 | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> |
955 | getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, |
956 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
957 | const RegisterBankInfo &RBI) { |
958 | Register DstReg = I.getOperand(i: 0).getReg(); |
959 | Register SrcReg = I.getOperand(i: 1).getReg(); |
960 | const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
961 | const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
962 | |
963 | TypeSize DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI); |
964 | TypeSize SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI); |
965 | |
966 | // Special casing for cross-bank copies of s1s. We can technically represent |
967 | // a 1-bit value with any size of register. The minimum size for a GPR is 32 |
968 | // bits. So, we need to put the FPR on 32 bits as well. |
969 | // |
970 | // FIXME: I'm not sure if this case holds true outside of copies. If it does, |
971 | // then we can pull it into the helpers that get the appropriate class for a |
972 | // register bank. Or make a new helper that carries along some constraint |
973 | // information. |
974 | if (SrcRegBank != DstRegBank && |
975 | (DstSize == TypeSize::getFixed(ExactSize: 1) && SrcSize == TypeSize::getFixed(ExactSize: 1))) |
976 | SrcSize = DstSize = TypeSize::getFixed(ExactSize: 32); |
977 | |
978 | return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true), |
979 | getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)}; |
980 | } |
981 | |
982 | // FIXME: We need some sort of API in RBI/TRI to allow generic code to |
983 | // constrain operands of simple instructions given a TargetRegisterClass |
984 | // and LLT |
985 | static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, |
986 | const RegisterBankInfo &RBI) { |
987 | for (MachineOperand &MO : I.operands()) { |
988 | if (!MO.isReg()) |
989 | continue; |
990 | Register Reg = MO.getReg(); |
991 | if (!Reg) |
992 | continue; |
993 | if (Reg.isPhysical()) |
994 | continue; |
995 | LLT Ty = MRI.getType(Reg); |
996 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); |
const TargetRegisterClass *RC =
dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
if (!RC) {
const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
RC = getRegClassForTypeOnBank(Ty, RB);
1002 | if (!RC) { |
1003 | LLVM_DEBUG( |
1004 | dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n" ); |
1005 | break; |
1006 | } |
1007 | } |
RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 | } |
1010 | |
1011 | return true; |
1012 | } |
1013 | |
1014 | static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, |
1015 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
1016 | const RegisterBankInfo &RBI) { |
1017 | Register DstReg = I.getOperand(i: 0).getReg(); |
1018 | Register SrcReg = I.getOperand(i: 1).getReg(); |
1019 | const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
1020 | const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
1021 | |
1022 | // Find the correct register classes for the source and destination registers. |
1023 | const TargetRegisterClass *SrcRC; |
1024 | const TargetRegisterClass *DstRC; |
1025 | std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); |
1026 | |
1027 | if (!DstRC) { |
1028 | LLVM_DEBUG(dbgs() << "Unexpected dest size " |
1029 | << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); |
1030 | return false; |
1031 | } |
1032 | |
1033 | // Is this a copy? If so, then we may need to insert a subregister copy. |
1034 | if (I.isCopy()) { |
1035 | // Yes. Check if there's anything to fix up. |
1036 | if (!SrcRC) { |
1037 | LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n" ); |
1038 | return false; |
1039 | } |
1040 | |
1041 | const TypeSize SrcSize = TRI.getRegSizeInBits(RC: *SrcRC); |
1042 | const TypeSize DstSize = TRI.getRegSizeInBits(RC: *DstRC); |
1043 | unsigned SubReg; |
1044 | |
1045 | // If the source bank doesn't support a subregister copy small enough, |
1046 | // then we first need to copy to the destination bank. |
1047 | if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) { |
1048 | const TargetRegisterClass *DstTempRC = |
1049 | getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, /* GetAllRegSet */ true); |
1050 | getSubRegForClass(RC: DstRC, TRI, SubReg); |
1051 | |
1052 | MachineIRBuilder MIB(I); |
1053 | auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg}); |
1054 | copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: 0), To: DstRC, SubReg); |
1055 | } else if (SrcSize > DstSize) { |
1056 | // If the source register is bigger than the destination we need to |
1057 | // perform a subregister copy. |
1058 | const TargetRegisterClass *SubRegRC = |
1059 | getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true); |
1060 | getSubRegForClass(RC: SubRegRC, TRI, SubReg); |
1061 | copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg); |
1062 | } else if (DstSize > SrcSize) { |
1063 | // If the destination register is bigger than the source we need to do |
1064 | // a promotion using SUBREG_TO_REG. |
1065 | const TargetRegisterClass *PromotionRC = |
1066 | getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true); |
1067 | getSubRegForClass(RC: SrcRC, TRI, SubReg); |
1068 | |
1069 | Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC); |
1070 | BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), |
1071 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: PromoteReg) |
1072 | .addImm(Val: 0) |
1073 | .addUse(RegNo: SrcReg) |
1074 | .addImm(Val: SubReg); |
1075 | MachineOperand &RegOp = I.getOperand(i: 1); |
1076 | RegOp.setReg(PromoteReg); |
1077 | } |
1078 | |
1079 | // If the destination is a physical register, then there's nothing to |
1080 | // change, so we're done. |
1081 | if (DstReg.isPhysical()) |
1082 | return true; |
1083 | } |
1084 | |
1085 | // No need to constrain SrcReg. It will get constrained when we hit another |
1086 | // of its use or its defs. Copies do not have constraints. |
1087 | if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
1088 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) |
1089 | << " operand\n" ); |
1090 | return false; |
1091 | } |
1092 | |
// If this is a GPR ZEXT, reduce it down into a plain copy.
// The sizes will be mismatched with the source < 32b, but that's OK.
1095 | if (I.getOpcode() == TargetOpcode::G_ZEXT) { |
1096 | I.setDesc(TII.get(Opcode: AArch64::COPY)); |
1097 | assert(SrcRegBank.getID() == AArch64::GPRRegBankID); |
1098 | return selectCopy(I, TII, MRI, TRI, RBI); |
1099 | } |
1100 | |
1101 | I.setDesc(TII.get(Opcode: AArch64::COPY)); |
1102 | return true; |
1103 | } |
1104 | |
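/// Select the AArch64 opcode for a scalar FP <-> integer conversion
/// \p GenericOpc with source type \p SrcTy and destination type \p DstTy, or
/// return \p GenericOpc if the combination is unsupported. For example,
/// (G_SITOFP, s64, s32) maps to AArch64::SCVTFUWDri (32-bit integer to
/// double).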
1105 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { |
1106 | if (!DstTy.isScalar() || !SrcTy.isScalar()) |
1107 | return GenericOpc; |
1108 | |
1109 | const unsigned DstSize = DstTy.getSizeInBits(); |
1110 | const unsigned SrcSize = SrcTy.getSizeInBits(); |
1111 | |
1112 | switch (DstSize) { |
1113 | case 32: |
1114 | switch (SrcSize) { |
1115 | case 32: |
1116 | switch (GenericOpc) { |
1117 | case TargetOpcode::G_SITOFP: |
1118 | return AArch64::SCVTFUWSri; |
1119 | case TargetOpcode::G_UITOFP: |
1120 | return AArch64::UCVTFUWSri; |
1121 | case TargetOpcode::G_FPTOSI: |
1122 | return AArch64::FCVTZSUWSr; |
1123 | case TargetOpcode::G_FPTOUI: |
1124 | return AArch64::FCVTZUUWSr; |
1125 | default: |
1126 | return GenericOpc; |
1127 | } |
1128 | case 64: |
1129 | switch (GenericOpc) { |
1130 | case TargetOpcode::G_SITOFP: |
1131 | return AArch64::SCVTFUXSri; |
1132 | case TargetOpcode::G_UITOFP: |
1133 | return AArch64::UCVTFUXSri; |
1134 | case TargetOpcode::G_FPTOSI: |
1135 | return AArch64::FCVTZSUWDr; |
1136 | case TargetOpcode::G_FPTOUI: |
1137 | return AArch64::FCVTZUUWDr; |
1138 | default: |
1139 | return GenericOpc; |
1140 | } |
1141 | default: |
1142 | return GenericOpc; |
1143 | } |
1144 | case 64: |
1145 | switch (SrcSize) { |
1146 | case 32: |
1147 | switch (GenericOpc) { |
1148 | case TargetOpcode::G_SITOFP: |
1149 | return AArch64::SCVTFUWDri; |
1150 | case TargetOpcode::G_UITOFP: |
1151 | return AArch64::UCVTFUWDri; |
1152 | case TargetOpcode::G_FPTOSI: |
1153 | return AArch64::FCVTZSUXSr; |
1154 | case TargetOpcode::G_FPTOUI: |
1155 | return AArch64::FCVTZUUXSr; |
1156 | default: |
1157 | return GenericOpc; |
1158 | } |
1159 | case 64: |
1160 | switch (GenericOpc) { |
1161 | case TargetOpcode::G_SITOFP: |
1162 | return AArch64::SCVTFUXDri; |
1163 | case TargetOpcode::G_UITOFP: |
1164 | return AArch64::UCVTFUXDri; |
1165 | case TargetOpcode::G_FPTOSI: |
1166 | return AArch64::FCVTZSUXDr; |
1167 | case TargetOpcode::G_FPTOUI: |
1168 | return AArch64::FCVTZUUXDr; |
1169 | default: |
1170 | return GenericOpc; |
1171 | } |
1172 | default: |
1173 | return GenericOpc; |
1174 | } |
1175 | default: |
1176 | return GenericOpc; |
1177 | }; |
1178 | return GenericOpc; |
1179 | } |
1180 | |
1181 | MachineInstr * |
1182 | AArch64InstructionSelector::emitSelect(Register Dst, Register True, |
1183 | Register False, AArch64CC::CondCode CC, |
1184 | MachineIRBuilder &MIB) const { |
1185 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1186 | assert(RBI.getRegBank(False, MRI, TRI)->getID() == |
1187 | RBI.getRegBank(True, MRI, TRI)->getID() && |
1188 | "Expected both select operands to have the same regbank?" ); |
1189 | LLT Ty = MRI.getType(Reg: True); |
1190 | if (Ty.isVector()) |
1191 | return nullptr; |
1192 | const unsigned Size = Ty.getSizeInBits(); |
1193 | assert((Size == 32 || Size == 64) && |
1194 | "Expected 32 bit or 64 bit select only?" ); |
1195 | const bool Is32Bit = Size == 32; |
1196 | if (RBI.getRegBank(Reg: True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { |
1197 | unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; |
1198 | auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC); |
1199 | constrainSelectedInstRegOperands(I&: *FCSel, TII, TRI, RBI); |
1200 | return &*FCSel; |
1201 | } |
1202 | |
1203 | // By default, we'll try and emit a CSEL. |
1204 | unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; |
1205 | bool Optimized = false; |
1206 | auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, |
1207 | &Optimized](Register &Reg, Register &OtherReg, |
1208 | bool Invert) { |
1209 | if (Optimized) |
1210 | return false; |
1211 | |
1212 | // Attempt to fold: |
1213 | // |
1214 | // %sub = G_SUB 0, %x |
1215 | // %select = G_SELECT cc, %reg, %sub |
1216 | // |
1217 | // Into: |
1218 | // %select = CSNEG %reg, %x, cc |
1219 | Register MatchReg; |
1220 | if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) { |
1221 | Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; |
1222 | Reg = MatchReg; |
1223 | if (Invert) { |
1224 | CC = AArch64CC::getInvertedCondCode(Code: CC); |
1225 | std::swap(a&: Reg, b&: OtherReg); |
1226 | } |
1227 | return true; |
1228 | } |
1229 | |
1230 | // Attempt to fold: |
1231 | // |
1232 | // %xor = G_XOR %x, -1 |
1233 | // %select = G_SELECT cc, %reg, %xor |
1234 | // |
1235 | // Into: |
1236 | // %select = CSINV %reg, %x, cc |
1237 | if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) { |
1238 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1239 | Reg = MatchReg; |
1240 | if (Invert) { |
1241 | CC = AArch64CC::getInvertedCondCode(Code: CC); |
1242 | std::swap(a&: Reg, b&: OtherReg); |
1243 | } |
1244 | return true; |
1245 | } |
1246 | |
1247 | // Attempt to fold: |
1248 | // |
1249 | // %add = G_ADD %x, 1 |
1250 | // %select = G_SELECT cc, %reg, %add |
1251 | // |
1252 | // Into: |
1253 | // %select = CSINC %reg, %x, cc |
1254 | if (mi_match(R: Reg, MRI, |
1255 | P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1)), |
1256 | preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1))))) { |
1257 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1258 | Reg = MatchReg; |
1259 | if (Invert) { |
1260 | CC = AArch64CC::getInvertedCondCode(Code: CC); |
1261 | std::swap(a&: Reg, b&: OtherReg); |
1262 | } |
1263 | return true; |
1264 | } |
1265 | |
1266 | return false; |
1267 | }; |
1268 | |
1269 | // Helper lambda which tries to use CSINC/CSINV for the instruction when its |
1270 | // true/false values are constants. |
1271 | // FIXME: All of these patterns already exist in tablegen. We should be |
1272 | // able to import these. |
1273 | auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, |
1274 | &Optimized]() { |
1275 | if (Optimized) |
1276 | return false; |
1277 | auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI); |
1278 | auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI); |
1279 | if (!TrueCst && !FalseCst) |
1280 | return false; |
1281 | |
1282 | Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; |
1283 | if (TrueCst && FalseCst) { |
1284 | int64_t T = TrueCst->Value.getSExtValue(); |
1285 | int64_t F = FalseCst->Value.getSExtValue(); |
1286 | |
1287 | if (T == 0 && F == 1) { |
1288 | // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc |
1289 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1290 | True = ZReg; |
1291 | False = ZReg; |
1292 | return true; |
1293 | } |
1294 | |
1295 | if (T == 0 && F == -1) { |
// G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1297 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1298 | True = ZReg; |
1299 | False = ZReg; |
1300 | return true; |
1301 | } |
1302 | } |
1303 | |
1304 | if (TrueCst) { |
1305 | int64_t T = TrueCst->Value.getSExtValue(); |
1306 | if (T == 1) { |
1307 | // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc |
1308 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1309 | True = False; |
1310 | False = ZReg; |
1311 | CC = AArch64CC::getInvertedCondCode(Code: CC); |
1312 | return true; |
1313 | } |
1314 | |
1315 | if (T == -1) { |
1316 | // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc |
1317 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1318 | True = False; |
1319 | False = ZReg; |
1320 | CC = AArch64CC::getInvertedCondCode(Code: CC); |
1321 | return true; |
1322 | } |
1323 | } |
1324 | |
1325 | if (FalseCst) { |
1326 | int64_t F = FalseCst->Value.getSExtValue(); |
1327 | if (F == 1) { |
1328 | // G_SELECT cc, t, 1 -> CSINC t, zreg, cc |
1329 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; |
1330 | False = ZReg; |
1331 | return true; |
1332 | } |
1333 | |
1334 | if (F == -1) { |
// G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1336 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; |
1337 | False = ZReg; |
1338 | return true; |
1339 | } |
1340 | } |
1341 | return false; |
1342 | }; |
1343 | |
1344 | Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); |
1345 | Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); |
1346 | Optimized |= TryOptSelectCst(); |
1347 | auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC); |
1348 | constrainSelectedInstRegOperands(I&: *SelectInst, TII, TRI, RBI); |
1349 | return &*SelectInst; |
1350 | } |
1351 | |
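/// Convert an IR integer-comparison predicate to the equivalent AArch64
/// condition code. For example, ICMP_ULT maps to AArch64CC::LO and ICMP_SGE
/// maps to AArch64CC::GE.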
1352 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { |
1353 | switch (P) { |
1354 | default: |
1355 | llvm_unreachable("Unknown condition code!" ); |
1356 | case CmpInst::ICMP_NE: |
1357 | return AArch64CC::NE; |
1358 | case CmpInst::ICMP_EQ: |
1359 | return AArch64CC::EQ; |
1360 | case CmpInst::ICMP_SGT: |
1361 | return AArch64CC::GT; |
1362 | case CmpInst::ICMP_SGE: |
1363 | return AArch64CC::GE; |
1364 | case CmpInst::ICMP_SLT: |
1365 | return AArch64CC::LT; |
1366 | case CmpInst::ICMP_SLE: |
1367 | return AArch64CC::LE; |
1368 | case CmpInst::ICMP_UGT: |
1369 | return AArch64CC::HI; |
1370 | case CmpInst::ICMP_UGE: |
1371 | return AArch64CC::HS; |
1372 | case CmpInst::ICMP_ULT: |
1373 | return AArch64CC::LO; |
1374 | case CmpInst::ICMP_ULE: |
1375 | return AArch64CC::LS; |
1376 | } |
1377 | } |
1378 | |
1379 | /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC. |
1380 | static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, |
1381 | AArch64CC::CondCode &CondCode, |
1382 | AArch64CC::CondCode &CondCode2) { |
1383 | CondCode2 = AArch64CC::AL; |
1384 | switch (CC) { |
1385 | default: |
1386 | llvm_unreachable("Unknown FP condition!" ); |
1387 | case CmpInst::FCMP_OEQ: |
1388 | CondCode = AArch64CC::EQ; |
1389 | break; |
1390 | case CmpInst::FCMP_OGT: |
1391 | CondCode = AArch64CC::GT; |
1392 | break; |
1393 | case CmpInst::FCMP_OGE: |
1394 | CondCode = AArch64CC::GE; |
1395 | break; |
1396 | case CmpInst::FCMP_OLT: |
1397 | CondCode = AArch64CC::MI; |
1398 | break; |
1399 | case CmpInst::FCMP_OLE: |
1400 | CondCode = AArch64CC::LS; |
1401 | break; |
1402 | case CmpInst::FCMP_ONE: |
1403 | CondCode = AArch64CC::MI; |
1404 | CondCode2 = AArch64CC::GT; |
1405 | break; |
1406 | case CmpInst::FCMP_ORD: |
1407 | CondCode = AArch64CC::VC; |
1408 | break; |
1409 | case CmpInst::FCMP_UNO: |
1410 | CondCode = AArch64CC::VS; |
1411 | break; |
1412 | case CmpInst::FCMP_UEQ: |
1413 | CondCode = AArch64CC::EQ; |
1414 | CondCode2 = AArch64CC::VS; |
1415 | break; |
1416 | case CmpInst::FCMP_UGT: |
1417 | CondCode = AArch64CC::HI; |
1418 | break; |
1419 | case CmpInst::FCMP_UGE: |
1420 | CondCode = AArch64CC::PL; |
1421 | break; |
1422 | case CmpInst::FCMP_ULT: |
1423 | CondCode = AArch64CC::LT; |
1424 | break; |
1425 | case CmpInst::FCMP_ULE: |
1426 | CondCode = AArch64CC::LE; |
1427 | break; |
1428 | case CmpInst::FCMP_UNE: |
1429 | CondCode = AArch64CC::NE; |
1430 | break; |
1431 | } |
1432 | } |
1433 | |
1434 | /// Convert an IR fp condition code to an AArch64 CC. |
/// This differs from changeFPCCToORAArch64CC in that it returns cond codes
/// that should be AND'ed instead of OR'ed.
1437 | static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, |
1438 | AArch64CC::CondCode &CondCode, |
1439 | AArch64CC::CondCode &CondCode2) { |
1440 | CondCode2 = AArch64CC::AL; |
1441 | switch (CC) { |
1442 | default: |
1443 | changeFPCCToORAArch64CC(CC, CondCode, CondCode2); |
1444 | assert(CondCode2 == AArch64CC::AL); |
1445 | break; |
1446 | case CmpInst::FCMP_ONE: |
1447 | // (a one b) |
1448 | // == ((a olt b) || (a ogt b)) |
1449 | // == ((a ord b) && (a une b)) |
1450 | CondCode = AArch64CC::VC; |
1451 | CondCode2 = AArch64CC::NE; |
1452 | break; |
1453 | case CmpInst::FCMP_UEQ: |
1454 | // (a ueq b) |
1455 | // == ((a uno b) || (a oeq b)) |
1456 | // == ((a ule b) && (a uge b)) |
1457 | CondCode = AArch64CC::PL; |
1458 | CondCode2 = AArch64CC::LE; |
1459 | break; |
1460 | } |
1461 | } |
1462 | |
1463 | /// Return a register which can be used as a bit to test in a TB(N)Z. |
1464 | static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, |
1465 | MachineRegisterInfo &MRI) { |
1466 | assert(Reg.isValid() && "Expected valid register!" ); |
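  // Track whether we look through a G_ZEXT so that any constant found later is
  // read as zero-extended rather than sign-extended.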
1467 | bool HasZext = false; |
1468 | while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { |
1469 | unsigned Opc = MI->getOpcode(); |
1470 | |
1471 | if (!MI->getOperand(i: 0).isReg() || |
1472 | !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg())) |
1473 | break; |
1474 | |
1475 | // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. |
1476 | // |
1477 | // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number |
1478 | // on the truncated x is the same as the bit number on x. |
1479 | if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || |
1480 | Opc == TargetOpcode::G_TRUNC) { |
1481 | if (Opc == TargetOpcode::G_ZEXT) |
1482 | HasZext = true; |
1483 | |
1484 | Register NextReg = MI->getOperand(i: 1).getReg(); |
1485 | // Did we find something worth folding? |
1486 | if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg)) |
1487 | break; |
1488 | |
1489 | // NextReg is worth folding. Keep looking. |
1490 | Reg = NextReg; |
1491 | continue; |
1492 | } |
1493 | |
1494 | // Attempt to find a suitable operation with a constant on one side. |
1495 | std::optional<uint64_t> C; |
1496 | Register TestReg; |
1497 | switch (Opc) { |
1498 | default: |
1499 | break; |
1500 | case TargetOpcode::G_AND: |
1501 | case TargetOpcode::G_XOR: { |
1502 | TestReg = MI->getOperand(i: 1).getReg(); |
1503 | Register ConstantReg = MI->getOperand(i: 2).getReg(); |
1504 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
1505 | if (!VRegAndVal) { |
      // G_AND and G_XOR commute, so check the other side for a constant.
1507 | // FIXME: Can we canonicalize the constant so that it's always on the |
1508 | // same side at some point earlier? |
1509 | std::swap(a&: ConstantReg, b&: TestReg); |
1510 | VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
1511 | } |
1512 | if (VRegAndVal) { |
1513 | if (HasZext) |
1514 | C = VRegAndVal->Value.getZExtValue(); |
1515 | else |
1516 | C = VRegAndVal->Value.getSExtValue(); |
1517 | } |
1518 | break; |
1519 | } |
1520 | case TargetOpcode::G_ASHR: |
1521 | case TargetOpcode::G_LSHR: |
1522 | case TargetOpcode::G_SHL: { |
1523 | TestReg = MI->getOperand(i: 1).getReg(); |
1524 | auto VRegAndVal = |
1525 | getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI); |
1526 | if (VRegAndVal) |
1527 | C = VRegAndVal->Value.getSExtValue(); |
1528 | break; |
1529 | } |
1530 | } |
1531 | |
1532 | // Didn't find a constant or viable register. Bail out of the loop. |
1533 | if (!C || !TestReg.isValid()) |
1534 | break; |
1535 | |
1536 | // We found a suitable instruction with a constant. Check to see if we can |
1537 | // walk through the instruction. |
1538 | Register NextReg; |
1539 | unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits(); |
1540 | switch (Opc) { |
1541 | default: |
1542 | break; |
1543 | case TargetOpcode::G_AND: |
1544 | // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. |
1545 | if ((*C >> Bit) & 1) |
1546 | NextReg = TestReg; |
1547 | break; |
1548 | case TargetOpcode::G_SHL: |
    // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
    // the type of the register.
1551 | if (*C <= Bit && (Bit - *C) < TestRegSize) { |
1552 | NextReg = TestReg; |
1553 | Bit = Bit - *C; |
1554 | } |
1555 | break; |
1556 | case TargetOpcode::G_ASHR: |
    // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= the
    // number of bits in x.
1559 | NextReg = TestReg; |
1560 | Bit = Bit + *C; |
1561 | if (Bit >= TestRegSize) |
1562 | Bit = TestRegSize - 1; |
1563 | break; |
1564 | case TargetOpcode::G_LSHR: |
1565 | // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x |
1566 | if ((Bit + *C) < TestRegSize) { |
1567 | NextReg = TestReg; |
1568 | Bit = Bit + *C; |
1569 | } |
1570 | break; |
1571 | case TargetOpcode::G_XOR: |
1572 | // We can walk through a G_XOR by inverting whether we use tbz/tbnz when |
1573 | // appropriate. |
1574 | // |
1575 | // e.g. If x' = xor x, c, and the b-th bit is set in c then |
1576 | // |
1577 | // tbz x', b -> tbnz x, b |
1578 | // |
1579 | // Because x' only has the b-th bit set if x does not. |
1580 | if ((*C >> Bit) & 1) |
1581 | Invert = !Invert; |
1582 | NextReg = TestReg; |
1583 | break; |
1584 | } |
1585 | |
1586 | // Check if we found anything worth folding. |
1587 | if (!NextReg.isValid()) |
1588 | return Reg; |
1589 | Reg = NextReg; |
1590 | } |
1591 | |
1592 | return Reg; |
1593 | } |
1594 | |
1595 | MachineInstr *AArch64InstructionSelector::emitTestBit( |
1596 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, |
1597 | MachineIRBuilder &MIB) const { |
1598 | assert(TestReg.isValid()); |
1599 | assert(ProduceNonFlagSettingCondBr && |
1600 | "Cannot emit TB(N)Z with speculation tracking!" ); |
1601 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1602 | |
1603 | // Attempt to optimize the test bit by walking over instructions. |
1604 | TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI); |
1605 | LLT Ty = MRI.getType(Reg: TestReg); |
1606 | unsigned Size = Ty.getSizeInBits(); |
1607 | assert(!Ty.isVector() && "Expected a scalar!" ); |
1608 | assert(Bit < 64 && "Bit is too large!" ); |
1609 | |
  // TB(N)ZW can only test bits 0-31 and requires a 32-bit register, so move the
  // test register to the 32-bit or 64-bit register class when its width does
  // not match the form we need.
1612 | bool UseWReg = Bit < 32; |
1613 | unsigned NecessarySize = UseWReg ? 32 : 64; |
1614 | if (Size != NecessarySize) |
1615 | TestReg = moveScalarRegClass( |
1616 | Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, |
1617 | MIB); |
1618 | |
1619 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, |
1620 | {AArch64::TBZW, AArch64::TBNZW}}; |
1621 | unsigned Opc = OpcTable[UseWReg][IsNegative]; |
1622 | auto TestBitMI = |
1623 | MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB); |
1624 | constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI); |
1625 | return &*TestBitMI; |
1626 | } |
1627 | |
1628 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( |
1629 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, |
1630 | MachineIRBuilder &MIB) const { |
1631 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?" ); |
1632 | // Given something like this: |
1633 | // |
1634 | // %x = ...Something... |
1635 | // %one = G_CONSTANT i64 1 |
1636 | // %zero = G_CONSTANT i64 0 |
1637 | // %and = G_AND %x, %one |
1638 | // %cmp = G_ICMP intpred(ne), %and, %zero |
1639 | // %cmp_trunc = G_TRUNC %cmp |
1640 | // G_BRCOND %cmp_trunc, %bb.3 |
1641 | // |
1642 | // We want to try and fold the AND into the G_BRCOND and produce either a |
1643 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). |
1644 | // |
1645 | // In this case, we'd get |
1646 | // |
  // TBNZ %x, 0, %bb.3
1648 | // |
1649 | |
1650 | // Check if the AND has a constant on its RHS which we can use as a mask. |
1651 | // If it's a power of 2, then it's the same as checking a specific bit. |
1652 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) |
1653 | auto MaybeBit = getIConstantVRegValWithLookThrough( |
1654 | VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI()); |
1655 | if (!MaybeBit) |
1656 | return false; |
1657 | |
1658 | int32_t Bit = MaybeBit->Value.exactLogBase2(); |
1659 | if (Bit < 0) |
1660 | return false; |
1661 | |
1662 | Register TestReg = AndInst.getOperand(i: 1).getReg(); |
1663 | |
1664 | // Emit a TB(N)Z. |
1665 | emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB); |
1666 | return true; |
1667 | } |
1668 | |
1669 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, |
1670 | bool IsNegative, |
1671 | MachineBasicBlock *DestMBB, |
1672 | MachineIRBuilder &MIB) const { |
1673 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!" ); |
1674 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1675 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == |
1676 | AArch64::GPRRegBankID && |
1677 | "Expected GPRs only?" ); |
1678 | auto Ty = MRI.getType(Reg: CompareReg); |
1679 | unsigned Width = Ty.getSizeInBits(); |
1680 | assert(!Ty.isVector() && "Expected scalar only?" ); |
1681 | assert(Width <= 64 && "Expected width to be at most 64?" ); |
1682 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, |
1683 | {AArch64::CBNZW, AArch64::CBNZX}}; |
1684 | unsigned Opc = OpcTable[IsNegative][Width == 64]; |
1685 | auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB); |
1686 | constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI); |
1687 | return &*BranchMI; |
1688 | } |
1689 | |
1690 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( |
1691 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { |
1692 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); |
1693 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1694 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't |
1695 | // totally clean. Some of them require two branches to implement. |
1696 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate(); |
1697 | emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
1698 | Pred); |
1699 | AArch64CC::CondCode CC1, CC2; |
1700 | changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2); |
1701 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1702 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB); |
1703 | if (CC2 != AArch64CC::AL) |
1704 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB); |
1705 | I.eraseFromParent(); |
1706 | return true; |
1707 | } |
1708 | |
1709 | bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( |
1710 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1711 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1712 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1713 | // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. |
1714 | // |
1715 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z |
1716 | // instructions will not be produced, as they are conditional branch |
1717 | // instructions that do not set flags. |
1718 | if (!ProduceNonFlagSettingCondBr) |
1719 | return false; |
1720 | |
1721 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
1722 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1723 | auto Pred = |
1724 | static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate()); |
1725 | Register LHS = ICmp.getOperand(i: 2).getReg(); |
1726 | Register RHS = ICmp.getOperand(i: 3).getReg(); |
1727 | |
1728 | // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. |
1729 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
1730 | MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI); |
1731 | |
1732 | // When we can emit a TB(N)Z, prefer that. |
1733 | // |
1734 | // Handle non-commutative condition codes first. |
1735 | // Note that we don't want to do this when we have a G_AND because it can |
1736 | // become a tst. The tst will make the test bit in the TB(N)Z redundant. |
1737 | if (VRegAndVal && !AndInst) { |
1738 | int64_t C = VRegAndVal->Value.getSExtValue(); |
1739 | |
    // A signed greater-than comparison against -1 is true exactly when the msb
    // (sign bit) is zero, so we can just test that bit.
1742 | if (C == -1 && Pred == CmpInst::ICMP_SGT) { |
1743 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1744 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB); |
1745 | I.eraseFromParent(); |
1746 | return true; |
1747 | } |
1748 | |
    // A signed less-than comparison against zero is true exactly when the msb
    // (sign bit) is set, so we can just test that bit.
1751 | if (C == 0 && Pred == CmpInst::ICMP_SLT) { |
1752 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1753 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB); |
1754 | I.eraseFromParent(); |
1755 | return true; |
1756 | } |
1757 | |
    // Conversely, a signed greater-than-or-equal comparison against zero is
    // true exactly when the msb (sign bit) is zero.
1760 | if (C == 0 && Pred == CmpInst::ICMP_SGE) { |
1761 | uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1; |
1762 | emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB); |
1763 | I.eraseFromParent(); |
1764 | return true; |
1765 | } |
1766 | } |
1767 | |
1768 | // Attempt to handle commutative condition codes. Right now, that's only |
1769 | // eq/ne. |
1770 | if (ICmpInst::isEquality(P: Pred)) { |
1771 | if (!VRegAndVal) { |
1772 | std::swap(a&: RHS, b&: LHS); |
1773 | VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
1774 | AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI); |
1775 | } |
1776 | |
1777 | if (VRegAndVal && VRegAndVal->Value == 0) { |
1778 | // If there's a G_AND feeding into this branch, try to fold it away by |
1779 | // emitting a TB(N)Z instead. |
1780 | // |
1781 | // Note: If we have LT, then it *is* possible to fold, but it wouldn't be |
1782 | // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding |
1783 | // would be redundant. |
1784 | if (AndInst && |
1785 | tryOptAndIntoCompareBranch( |
1786 | AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) { |
1787 | I.eraseFromParent(); |
1788 | return true; |
1789 | } |
1790 | |
1791 | // Otherwise, try to emit a CB(N)Z instead. |
1792 | auto LHSTy = MRI.getType(Reg: LHS); |
1793 | if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { |
1794 | emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); |
1795 | I.eraseFromParent(); |
1796 | return true; |
1797 | } |
1798 | } |
1799 | } |
1800 | |
1801 | return false; |
1802 | } |
1803 | |
1804 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( |
1805 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { |
1806 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); |
1807 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); |
1808 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) |
1809 | return true; |
1810 | |
1811 | // Couldn't optimize. Emit a compare + a Bcc. |
1812 | MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB(); |
1813 | auto PredOp = ICmp.getOperand(i: 1); |
1814 | emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
1815 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( |
1816 | P: static_cast<CmpInst::Predicate>(PredOp.getPredicate())); |
1817 | MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB); |
1818 | I.eraseFromParent(); |
1819 | return true; |
1820 | } |
1821 | |
1822 | bool AArch64InstructionSelector::selectCompareBranch( |
1823 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { |
1824 | Register CondReg = I.getOperand(i: 0).getReg(); |
1825 | MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg); |
1826 | // Try to select the G_BRCOND using whatever is feeding the condition if |
1827 | // possible. |
1828 | unsigned CCMIOpc = CCMI->getOpcode(); |
1829 | if (CCMIOpc == TargetOpcode::G_FCMP) |
1830 | return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB); |
1831 | if (CCMIOpc == TargetOpcode::G_ICMP) |
1832 | return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB); |
1833 | |
1834 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z |
1835 | // instructions will not be produced, as they are conditional branch |
1836 | // instructions that do not set flags. |
1837 | if (ProduceNonFlagSettingCondBr) { |
1838 | emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true, |
1839 | DstMBB: I.getOperand(i: 1).getMBB(), MIB); |
1840 | I.eraseFromParent(); |
1841 | return true; |
1842 | } |
1843 | |
1844 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. |
1845 | auto TstMI = |
1846 | MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1); |
1847 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
1848 | auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc) |
1849 | .addImm(Val: AArch64CC::NE) |
1850 | .addMBB(MBB: I.getOperand(i: 1).getMBB()); |
1851 | I.eraseFromParent(); |
1852 | return constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI); |
1853 | } |
1854 | |
1855 | /// Returns the element immediate value of a vector shift operand if found. |
1856 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. |
1857 | static std::optional<int64_t> getVectorShiftImm(Register Reg, |
1858 | MachineRegisterInfo &MRI) { |
1859 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand" ); |
1860 | MachineInstr *OpMI = MRI.getVRegDef(Reg); |
1861 | return getAArch64VectorSplatScalar(MI: *OpMI, MRI); |
1862 | } |
1863 | |
1864 | /// Matches and returns the shift immediate value for a SHL instruction given |
1865 | /// a shift operand. |
1866 | static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, |
1867 | MachineRegisterInfo &MRI) { |
1868 | std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); |
1869 | if (!ShiftImm) |
1870 | return std::nullopt; |
1871 | // Check the immediate is in range for a SHL. |
1872 | int64_t Imm = *ShiftImm; |
1873 | if (Imm < 0) |
1874 | return std::nullopt; |
1875 | switch (SrcTy.getElementType().getSizeInBits()) { |
1876 | default: |
1877 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift" ); |
1878 | return std::nullopt; |
1879 | case 8: |
1880 | if (Imm > 7) |
1881 | return std::nullopt; |
1882 | break; |
1883 | case 16: |
1884 | if (Imm > 15) |
1885 | return std::nullopt; |
1886 | break; |
1887 | case 32: |
1888 | if (Imm > 31) |
1889 | return std::nullopt; |
1890 | break; |
1891 | case 64: |
1892 | if (Imm > 63) |
1893 | return std::nullopt; |
1894 | break; |
1895 | } |
1896 | return Imm; |
1897 | } |
1898 | |
1899 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, |
1900 | MachineRegisterInfo &MRI) { |
1901 | assert(I.getOpcode() == TargetOpcode::G_SHL); |
1902 | Register DstReg = I.getOperand(i: 0).getReg(); |
1903 | const LLT Ty = MRI.getType(Reg: DstReg); |
1904 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1905 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1906 | |
1907 | if (!Ty.isVector()) |
1908 | return false; |
1909 | |
1910 | // Check if we have a vector of constants on RHS that we can select as the |
1911 | // immediate form. |
1912 | std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI); |
1913 | |
1914 | unsigned Opc = 0; |
1915 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1916 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; |
1917 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1918 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; |
1919 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1920 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; |
1921 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1922 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; |
1923 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1924 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; |
1925 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1926 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; |
1927 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1928 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; |
1929 | } else { |
1930 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type" ); |
1931 | return false; |
1932 | } |
1933 | |
1934 | auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg}); |
1935 | if (ImmVal) |
1936 | Shl.addImm(Val: *ImmVal); |
1937 | else |
1938 | Shl.addUse(RegNo: Src2Reg); |
1939 | constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI); |
1940 | I.eraseFromParent(); |
1941 | return true; |
1942 | } |
1943 | |
1944 | bool AArch64InstructionSelector::selectVectorAshrLshr( |
1945 | MachineInstr &I, MachineRegisterInfo &MRI) { |
1946 | assert(I.getOpcode() == TargetOpcode::G_ASHR || |
1947 | I.getOpcode() == TargetOpcode::G_LSHR); |
1948 | Register DstReg = I.getOperand(i: 0).getReg(); |
1949 | const LLT Ty = MRI.getType(Reg: DstReg); |
1950 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
1951 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
1952 | |
1953 | if (!Ty.isVector()) |
1954 | return false; |
1955 | |
1956 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; |
1957 | |
  // We expect the immediate case to be lowered by the post-legalizer combines
  // to the AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1960 | |
  // There is no vector shift-right-by-register instruction, but the
  // shift-left-by-register instructions take a signed shift amount, where a
  // negative amount specifies a right shift.
1964 | |
1965 | unsigned Opc = 0; |
1966 | unsigned NegOpc = 0; |
1967 | const TargetRegisterClass *RC = |
1968 | getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID)); |
1969 | if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1970 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; |
1971 | NegOpc = AArch64::NEGv2i64; |
1972 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
1973 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; |
1974 | NegOpc = AArch64::NEGv4i32; |
1975 | } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) { |
1976 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; |
1977 | NegOpc = AArch64::NEGv2i32; |
1978 | } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) { |
1979 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; |
1980 | NegOpc = AArch64::NEGv4i16; |
1981 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) { |
1982 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; |
1983 | NegOpc = AArch64::NEGv8i16; |
1984 | } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) { |
1985 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; |
1986 | NegOpc = AArch64::NEGv16i8; |
1987 | } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) { |
1988 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; |
1989 | NegOpc = AArch64::NEGv8i8; |
1990 | } else { |
1991 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type" ); |
1992 | return false; |
1993 | } |
1994 | |
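  // Negate the shift amounts, then shift left by the (now negative) amounts:
  // SSHL/USHL treat a negative per-element shift as a right shift.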
1995 | auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg}); |
1996 | constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI); |
1997 | auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg}); |
1998 | constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI); |
1999 | I.eraseFromParent(); |
2000 | return true; |
2001 | } |
2002 | |
2003 | bool AArch64InstructionSelector::selectVaStartAAPCS( |
2004 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { |
2005 | |
2006 | if (STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(), |
2007 | IsVarArg: MF.getFunction().isVarArg())) |
2008 | return false; |
2009 | |
2010 | // The layout of the va_list struct is specified in the AArch64 Procedure Call |
2011 | // Standard, section 10.1.5. |
2012 | |
2013 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
2014 | const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8; |
2015 | const auto *PtrRegClass = |
2016 | STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; |
2017 | |
2018 | const MCInstrDesc &MCIDAddAddr = |
2019 | TII.get(Opcode: STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri); |
2020 | const MCInstrDesc &MCIDStoreAddr = |
2021 | TII.get(Opcode: STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui); |
2022 | |
2023 | /* |
2024 | * typedef struct va_list { |
2025 | * void * stack; // next stack param |
2026 | * void * gr_top; // end of GP arg reg save area |
2027 | * void * vr_top; // end of FP/SIMD arg reg save area |
2028 | * int gr_offs; // offset from gr_top to next GP register arg |
2029 | * int vr_offs; // offset from vr_top to next FP/SIMD register arg |
2030 | * } va_list; |
2031 | */ |
2032 | const auto VAList = I.getOperand(i: 0).getReg(); |
2033 | |
2034 | // Our current offset in bytes from the va_list struct (VAList). |
2035 | unsigned OffsetBytes = 0; |
2036 | |
2037 | // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes |
2038 | // and increment OffsetBytes by PtrSize. |
2039 | const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) { |
2040 | const Register Top = MRI.createVirtualRegister(RegClass: PtrRegClass); |
2041 | auto MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDAddAddr) |
2042 | .addDef(RegNo: Top) |
2043 | .addFrameIndex(Idx: FrameIndex) |
2044 | .addImm(Val: Imm) |
2045 | .addImm(Val: 0); |
2046 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2047 | |
2048 | const auto *MMO = *I.memoperands_begin(); |
2049 | MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDStoreAddr) |
2050 | .addUse(RegNo: Top) |
2051 | .addUse(RegNo: VAList) |
2052 | .addImm(Val: OffsetBytes / PtrSize) |
2053 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2054 | PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes), |
2055 | F: MachineMemOperand::MOStore, Size: PtrSize, BaseAlignment: MMO->getBaseAlign())); |
2056 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2057 | |
2058 | OffsetBytes += PtrSize; |
2059 | }; |
2060 | |
2061 | // void* stack at offset 0 |
2062 | PushAddress(FuncInfo->getVarArgsStackIndex(), 0); |
2063 | |
2064 | // void* gr_top at offset 8 (4 on ILP32) |
2065 | const unsigned GPRSize = FuncInfo->getVarArgsGPRSize(); |
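  // gr_top points one past the end of the GP register save area, i.e. the save
  // area's frame index plus the number of bytes saved in it.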
2066 | PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize); |
2067 | |
2068 | // void* vr_top at offset 16 (8 on ILP32) |
2069 | const unsigned FPRSize = FuncInfo->getVarArgsFPRSize(); |
2070 | PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize); |
2071 | |
2072 | // Helper function to store a 4-byte integer constant to VAList at offset |
2073 | // OffsetBytes, and increment OffsetBytes by 4. |
2074 | const auto PushIntConstant = [&](const int32_t Value) { |
2075 | constexpr int IntSize = 4; |
2076 | const Register Temp = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
2077 | auto MIB = |
2078 | BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVi32imm)) |
2079 | .addDef(RegNo: Temp) |
2080 | .addImm(Val: Value); |
2081 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2082 | |
2083 | const auto *MMO = *I.memoperands_begin(); |
2084 | MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRWui)) |
2085 | .addUse(RegNo: Temp) |
2086 | .addUse(RegNo: VAList) |
2087 | .addImm(Val: OffsetBytes / IntSize) |
2088 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2089 | PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes), |
2090 | F: MachineMemOperand::MOStore, Size: IntSize, BaseAlignment: MMO->getBaseAlign())); |
2091 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2092 | OffsetBytes += IntSize; |
2093 | }; |
2094 | |
2095 | // int gr_offs at offset 24 (12 on ILP32) |
2096 | PushIntConstant(-static_cast<int32_t>(GPRSize)); |
2097 | |
2098 | // int vr_offs at offset 28 (16 on ILP32) |
2099 | PushIntConstant(-static_cast<int32_t>(FPRSize)); |
2100 | |
2101 | assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset" ); |
2102 | |
2103 | I.eraseFromParent(); |
2104 | return true; |
2105 | } |
2106 | |
2107 | bool AArch64InstructionSelector::selectVaStartDarwin( |
2108 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { |
2109 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
2110 | Register ListReg = I.getOperand(i: 0).getReg(); |
2111 | |
2112 | Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
2113 | |
2114 | int FrameIdx = FuncInfo->getVarArgsStackIndex(); |
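  // Win64 varargs also use a single-pointer va_list; point it at the GP
  // register save area when one exists, otherwise at the first stack argument.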
2115 | if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( |
2116 | CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) { |
2117 | FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 |
2118 | ? FuncInfo->getVarArgsGPRIndex() |
2119 | : FuncInfo->getVarArgsStackIndex(); |
2120 | } |
2121 | |
2122 | auto MIB = |
2123 | BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri)) |
2124 | .addDef(RegNo: ArgsAddrReg) |
2125 | .addFrameIndex(Idx: FrameIdx) |
2126 | .addImm(Val: 0) |
2127 | .addImm(Val: 0); |
2128 | |
2129 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2130 | |
2131 | MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui)) |
2132 | .addUse(RegNo: ArgsAddrReg) |
2133 | .addUse(RegNo: ListReg) |
2134 | .addImm(Val: 0) |
2135 | .addMemOperand(MMO: *I.memoperands_begin()); |
2136 | |
2137 | constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI); |
2138 | I.eraseFromParent(); |
2139 | return true; |
2140 | } |
2141 | |
2142 | void AArch64InstructionSelector::materializeLargeCMVal( |
2143 | MachineInstr &I, const Value *V, unsigned OpFlags) { |
2144 | MachineBasicBlock &MBB = *I.getParent(); |
2145 | MachineFunction &MF = *MBB.getParent(); |
2146 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2147 | |
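  // Materialize the full 64-bit address with a MOVZ of bits [15:0] followed by
  // MOVKs for bits [31:16], [47:32] and [63:48].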
2148 | auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}); |
2149 | MovZ->addOperand(MF, Op: I.getOperand(i: 1)); |
2150 | MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 | |
2151 | AArch64II::MO_NC); |
2152 | MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0)); |
2153 | constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI); |
2154 | |
2155 | auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, |
2156 | Register ForceDstReg) { |
2157 | Register DstReg = ForceDstReg |
2158 | ? ForceDstReg |
2159 | : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
2160 | auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg); |
2161 | if (auto *GV = dyn_cast<GlobalValue>(Val: V)) { |
2162 | MovI->addOperand(MF, Op: MachineOperand::CreateGA( |
2163 | GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags)); |
2164 | } else { |
2165 | MovI->addOperand( |
2166 | MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V), |
2167 | Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags)); |
2168 | } |
2169 | MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset)); |
2170 | constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI); |
2171 | return DstReg; |
2172 | }; |
2173 | Register DstReg = BuildMovK(MovZ.getReg(Idx: 0), |
2174 | AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); |
2175 | DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); |
2176 | BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg()); |
2177 | } |
2178 | |
2179 | bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { |
2180 | MachineBasicBlock &MBB = *I.getParent(); |
2181 | MachineFunction &MF = *MBB.getParent(); |
2182 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2183 | |
2184 | switch (I.getOpcode()) { |
2185 | case TargetOpcode::G_STORE: { |
2186 | bool Changed = contractCrossBankCopyIntoStore(I, MRI); |
2187 | MachineOperand &SrcOp = I.getOperand(i: 0); |
2188 | if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) { |
2189 | // Allow matching with imported patterns for stores of pointers. Unlike |
2190 | // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy |
2191 | // and constrain. |
2192 | auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp); |
2193 | Register NewSrc = Copy.getReg(Idx: 0); |
2194 | SrcOp.setReg(NewSrc); |
2195 | RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI); |
2196 | Changed = true; |
2197 | } |
2198 | return Changed; |
2199 | } |
2200 | case TargetOpcode::G_PTR_ADD: { |
2201 | // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer |
2202 | // arithmetic semantics instead of falling back to regular arithmetic. |
2203 | const auto &TL = STI.getTargetLowering(); |
2204 | if (TL->shouldPreservePtrArith(F: MF.getFunction(), PtrVT: EVT())) |
2205 | return false; |
2206 | return convertPtrAddToAdd(I, MRI); |
2207 | } |
2208 | case TargetOpcode::G_LOAD: { |
2209 | // For scalar loads of pointers, we try to convert the dest type from p0 |
2210 | // to s64 so that our imported patterns can match. Like with the G_PTR_ADD |
2211 | // conversion, this should be ok because all users should have been |
2212 | // selected already, so the type doesn't matter for them. |
2213 | Register DstReg = I.getOperand(i: 0).getReg(); |
2214 | const LLT DstTy = MRI.getType(Reg: DstReg); |
2215 | if (!DstTy.isPointer()) |
2216 | return false; |
2217 | MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64)); |
2218 | return true; |
2219 | } |
2220 | case AArch64::G_DUP: { |
2221 | // Convert the type from p0 to s64 to help selection. |
2222 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2223 | if (!DstTy.isPointerVector()) |
2224 | return false; |
2225 | auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg()); |
2226 | MRI.setType(VReg: I.getOperand(i: 0).getReg(), |
2227 | Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64))); |
2228 | MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass); |
2229 | I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0)); |
2230 | return true; |
2231 | } |
2232 | case AArch64::G_INSERT_VECTOR_ELT: { |
2233 | // Convert the type from p0 to s64 to help selection. |
2234 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2235 | LLT SrcVecTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
2236 | if (!SrcVecTy.isPointerVector()) |
2237 | return false; |
2238 | auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 2).getReg()); |
2239 | MRI.setType(VReg: I.getOperand(i: 1).getReg(), |
2240 | Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64))); |
2241 | MRI.setType(VReg: I.getOperand(i: 0).getReg(), |
2242 | Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64))); |
2243 | MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass); |
2244 | I.getOperand(i: 2).setReg(NewSrc.getReg(Idx: 0)); |
2245 | return true; |
2246 | } |
2247 | case TargetOpcode::G_UITOFP: |
2248 | case TargetOpcode::G_SITOFP: { |
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
2253 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2254 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2255 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2256 | if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) |
2257 | return false; |
2258 | |
2259 | if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { |
2260 | if (I.getOpcode() == TargetOpcode::G_SITOFP) |
2261 | I.setDesc(TII.get(Opcode: AArch64::G_SITOF)); |
2262 | else |
2263 | I.setDesc(TII.get(Opcode: AArch64::G_UITOF)); |
2264 | return true; |
2265 | } |
2266 | return false; |
2267 | } |
2268 | default: |
2269 | return false; |
2270 | } |
2271 | } |
2272 | |
2273 | /// This lowering tries to look for G_PTR_ADD instructions and then converts |
2274 | /// them to a standard G_ADD with a COPY on the source. |
2275 | /// |
2276 | /// The motivation behind this is to expose the add semantics to the imported |
2277 | /// tablegen patterns. We shouldn't need to check for uses being loads/stores, |
2278 | /// because the selector works bottom up, uses before defs. By the time we |
2279 | /// end up trying to select a G_PTR_ADD, we should have already attempted to |
2280 | /// fold this into addressing modes and were therefore unsuccessful. |
2281 | bool AArch64InstructionSelector::convertPtrAddToAdd( |
2282 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2283 | assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD" ); |
2284 | Register DstReg = I.getOperand(i: 0).getReg(); |
2285 | Register AddOp1Reg = I.getOperand(i: 1).getReg(); |
2286 | const LLT PtrTy = MRI.getType(Reg: DstReg); |
2287 | if (PtrTy.getAddressSpace() != 0) |
2288 | return false; |
2289 | |
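  // Cast the pointer operand to an equivalent integer type: s64 for scalar
  // pointers, v2s64 for a vector of pointers.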
2290 | const LLT CastPtrTy = |
2291 | PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64); |
2292 | auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg); |
2293 | // Set regbanks on the registers. |
2294 | if (PtrTy.isVector()) |
2295 | MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID)); |
2296 | else |
2297 | MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID)); |
2298 | |
2299 | // Now turn the %dst(p0) = G_PTR_ADD %base, off into: |
2300 | // %dst(intty) = G_ADD %intbase, off |
2301 | I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD)); |
2302 | MRI.setType(VReg: DstReg, Ty: CastPtrTy); |
2303 | I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0)); |
2304 | if (!select(I&: *PtrToInt)) { |
2305 | LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" ); |
2306 | return false; |
2307 | } |
2308 | |
2309 | // Also take the opportunity here to try to do some optimization. |
2310 | // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. |
2311 | Register NegatedReg; |
2312 | if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg)))) |
2313 | return true; |
2314 | I.getOperand(i: 2).setReg(NegatedReg); |
2315 | I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB)); |
2316 | return true; |
2317 | } |
2318 | |
2319 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, |
2320 | MachineRegisterInfo &MRI) { |
2321 | // We try to match the immediate variant of LSL, which is actually an alias |
2322 | // for a special case of UBFM. Otherwise, we fall back to the imported |
2323 | // selector which will match the register variant. |
2324 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op" ); |
2325 | const auto &MO = I.getOperand(i: 2); |
2326 | auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI); |
2327 | if (!VRegAndVal) |
2328 | return false; |
2329 | |
2330 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2331 | if (DstTy.isVector()) |
2332 | return false; |
2333 | bool Is64Bit = DstTy.getSizeInBits() == 64; |
2334 | auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO); |
2335 | auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO); |
2336 | |
2337 | if (!Imm1Fn || !Imm2Fn) |
2338 | return false; |
2339 | |
2340 | auto NewI = |
2341 | MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, |
2342 | DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()}); |
2343 | |
2344 | for (auto &RenderFn : *Imm1Fn) |
2345 | RenderFn(NewI); |
2346 | for (auto &RenderFn : *Imm2Fn) |
2347 | RenderFn(NewI); |
2348 | |
2349 | I.eraseFromParent(); |
2350 | return constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI); |
2351 | } |
2352 | |
2353 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( |
2354 | MachineInstr &I, MachineRegisterInfo &MRI) { |
2355 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE" ); |
2356 | // If we're storing a scalar, it doesn't matter what register bank that |
2357 | // scalar is on. All that matters is the size. |
2358 | // |
2359 | // So, if we see something like this (with a 32-bit scalar as an example): |
2360 | // |
2361 | // %x:gpr(s32) = ... something ... |
2362 | // %y:fpr(s32) = COPY %x:gpr(s32) |
2363 | // G_STORE %y:fpr(s32) |
2364 | // |
2365 | // We can fix this up into something like this: |
2366 | // |
2367 | // G_STORE %x:gpr(s32) |
2368 | // |
2369 | // And then continue the selection process normally. |
2370 | Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI); |
2371 | if (!DefDstReg.isValid()) |
2372 | return false; |
2373 | LLT DefDstTy = MRI.getType(Reg: DefDstReg); |
2374 | Register StoreSrcReg = I.getOperand(i: 0).getReg(); |
2375 | LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg); |
2376 | |
2377 | // If we get something strange like a physical register, then we shouldn't |
2378 | // go any further. |
2379 | if (!DefDstTy.isValid()) |
2380 | return false; |
2381 | |
2382 | // Are the source and dst types the same size? |
2383 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) |
2384 | return false; |
2385 | |
2386 | if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) == |
2387 | RBI.getRegBank(Reg: DefDstReg, MRI, TRI)) |
2388 | return false; |
2389 | |
2390 | // We have a cross-bank copy, which is entering a store. Let's fold it. |
2391 | I.getOperand(i: 0).setReg(DefDstReg); |
2392 | return true; |
2393 | } |
2394 | |
2395 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { |
2396 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2397 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2398 | |
2399 | MachineBasicBlock &MBB = *I.getParent(); |
2400 | MachineFunction &MF = *MBB.getParent(); |
2401 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2402 | |
2403 | switch (I.getOpcode()) { |
2404 | case AArch64::G_DUP: { |
2405 | // Before selecting a DUP instruction, check if it is better selected as a |
2406 | // MOV or load from a constant pool. |
2407 | Register Src = I.getOperand(i: 1).getReg(); |
2408 | auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI); |
2409 | if (!ValAndVReg) |
2410 | return false; |
2411 | LLVMContext &Ctx = MF.getFunction().getContext(); |
2412 | Register Dst = I.getOperand(i: 0).getReg(); |
2413 | auto *CV = ConstantDataVector::getSplat( |
2414 | NumElts: MRI.getType(Reg: Dst).getNumElements(), |
2415 | Elt: ConstantInt::get( |
2416 | Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Dst).getScalarSizeInBits()), |
2417 | V: ValAndVReg->Value.trunc(width: MRI.getType(Reg: Dst).getScalarSizeInBits()))); |
2418 | if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI)) |
2419 | return false; |
2420 | I.eraseFromParent(); |
2421 | return true; |
2422 | } |
2423 | case TargetOpcode::G_SEXT: |
2424 | // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV |
2425 | // over a normal extend. |
2426 | if (selectUSMovFromExtend(I, MRI)) |
2427 | return true; |
2428 | return false; |
2429 | case TargetOpcode::G_BR: |
2430 | return false; |
2431 | case TargetOpcode::G_SHL: |
2432 | return earlySelectSHL(I, MRI); |
2433 | case TargetOpcode::G_CONSTANT: { |
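    // Materialize integer zero as a copy from the zero register (WZR/XZR).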
2434 | bool IsZero = false; |
2435 | if (I.getOperand(i: 1).isCImm()) |
2436 | IsZero = I.getOperand(i: 1).getCImm()->isZero(); |
2437 | else if (I.getOperand(i: 1).isImm()) |
2438 | IsZero = I.getOperand(i: 1).getImm() == 0; |
2439 | |
2440 | if (!IsZero) |
2441 | return false; |
2442 | |
2443 | Register DefReg = I.getOperand(i: 0).getReg(); |
2444 | LLT Ty = MRI.getType(Reg: DefReg); |
2445 | if (Ty.getSizeInBits() == 64) { |
2446 | I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false); |
2447 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
2448 | } else if (Ty.getSizeInBits() == 32) { |
2449 | I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false); |
2450 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI); |
2451 | } else |
2452 | return false; |
2453 | |
2454 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
2455 | return true; |
2456 | } |
2457 | |
2458 | case TargetOpcode::G_ADD: { |
2459 | // Check if this is being fed by a G_ICMP on either side. |
2460 | // |
2461 | // (cmp pred, x, y) + z |
2462 | // |
2463 | // In the above case, when the cmp is true, we increment z by 1. So, we can |
2464 | // fold the add into the cset for the cmp by using cinc. |
2465 | // |
2466 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. |
2467 | Register AddDst = I.getOperand(i: 0).getReg(); |
2468 | Register AddLHS = I.getOperand(i: 1).getReg(); |
2469 | Register AddRHS = I.getOperand(i: 2).getReg(); |
2470 | // Only handle scalars. |
2471 | LLT Ty = MRI.getType(Reg: AddLHS); |
2472 | if (Ty.isVector()) |
2473 | return false; |
2474 | // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 |
2475 | // bits. |
2476 | unsigned Size = Ty.getSizeInBits(); |
2477 | if (Size != 32 && Size != 64) |
2478 | return false; |
2479 | auto MatchCmp = [&](Register Reg) -> MachineInstr * { |
2480 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2481 | return nullptr; |
2482 | // If the LHS of the add is 32 bits, then we want to fold a 32-bit |
2483 | // compare. |
2484 | if (Size == 32) |
2485 | return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI); |
2486 | // We model scalar compares using 32-bit destinations right now. |
2487 | // If it's a 64-bit compare, it'll have 64-bit sources. |
2488 | Register ZExt; |
2489 | if (!mi_match(R: Reg, MRI, |
2490 | P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt)))))) |
2491 | return nullptr; |
2492 | auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI); |
2493 | if (!Cmp || |
2494 | MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64) |
2495 | return nullptr; |
2496 | return Cmp; |
2497 | }; |
2498 | // Try to match |
2499 | // z + (cmp pred, x, y) |
2500 | MachineInstr *Cmp = MatchCmp(AddRHS); |
2501 | if (!Cmp) { |
2502 | // (cmp pred, x, y) + z |
2503 | std::swap(a&: AddLHS, b&: AddRHS); |
2504 | Cmp = MatchCmp(AddRHS); |
2505 | if (!Cmp) |
2506 | return false; |
2507 | } |
2508 | auto &PredOp = Cmp->getOperand(i: 1); |
2509 | auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); |
2510 | const AArch64CC::CondCode InvCC = |
2511 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
2512 | MIB.setInstrAndDebugLoc(I); |
2513 | emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2), |
2514 | /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB); |
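    // CSINC of AddLHS with itself under the inverted condition gives AddLHS
    // when the compare fails and AddLHS + 1 when it holds, i.e. a CINC on the
    // compare's condition.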
2515 | emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB); |
2516 | I.eraseFromParent(); |
2517 | return true; |
2518 | } |
2519 | case TargetOpcode::G_OR: { |
2520 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of |
2521 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via |
2522 | // shifting and masking that we can replace with a BFI (encoded as a BFM). |
2523 | Register Dst = I.getOperand(i: 0).getReg(); |
2524 | LLT Ty = MRI.getType(Reg: Dst); |
2525 | |
2526 | if (!Ty.isScalar()) |
2527 | return false; |
2528 | |
2529 | unsigned Size = Ty.getSizeInBits(); |
2530 | if (Size != 32 && Size != 64) |
2531 | return false; |
2532 | |
2533 | Register ShiftSrc; |
2534 | int64_t ShiftImm; |
2535 | Register MaskSrc; |
2536 | int64_t MaskImm; |
2537 | if (!mi_match( |
2538 | R: Dst, MRI, |
2539 | P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))), |
2540 | R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm)))))) |
2541 | return false; |
2542 | |
2543 | if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) |
2544 | return false; |
2545 | |
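    // This is a BFI of the low (Size - ShiftImm) bits of ShiftSrc at bit
    // position ShiftImm; in BFM form that is immr = Size - ShiftImm and
    // imms = Size - ShiftImm - 1.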
2546 | int64_t Immr = Size - ShiftImm; |
2547 | int64_t Imms = Size - ShiftImm - 1; |
2548 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; |
2549 | emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB); |
2550 | I.eraseFromParent(); |
2551 | return true; |
2552 | } |
2553 | case TargetOpcode::G_FENCE: { |
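    // Operand 1 is the fence scope: a singlethread fence only needs a compiler
    // barrier. Otherwise operand 0 is the ordering: an acquire-only fence can
    // use the weaker DMB ISHLD (0x9), anything stronger needs DMB ISH (0xb).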
2554 | if (I.getOperand(i: 1).getImm() == 0) |
2555 | BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER)); |
2556 | else |
2557 | BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB)) |
2558 | .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb); |
2559 | I.eraseFromParent(); |
2560 | return true; |
2561 | } |
2562 | default: |
2563 | return false; |
2564 | } |
2565 | } |
2566 | |
2567 | bool AArch64InstructionSelector::select(MachineInstr &I) { |
2568 | assert(I.getParent() && "Instruction should be in a basic block!" ); |
2569 | assert(I.getParent()->getParent() && "Instruction should be in a function!" ); |
2570 | |
2571 | MachineBasicBlock &MBB = *I.getParent(); |
2572 | MachineFunction &MF = *MBB.getParent(); |
2573 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2574 | |
2575 | const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
2576 | if (Subtarget->requiresStrictAlign()) { |
2577 | // We don't support this feature yet. |
2578 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n" ); |
2579 | return false; |
2580 | } |
2581 | |
2582 | MIB.setInstrAndDebugLoc(I); |
2583 | |
2584 | unsigned Opcode = I.getOpcode(); |
  // G_PHI requires the same handling as PHI.
2586 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { |
2587 | // Certain non-generic instructions also need some special handling. |
2588 | |
2589 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) |
2590 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2591 | |
2592 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { |
2593 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2594 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2595 | |
2596 | const RegClassOrRegBank &RegClassOrBank = |
2597 | MRI.getRegClassOrRegBank(Reg: DefReg); |
2598 | |
2599 | const TargetRegisterClass *DefRC = |
2600 | dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank); |
2601 | if (!DefRC) { |
2602 | if (!DefTy.isValid()) { |
2603 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n" ); |
2604 | return false; |
2605 | } |
2606 | const RegisterBank &RB = *cast<const RegisterBank *>(Val: RegClassOrBank); |
2607 | DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB); |
2608 | if (!DefRC) { |
2609 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n" ); |
2610 | return false; |
2611 | } |
2612 | } |
2613 | |
2614 | I.setDesc(TII.get(Opcode: TargetOpcode::PHI)); |
2615 | |
2616 | return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI); |
2617 | } |
2618 | |
2619 | if (I.isCopy()) |
2620 | return selectCopy(I, TII, MRI, TRI, RBI); |
2621 | |
2622 | if (I.isDebugInstr()) |
2623 | return selectDebugInstr(I, MRI, RBI); |
2624 | |
2625 | return true; |
2626 | } |
2627 | |
2628 | |
2629 | if (I.getNumOperands() != I.getNumExplicitOperands()) { |
2630 | LLVM_DEBUG( |
2631 | dbgs() << "Generic instruction has unexpected implicit operands\n" ); |
2632 | return false; |
2633 | } |
2634 | |
2635 | // Try to do some lowering before we start instruction selecting. These |
2636 | // lowerings are purely transformations on the input G_MIR and so selection |
2637 | // must continue after any modification of the instruction. |
2638 | if (preISelLower(I)) { |
2639 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. |
2640 | } |
2641 | |
  // There may be patterns that the importer can't handle optimally and instead
  // selects into a suboptimal sequence, so our custom C++ selection code never
  // gets a chance to work on them. Therefore, we attempt an early selection
  // here to give priority to certain selection routines over the imported
  // ones.
2647 | if (earlySelect(I)) |
2648 | return true; |
2649 | |
2650 | if (selectImpl(I, CoverageInfo&: *CoverageInfo)) |
2651 | return true; |
2652 | |
2653 | LLT Ty = |
2654 | I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{}; |
2655 | |
2656 | switch (Opcode) { |
2657 | case TargetOpcode::G_SBFX: |
2658 | case TargetOpcode::G_UBFX: { |
2659 | static const unsigned OpcTable[2][2] = { |
2660 | {AArch64::UBFMWri, AArch64::UBFMXri}, |
2661 | {AArch64::SBFMWri, AArch64::SBFMXri}}; |
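    // G_SBFX/G_UBFX take an LSB and a width; the SBFM/UBFM extract form takes
    // immr = LSB and imms = LSB + width - 1.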
2662 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; |
2663 | unsigned Size = Ty.getSizeInBits(); |
2664 | unsigned Opc = OpcTable[IsSigned][Size == 64]; |
2665 | auto Cst1 = |
2666 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI); |
2667 | assert(Cst1 && "Should have gotten a constant for src 1?" ); |
2668 | auto Cst2 = |
2669 | getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI); |
2670 | assert(Cst2 && "Should have gotten a constant for src 2?" ); |
2671 | auto LSB = Cst1->Value.getZExtValue(); |
2672 | auto Width = Cst2->Value.getZExtValue(); |
2673 | auto BitfieldInst = |
2674 | MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)}) |
2675 | .addImm(Val: LSB) |
2676 | .addImm(Val: LSB + Width - 1); |
2677 | I.eraseFromParent(); |
2678 | return constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI); |
2679 | } |
2680 | case TargetOpcode::G_BRCOND: |
2681 | return selectCompareBranch(I, MF, MRI); |
2682 | |
2683 | case TargetOpcode::G_BRINDIRECT: { |
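    // If ptrauth block-address discrimination is enabled, emit an
    // authenticating BRA with the IA key, the constant discriminator, and no
    // address discriminator; otherwise this is a plain BR.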
2684 | const Function &Fn = MF.getFunction(); |
2685 | if (std::optional<uint16_t> BADisc = |
2686 | STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) { |
2687 | auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()}); |
2688 | MI.addImm(Val: AArch64PACKey::IA); |
2689 | MI.addImm(Val: *BADisc); |
2690 | MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR); |
2691 | I.eraseFromParent(); |
2692 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
2693 | } |
2694 | I.setDesc(TII.get(Opcode: AArch64::BR)); |
2695 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2696 | } |
2697 | |
2698 | case TargetOpcode::G_BRJT: |
2699 | return selectBrJT(I, MRI); |
2700 | |
2701 | case AArch64::G_ADD_LOW: { |
    // This op may have been separated from its ADRP companion by the localizer
2703 | // or some other code motion pass. Given that many CPUs will try to |
2704 | // macro fuse these operations anyway, select this into a MOVaddr pseudo |
2705 | // which will later be expanded into an ADRP+ADD pair after scheduling. |
2706 | MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
2707 | if (BaseMI->getOpcode() != AArch64::ADRP) { |
2708 | I.setDesc(TII.get(Opcode: AArch64::ADDXri)); |
2709 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2710 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2711 | } |
2712 | assert(TM.getCodeModel() == CodeModel::Small && |
2713 | "Expected small code model" ); |
2714 | auto Op1 = BaseMI->getOperand(i: 1); |
2715 | auto Op2 = I.getOperand(i: 2); |
2716 | auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {}) |
2717 | .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(), |
2718 | TargetFlags: Op1.getTargetFlags()) |
2719 | .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(), |
2720 | TargetFlags: Op2.getTargetFlags()); |
2721 | I.eraseFromParent(); |
2722 | return constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI); |
2723 | } |
2724 | |
2725 | case TargetOpcode::G_FCONSTANT: |
2726 | case TargetOpcode::G_CONSTANT: { |
2727 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; |
2728 | |
2729 | const LLT s8 = LLT::scalar(SizeInBits: 8); |
2730 | const LLT s16 = LLT::scalar(SizeInBits: 16); |
2731 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
2732 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
2733 | const LLT s128 = LLT::scalar(SizeInBits: 128); |
2734 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
2735 | |
2736 | const Register DefReg = I.getOperand(i: 0).getReg(); |
2737 | const LLT DefTy = MRI.getType(Reg: DefReg); |
2738 | const unsigned DefSize = DefTy.getSizeInBits(); |
2739 | const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI); |
2740 | |
2741 | // FIXME: Redundant check, but even less readable when factored out. |
2742 | if (isFP) { |
2743 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { |
2744 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2745 | << " constant, expected: " << s16 << " or " << s32 |
2746 | << " or " << s64 << " or " << s128 << '\n'); |
2747 | return false; |
2748 | } |
2749 | |
2750 | if (RB.getID() != AArch64::FPRRegBankID) { |
2751 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty |
2752 | << " constant on bank: " << RB |
2753 | << ", expected: FPR\n" ); |
2754 | return false; |
2755 | } |
2756 | |
      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      // The exception is FP128: tablegen does not cover 0.0 there, so we handle
      // that case below.
2761 | if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0)) |
2762 | return false; |
2763 | } else { |
2764 | // s32 and s64 are covered by tablegen. |
2765 | if (Ty != p0 && Ty != s8 && Ty != s16) { |
2766 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2767 | << " constant, expected: " << s32 << ", " << s64 |
2768 | << ", or " << p0 << '\n'); |
2769 | return false; |
2770 | } |
2771 | |
2772 | if (RB.getID() != AArch64::GPRRegBankID) { |
2773 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty |
2774 | << " constant on bank: " << RB |
2775 | << ", expected: GPR\n" ); |
2776 | return false; |
2777 | } |
2778 | } |
2779 | |
2780 | if (isFP) { |
2781 | const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB); |
      // For 16 and 128-bit values, and for 32/64-bit values whose immediate
      // TLI reports as illegal, emit a constant pool load.
2783 | switch (DefSize) { |
2784 | default: |
2785 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?" ); |
2786 | case 32: |
2787 | case 64: { |
2788 | bool OptForSize = shouldOptForSize(MF: &MF); |
2789 | const auto &TLI = MF.getSubtarget().getTargetLowering(); |
2790 | // If TLI says that this fpimm is illegal, then we'll expand to a |
2791 | // constant pool load. |
2792 | if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(), |
2793 | EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize)) |
2794 | break; |
2795 | [[fallthrough]]; |
2796 | } |
2797 | case 16: |
2798 | case 128: { |
2799 | auto *FPImm = I.getOperand(i: 1).getFPImm(); |
2800 | auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB); |
2801 | if (!LoadMI) { |
2802 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n" ); |
2803 | return false; |
2804 | } |
2805 | MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()}); |
2806 | I.eraseFromParent(); |
2807 | return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI); |
2808 | } |
2809 | } |
2810 | |
2811 | assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size" ); |
      // Otherwise, materialize the constant into a GPR with a normal
      // MOVi32imm/MOVi64imm, then copy it into the FPR def.
2813 | const Register DefGPRReg = MRI.createVirtualRegister( |
2814 | RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2815 | MachineOperand &RegOp = I.getOperand(i: 0); |
2816 | RegOp.setReg(DefGPRReg); |
2817 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2818 | MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg}); |
2819 | |
2820 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) { |
2821 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n" ); |
2822 | return false; |
2823 | } |
2824 | |
2825 | MachineOperand &ImmOp = I.getOperand(i: 1); |
2826 | // FIXME: Is going through int64_t always correct? |
2827 | ImmOp.ChangeToImmediate( |
2828 | ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); |
2829 | } else if (I.getOperand(i: 1).isCImm()) { |
2830 | uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue(); |
2831 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2832 | } else if (I.getOperand(i: 1).isImm()) { |
2833 | uint64_t Val = I.getOperand(i: 1).getImm(); |
2834 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val); |
2835 | } |
2836 | |
2837 | const unsigned MovOpc = |
2838 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
2839 | I.setDesc(TII.get(Opcode: MovOpc)); |
2840 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2841 | return true; |
2842 | } |
2843 | case TargetOpcode::G_EXTRACT: { |
2844 | Register DstReg = I.getOperand(i: 0).getReg(); |
2845 | Register SrcReg = I.getOperand(i: 1).getReg(); |
2846 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
2847 | LLT DstTy = MRI.getType(Reg: DstReg); |
2848 | (void)DstTy; |
2849 | unsigned SrcSize = SrcTy.getSizeInBits(); |
2850 | |
2851 | if (SrcTy.getSizeInBits() > 64) { |
2852 | // This should be an extract of an s128, which is like a vector extract. |
2853 | if (SrcTy.getSizeInBits() != 128) |
2854 | return false; |
2855 | // Only support extracting 64 bits from an s128 at the moment. |
2856 | if (DstTy.getSizeInBits() != 64) |
2857 | return false; |
2858 | |
2859 | unsigned Offset = I.getOperand(i: 2).getImm(); |
2860 | if (Offset % 64 != 0) |
2861 | return false; |
2862 | |
      // Check that we always have the right regbank.
2864 | const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
2865 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
2866 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!" ); |
2867 | |
2868 | if (SrcRB.getID() == AArch64::GPRRegBankID) { |
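        // On the GPR bank an s128 lives in a 64-bit register pair: bits [63:0]
        // are the sube64 subregister and bits [127:64] are subo64, so the
        // extract is just a subregister COPY.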
2869 | auto NewI = |
2870 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}) |
2871 | .addUse(RegNo: SrcReg, Flags: 0, |
2872 | SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64); |
2873 | constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI, |
2874 | RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0)); |
2875 | I.eraseFromParent(); |
2876 | return true; |
2877 | } |
2878 | |
2879 | // Emit the same code as a vector extract. |
2880 | // Offset must be a multiple of 64. |
2881 | unsigned LaneIdx = Offset / 64; |
      MachineInstr *Extract = emitExtractVectorElt(
          DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2884 | if (!Extract) |
2885 | return false; |
2886 | I.eraseFromParent(); |
2887 | return true; |
2888 | } |
2889 | |
2890 | I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); |
2891 | MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() + |
2892 | Ty.getSizeInBits() - 1); |
2893 | |
2894 | if (SrcSize < 64) { |
2895 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && |
2896 | "unexpected G_EXTRACT types" ); |
2897 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2898 | } |
2899 | |
2900 | DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2901 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator())); |
2902 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {}) |
2903 | .addReg(RegNo: DstReg, flags: 0, SubReg: AArch64::sub_32); |
2904 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
2905 | RC: AArch64::GPR32RegClass, MRI); |
2906 | I.getOperand(i: 0).setReg(DstReg); |
2907 | |
2908 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2909 | } |
2910 | |
2911 | case TargetOpcode::G_INSERT: { |
2912 | LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg()); |
2913 | LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
2914 | unsigned DstSize = DstTy.getSizeInBits(); |
2915 | // Larger inserts are vectors, same-size ones should be something else by |
2916 | // now (split up or turned into COPYs). |
2917 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) |
2918 | return false; |
2919 | |
2920 | I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); |
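    // For a bitfield insert, BFM's immr is the rotation that places the field
    // at bit LSB, i.e. (DstSize - LSB) % DstSize, and imms is Width - 1.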
2921 | unsigned LSB = I.getOperand(i: 3).getImm(); |
2922 | unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits(); |
2923 | I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize); |
2924 | MachineInstrBuilder(MF, I).addImm(Val: Width - 1); |
2925 | |
2926 | if (DstSize < 64) { |
2927 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && |
2928 | "unexpected G_INSERT types" ); |
2929 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2930 | } |
2931 | |
2932 | Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
2933 | BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(), |
2934 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG)) |
2935 | .addDef(RegNo: SrcReg) |
2936 | .addImm(Val: 0) |
2937 | .addUse(RegNo: I.getOperand(i: 2).getReg()) |
2938 | .addImm(Val: AArch64::sub_32); |
2939 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(), |
2940 | RC: AArch64::GPR32RegClass, MRI); |
2941 | I.getOperand(i: 2).setReg(SrcReg); |
2942 | |
2943 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2944 | } |
2945 | case TargetOpcode::G_FRAME_INDEX: { |
2946 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). |
2947 | if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
2948 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty |
2949 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
2950 | return false; |
2951 | } |
2952 | I.setDesc(TII.get(Opcode: AArch64::ADDXri)); |
2953 | |
2954 | // MOs for a #0 shifted immediate. |
2955 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2956 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
2957 | |
2958 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
2959 | } |
2960 | |
2961 | case TargetOpcode::G_GLOBAL_VALUE: { |
2962 | const GlobalValue *GV = nullptr; |
2963 | unsigned OpFlags; |
2964 | if (I.getOperand(i: 1).isSymbol()) { |
2965 | OpFlags = I.getOperand(i: 1).getTargetFlags(); |
2966 | // Currently only used by "RtLibUseGOT". |
2967 | assert(OpFlags == AArch64II::MO_GOT); |
2968 | } else { |
2969 | GV = I.getOperand(i: 1).getGlobal(); |
2970 | if (GV->isThreadLocal()) { |
2971 | // We don't support instructions with emulated TLS variables yet |
2972 | if (TM.useEmulatedTLS()) |
2973 | return false; |
2974 | return selectTLSGlobalValue(I, MRI); |
2975 | } |
2976 | OpFlags = STI.ClassifyGlobalReference(GV, TM); |
2977 | } |
2978 | |
2979 | if (OpFlags & AArch64II::MO_GOT) { |
2980 | I.setDesc(TII.get(Opcode: MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT() |
2981 | ? AArch64::LOADgotAUTH |
2982 | : AArch64::LOADgot)); |
2983 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2984 | } else if (TM.getCodeModel() == CodeModel::Large && |
2985 | !TM.isPositionIndependent()) { |
2986 | // Materialize the global using movz/movk instructions. |
2987 | materializeLargeCMVal(I, V: GV, OpFlags); |
2988 | I.eraseFromParent(); |
2989 | return true; |
2990 | } else if (TM.getCodeModel() == CodeModel::Tiny) { |
2991 | I.setDesc(TII.get(Opcode: AArch64::ADR)); |
2992 | I.getOperand(i: 1).setTargetFlags(OpFlags); |
2993 | } else { |
2994 | I.setDesc(TII.get(Opcode: AArch64::MOVaddr)); |
2995 | I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); |
2996 | MachineInstrBuilder MIB(MF, I); |
2997 | MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(), |
2998 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
2999 | } |
3000 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3001 | } |
3002 | |
3003 | case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE: |
3004 | return selectPtrAuthGlobalValue(I, MRI); |
3005 | |
3006 | case TargetOpcode::G_ZEXTLOAD: |
3007 | case TargetOpcode::G_LOAD: |
3008 | case TargetOpcode::G_STORE: { |
3009 | GLoadStore &LdSt = cast<GLoadStore>(Val&: I); |
3010 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; |
3011 | LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg()); |
3012 | |
3013 | // Can only handle AddressSpace 0, 64-bit pointers. |
3014 | if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
3015 | return false; |
3016 | } |
3017 | |
3018 | uint64_t MemSizeInBytes = LdSt.getMemSize().getValue(); |
3019 | unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue(); |
3020 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); |
3021 | |
3022 | // Need special instructions for atomics that affect ordering. |
3023 | if (Order != AtomicOrdering::NotAtomic && |
3024 | Order != AtomicOrdering::Unordered && |
3025 | Order != AtomicOrdering::Monotonic) { |
3026 | assert(!isa<GZExtLoad>(LdSt)); |
3027 | assert(MemSizeInBytes <= 8 && |
3028 | "128-bit atomics should already be custom-legalized" ); |
3029 | |
3030 | if (isa<GLoad>(Val: LdSt)) { |
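        // Prefer LDAPR (RCpc load-acquire) over LDAR when the subtarget has
        // RCpc and the ordering is weaker than sequentially consistent.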
3031 | static constexpr unsigned LDAPROpcodes[] = { |
3032 | AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; |
3033 | static constexpr unsigned LDAROpcodes[] = { |
3034 | AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; |
3035 | ArrayRef<unsigned> Opcodes = |
3036 | STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent |
3037 | ? LDAPROpcodes |
3038 | : LDAROpcodes; |
3039 | I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)])); |
3040 | } else { |
3041 | static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, |
3042 | AArch64::STLRW, AArch64::STLRX}; |
3043 | Register ValReg = LdSt.getReg(Idx: 0); |
3044 | if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { |
3045 | // Emit a subreg copy of 32 bits. |
3046 | Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3047 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {}) |
3048 | .addReg(RegNo: I.getOperand(i: 0).getReg(), flags: 0, SubReg: AArch64::sub_32); |
3049 | I.getOperand(i: 0).setReg(NewVal); |
3050 | } |
3051 | I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)])); |
3052 | } |
3053 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3054 | return true; |
3055 | } |
3056 | |
3057 | #ifndef NDEBUG |
3058 | const Register PtrReg = LdSt.getPointerReg(); |
3059 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); |
3060 | // Check that the pointer register is valid. |
3061 | assert(PtrRB.getID() == AArch64::GPRRegBankID && |
3062 | "Load/Store pointer operand isn't a GPR" ); |
3063 | assert(MRI.getType(PtrReg).isPointer() && |
3064 | "Load/Store pointer operand isn't a pointer" ); |
3065 | #endif |
3066 | |
3067 | const Register ValReg = LdSt.getReg(Idx: 0); |
3068 | const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI); |
3069 | LLT ValTy = MRI.getType(Reg: ValReg); |
3070 | |
3071 | // The code below doesn't support truncating stores, so we need to split it |
3072 | // again. |
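    // E.g. a truncating store of an s64 value with a 32-bit memory type is
    // rewritten as a subregister COPY of the value followed by a plain 32-bit
    // store.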
3073 | if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
3074 | unsigned SubReg; |
3075 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
3076 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
3077 | if (!getSubRegForClass(RC, TRI, SubReg)) |
3078 | return false; |
3079 | |
3080 | // Generate a subreg copy. |
3081 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {}) |
3082 | .addReg(RegNo: ValReg, flags: 0, SubReg) |
3083 | .getReg(Idx: 0); |
3084 | RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI); |
3085 | LdSt.getOperand(i: 0).setReg(Copy); |
3086 | } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { |
3087 | // If this is an any-extending load from the FPR bank, split it into a regular |
3088 | // load + extend. |
3089 | if (RB.getID() == AArch64::FPRRegBankID) { |
3090 | unsigned SubReg; |
3091 | LLT MemTy = LdSt.getMMO().getMemoryType(); |
3092 | auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB); |
3093 | if (!getSubRegForClass(RC, TRI, SubReg)) |
3094 | return false; |
3095 | Register OldDst = LdSt.getReg(Idx: 0); |
3096 | Register NewDst = |
3097 | MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType()); |
3098 | LdSt.getOperand(i: 0).setReg(NewDst); |
3099 | MRI.setRegBank(Reg: NewDst, RegBank: RB); |
3100 | // Generate a SUBREG_TO_REG to extend it. |
3101 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator())); |
3102 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {}) |
3103 | .addImm(Val: 0) |
3104 | .addUse(RegNo: NewDst) |
3105 | .addImm(Val: SubReg); |
3106 | auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB); |
3107 | RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI); |
3108 | MIB.setInstr(LdSt); |
3109 | ValTy = MemTy; // This is no longer an extending load. |
3110 | } |
3111 | } |
3112 | |
3113 | // Helper lambda for partially selecting I. Either returns the original |
3114 | // instruction with an updated opcode, or a new instruction. |
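    // selectLoadStoreUIOp picks the unsigned-immediate form of the opcode and
    // selectAddrModeIndexed tries to fold the address computation into a
    // base + scaled-offset operand; if nothing folds, keep the original
    // pointer operand with a #0 offset.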
3115 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { |
3116 | bool IsStore = isa<GStore>(Val: I); |
3117 | const unsigned NewOpc = |
3118 | selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits); |
3119 | if (NewOpc == I.getOpcode()) |
3120 | return nullptr; |
3121 | // Check if we can fold anything into the addressing mode. |
3122 | auto AddrModeFns = |
3123 | selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes); |
3124 | if (!AddrModeFns) { |
3125 | // Can't fold anything. Use the original instruction. |
3126 | I.setDesc(TII.get(Opcode: NewOpc)); |
3127 | I.addOperand(Op: MachineOperand::CreateImm(Val: 0)); |
3128 | return &I; |
3129 | } |
3130 | |
3131 | // Folded something. Create a new instruction and return it. |
3132 | auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags()); |
3133 | Register CurValReg = I.getOperand(i: 0).getReg(); |
3134 | IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg); |
3135 | NewInst.cloneMemRefs(OtherMI: I); |
3136 | for (auto &Fn : *AddrModeFns) |
3137 | Fn(NewInst); |
3138 | I.eraseFromParent(); |
3139 | return &*NewInst; |
3140 | }; |
3141 | |
3142 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); |
3143 | if (!LoadStore) |
3144 | return false; |
3145 | |
3146 | // If we're storing a 0, use WZR/XZR. |
3147 | if (Opcode == TargetOpcode::G_STORE) { |
3148 | auto CVal = getIConstantVRegValWithLookThrough( |
3149 | VReg: LoadStore->getOperand(i: 0).getReg(), MRI); |
3150 | if (CVal && CVal->Value == 0) { |
3151 | switch (LoadStore->getOpcode()) { |
3152 | case AArch64::STRWui: |
3153 | case AArch64::STRHHui: |
3154 | case AArch64::STRBBui: |
3155 | LoadStore->getOperand(i: 0).setReg(AArch64::WZR); |
3156 | break; |
3157 | case AArch64::STRXui: |
3158 | LoadStore->getOperand(i: 0).setReg(AArch64::XZR); |
3159 | break; |
3160 | } |
3161 | } |
3162 | } |
3163 | |
3164 | if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD && |
3165 | ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) { |
3166 | // The any/zextload from a smaller type to i32 should be handled by the |
3167 | // importer. |
3168 | if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64) |
3169 | return false; |
3170 | // If we have an extending load then change the load's type to be a |
3171 | // narrower reg and zero_extend with SUBREG_TO_REG. |
3172 | Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3173 | Register DstReg = LoadStore->getOperand(i: 0).getReg(); |
3174 | LoadStore->getOperand(i: 0).setReg(LdReg); |
3175 | |
3176 | MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator())); |
3177 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {}) |
3178 | .addImm(Val: 0) |
3179 | .addUse(RegNo: LdReg) |
3180 | .addImm(Val: AArch64::sub_32); |
3181 | constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI); |
3182 | return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass, |
3183 | MRI); |
3184 | } |
3185 | return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI); |
3186 | } |
3187 | |
3188 | case TargetOpcode::G_INDEXED_ZEXTLOAD: |
3189 | case TargetOpcode::G_INDEXED_SEXTLOAD: |
3190 | return selectIndexedExtLoad(I, MRI); |
3191 | case TargetOpcode::G_INDEXED_LOAD: |
3192 | return selectIndexedLoad(I, MRI); |
3193 | case TargetOpcode::G_INDEXED_STORE: |
3194 | return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI); |
3195 | |
3196 | case TargetOpcode::G_LSHR: |
3197 | case TargetOpcode::G_ASHR: |
3198 | if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3199 | return selectVectorAshrLshr(I, MRI); |
3200 | [[fallthrough]]; |
3201 | case TargetOpcode::G_SHL: |
3202 | if (Opcode == TargetOpcode::G_SHL && |
3203 | MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector()) |
3204 | return selectVectorSHL(I, MRI); |
3205 | |
    // These shifts were legalized to have 64-bit shift amounts because we want
    // to take advantage of the selection patterns that assume the immediates
    // are s64s. However, selectBinaryOp will assume both operands have the
    // same bit size.
3210 | { |
3211 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3212 | Register ShiftReg = I.getOperand(i: 2).getReg(); |
3213 | const LLT ShiftTy = MRI.getType(Reg: ShiftReg); |
3214 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3215 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && |
3216 | ShiftTy.getSizeInBits() == 64) { |
3217 | assert(!ShiftTy.isVector() && "unexpected vector shift ty" ); |
3218 | // Insert a subregister copy to implement a 64->32 trunc |
3219 | auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {}) |
3220 | .addReg(RegNo: ShiftReg, flags: 0, SubReg: AArch64::sub_32); |
3221 | MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID)); |
3222 | I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0)); |
3223 | } |
3224 | } |
3225 | [[fallthrough]]; |
3226 | case TargetOpcode::G_OR: { |
3227 | // Reject the various things we don't support yet. |
3228 | if (unsupportedBinOp(I, RBI, MRI, TRI)) |
3229 | return false; |
3230 | |
3231 | const unsigned OpSize = Ty.getSizeInBits(); |
3232 | |
3233 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3234 | const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI); |
3235 | |
3236 | const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize); |
3237 | if (NewOpc == I.getOpcode()) |
3238 | return false; |
3239 | |
3240 | I.setDesc(TII.get(Opcode: NewOpc)); |
3241 | // FIXME: Should the type be always reset in setDesc? |
3242 | |
3243 | // Now that we selected an opcode, we need to constrain the register |
3244 | // operands to use appropriate classes. |
3245 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3246 | } |
3247 | |
3248 | case TargetOpcode::G_PTR_ADD: { |
3249 | emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB); |
3250 | I.eraseFromParent(); |
3251 | return true; |
3252 | } |
3253 | |
3254 | case TargetOpcode::G_SADDE: |
3255 | case TargetOpcode::G_UADDE: |
3256 | case TargetOpcode::G_SSUBE: |
3257 | case TargetOpcode::G_USUBE: |
3258 | case TargetOpcode::G_SADDO: |
3259 | case TargetOpcode::G_UADDO: |
3260 | case TargetOpcode::G_SSUBO: |
3261 | case TargetOpcode::G_USUBO: |
3262 | return selectOverflowOp(I, MRI); |
3263 | |
3264 | case TargetOpcode::G_PTRMASK: { |
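    // Only constant masks that form a shifted run of contiguous ones are
    // handled; they map onto ANDXri with the corresponding encoded logical
    // immediate.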
3265 | Register MaskReg = I.getOperand(i: 2).getReg(); |
3266 | std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI); |
3267 | // TODO: Implement arbitrary cases |
3268 | if (!MaskVal || !isShiftedMask_64(Value: *MaskVal)) |
3269 | return false; |
3270 | |
3271 | uint64_t Mask = *MaskVal; |
3272 | I.setDesc(TII.get(Opcode: AArch64::ANDXri)); |
3273 | I.getOperand(i: 2).ChangeToImmediate( |
3274 | ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64)); |
3275 | |
3276 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3277 | } |
3278 | case TargetOpcode::G_PTRTOINT: |
3279 | case TargetOpcode::G_TRUNC: { |
3280 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3281 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3282 | |
3283 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3284 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3285 | |
3286 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3287 | const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
3288 | |
3289 | if (DstRB.getID() != SrcRB.getID()) { |
3290 | LLVM_DEBUG( |
3291 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ); |
3292 | return false; |
3293 | } |
3294 | |
3295 | if (DstRB.getID() == AArch64::GPRRegBankID) { |
3296 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3297 | if (!DstRC) |
3298 | return false; |
3299 | |
3300 | const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB); |
3301 | if (!SrcRC) |
3302 | return false; |
3303 | |
3304 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) || |
3305 | !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
3306 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n" ); |
3307 | return false; |
3308 | } |
3309 | |
3310 | if (DstRC == SrcRC) { |
3311 | // Nothing to be done |
3312 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) && |
3313 | SrcTy == LLT::scalar(SizeInBits: 64)) { |
3314 | llvm_unreachable("TableGen can import this case" ); |
3315 | return false; |
3316 | } else if (DstRC == &AArch64::GPR32RegClass && |
3317 | SrcRC == &AArch64::GPR64RegClass) { |
3318 | I.getOperand(i: 1).setSubReg(AArch64::sub_32); |
3319 | } else { |
3320 | LLVM_DEBUG( |
3321 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ); |
3322 | return false; |
3323 | } |
3324 | |
3325 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
3326 | return true; |
3327 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { |
3328 | if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && |
3329 | SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) { |
3330 | I.setDesc(TII.get(Opcode: AArch64::XTNv4i16)); |
3331 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3332 | return true; |
3333 | } |
3334 | |
3335 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { |
        MachineInstr *Extract = emitExtractVectorElt(
            DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3338 | if (!Extract) |
3339 | return false; |
3340 | I.eraseFromParent(); |
3341 | return true; |
3342 | } |
3343 | |
3344 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. |
3345 | if (Opcode == TargetOpcode::G_PTRTOINT) { |
3346 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector" ); |
3347 | I.setDesc(TII.get(Opcode: TargetOpcode::COPY)); |
3348 | return selectCopy(I, TII, MRI, TRI, RBI); |
3349 | } |
3350 | } |
3351 | |
3352 | return false; |
3353 | } |
3354 | |
3355 | case TargetOpcode::G_ANYEXT: { |
3356 | if (selectUSMovFromExtend(I, MRI)) |
3357 | return true; |
3358 | |
3359 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3360 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
3361 | |
3362 | const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3363 | if (RBDst.getID() != AArch64::GPRRegBankID) { |
3364 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst |
3365 | << ", expected: GPR\n" ); |
3366 | return false; |
3367 | } |
3368 | |
3369 | const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI); |
3370 | if (RBSrc.getID() != AArch64::GPRRegBankID) { |
3371 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc |
3372 | << ", expected: GPR\n" ); |
3373 | return false; |
3374 | } |
3375 | |
3376 | const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits(); |
3377 | |
3378 | if (DstSize == 0) { |
3379 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n" ); |
3380 | return false; |
3381 | } |
3382 | |
3383 | if (DstSize != 64 && DstSize > 32) { |
3384 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize |
3385 | << ", expected: 32 or 64\n" ); |
3386 | return false; |
3387 | } |
3388 | // At this point G_ANYEXT is just like a plain COPY, but we need |
3389 | // to explicitly form the 64-bit value if any. |
3390 | if (DstSize > 32) { |
3391 | Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass); |
3392 | BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG)) |
3393 | .addDef(RegNo: ExtSrc) |
3394 | .addImm(Val: 0) |
3395 | .addUse(RegNo: SrcReg) |
3396 | .addImm(Val: AArch64::sub_32); |
3397 | I.getOperand(i: 1).setReg(ExtSrc); |
3398 | } |
3399 | return selectCopy(I, TII, MRI, TRI, RBI); |
3400 | } |
3401 | |
3402 | case TargetOpcode::G_ZEXT: |
3403 | case TargetOpcode::G_SEXT_INREG: |
3404 | case TargetOpcode::G_SEXT: { |
3405 | if (selectUSMovFromExtend(I, MRI)) |
3406 | return true; |
3407 | |
3408 | unsigned Opcode = I.getOpcode(); |
3409 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; |
3410 | const Register DefReg = I.getOperand(i: 0).getReg(); |
3411 | Register SrcReg = I.getOperand(i: 1).getReg(); |
3412 | const LLT DstTy = MRI.getType(Reg: DefReg); |
3413 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
3414 | unsigned DstSize = DstTy.getSizeInBits(); |
3415 | unsigned SrcSize = SrcTy.getSizeInBits(); |
3416 | |
3417 | // SEXT_INREG has the same src reg size as dst, the size of the value to be |
3418 | // extended is encoded in the imm. |
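    // E.g. %dst:s32 = G_SEXT_INREG %src:s32, 8 sign-extends the low 8 bits of
    // %src into all 32 bits of %dst.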
3419 | if (Opcode == TargetOpcode::G_SEXT_INREG) |
3420 | SrcSize = I.getOperand(i: 2).getImm(); |
3421 | |
3422 | if (DstTy.isVector()) |
3423 | return false; // Should be handled by imported patterns. |
3424 | |
3425 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == |
3426 | AArch64::GPRRegBankID && |
3427 | "Unexpected ext regbank" ); |
3428 | |
3429 | MachineInstr *ExtI; |
3430 | |
    // First, check whether we're extending the result of a load whose dest type
    // is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
    // smallest GPR register on AArch64, and all narrower loads automatically
    // zero-extend the upper bits. E.g.
3435 | // %v(s8) = G_LOAD %p, :: (load 1) |
3436 | // %v2(s32) = G_ZEXT %v(s8) |
3437 | if (!IsSigned) { |
3438 | auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI); |
3439 | bool IsGPR = |
3440 | RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; |
3441 | if (LoadMI && IsGPR) { |
3442 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); |
3443 | unsigned BytesLoaded = MemOp->getSize().getValue(); |
3444 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) |
3445 | return selectCopy(I, TII, MRI, TRI, RBI); |
3446 | } |
3447 | |
3448 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) |
3449 | // + SUBREG_TO_REG. |
3450 | if (IsGPR && SrcSize == 32 && DstSize == 64) { |
3451 | Register SubregToRegSrc = |
3452 | MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3453 | const Register ZReg = AArch64::WZR; |
3454 | MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg}) |
3455 | .addImm(Val: 0); |
3456 | |
3457 | MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {}) |
3458 | .addImm(Val: 0) |
3459 | .addUse(RegNo: SubregToRegSrc) |
3460 | .addImm(Val: AArch64::sub_32); |
3461 | |
3462 | if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, |
3463 | MRI)) { |
3464 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n" ); |
3465 | return false; |
3466 | } |
3467 | |
3468 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass, |
3469 | MRI)) { |
3470 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n" ); |
3471 | return false; |
3472 | } |
3473 | |
3474 | I.eraseFromParent(); |
3475 | return true; |
3476 | } |
3477 | } |
3478 | |
3479 | if (DstSize == 64) { |
3480 | if (Opcode != TargetOpcode::G_SEXT_INREG) { |
3481 | // FIXME: Can we avoid manually doing this? |
3482 | if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass, |
3483 | MRI)) { |
3484 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) |
3485 | << " operand\n" ); |
3486 | return false; |
3487 | } |
3488 | SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, |
3489 | DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}) |
3490 | .addImm(Val: 0) |
3491 | .addUse(RegNo: SrcReg) |
3492 | .addImm(Val: AArch64::sub_32) |
3493 | .getReg(Idx: 0); |
3494 | } |
3495 | |
3496 | ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, |
3497 | DstOps: {DefReg}, SrcOps: {SrcReg}) |
3498 | .addImm(Val: 0) |
3499 | .addImm(Val: SrcSize - 1); |
3500 | } else if (DstSize <= 32) { |
3501 | ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, |
3502 | DstOps: {DefReg}, SrcOps: {SrcReg}) |
3503 | .addImm(Val: 0) |
3504 | .addImm(Val: SrcSize - 1); |
3505 | } else { |
3506 | return false; |
3507 | } |
3508 | |
3509 | constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI); |
3510 | I.eraseFromParent(); |
3511 | return true; |
3512 | } |
3513 | |
3514 | case TargetOpcode::G_SITOFP: |
3515 | case TargetOpcode::G_UITOFP: |
3516 | case TargetOpcode::G_FPTOSI: |
3517 | case TargetOpcode::G_FPTOUI: { |
3518 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()), |
3519 | SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3520 | const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy); |
3521 | if (NewOpc == Opcode) |
3522 | return false; |
3523 | |
3524 | I.setDesc(TII.get(Opcode: NewOpc)); |
3525 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3526 | I.setFlags(MachineInstr::NoFPExcept); |
3527 | |
3528 | return true; |
3529 | } |
3530 | |
3531 | case TargetOpcode::G_FREEZE: |
3532 | return selectCopy(I, TII, MRI, TRI, RBI); |
3533 | |
3534 | case TargetOpcode::G_INTTOPTR: |
3535 | // The importer is currently unable to import pointer types since they |
3536 | // didn't exist in SelectionDAG. |
3537 | return selectCopy(I, TII, MRI, TRI, RBI); |
3538 | |
3539 | case TargetOpcode::G_BITCAST: |
3540 | // Imported SelectionDAG rules can handle every bitcast except those that |
3541 | // bitcast from a type to the same type. Ideally, these shouldn't occur |
3542 | // but we might not run an optimizer that deletes them. The other exception |
3543 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge |
3544 | // of them. |
3545 | return selectCopy(I, TII, MRI, TRI, RBI); |
3546 | |
3547 | case TargetOpcode::G_SELECT: { |
3548 | auto &Sel = cast<GSelect>(Val&: I); |
3549 | const Register CondReg = Sel.getCondReg(); |
3550 | const Register TReg = Sel.getTrueReg(); |
3551 | const Register FReg = Sel.getFalseReg(); |
3552 | |
3553 | if (tryOptSelect(Sel)) |
3554 | return true; |
3555 | |
3556 | // Make sure to use an unused vreg instead of wzr, so that the peephole |
3557 | // optimizations will be able to optimize these. |
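    // Test bit 0 of the condition by ANDS-ing it with an encoded logical
    // immediate of 1 to set NZCV, then emit the select on NE.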
3558 | Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
3559 | auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg}) |
3560 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32)); |
3561 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
3562 | if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB)) |
3563 | return false; |
3564 | Sel.eraseFromParent(); |
3565 | return true; |
3566 | } |
3567 | case TargetOpcode::G_ICMP: { |
3568 | if (Ty.isVector()) |
3569 | return false; |
3570 | |
3571 | if (Ty != LLT::scalar(SizeInBits: 32)) { |
3572 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty |
3573 | << ", expected: " << LLT::scalar(32) << '\n'); |
3574 | return false; |
3575 | } |
3576 | |
3577 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3578 | const AArch64CC::CondCode InvCC = |
3579 | changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred)); |
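    // CSINC Wd, WZR, WZR, InvCC produces 1 when InvCC does not hold (i.e. when
    // the original predicate holds) and 0 otherwise.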
3580 | emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB); |
3581 | emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR, |
3582 | /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB); |
3583 | I.eraseFromParent(); |
3584 | return true; |
3585 | } |
3586 | |
3587 | case TargetOpcode::G_FCMP: { |
3588 | CmpInst::Predicate Pred = |
3589 | static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate()); |
3590 | if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB, |
3591 | Pred) || |
3592 | !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB)) |
3593 | return false; |
3594 | I.eraseFromParent(); |
3595 | return true; |
3596 | } |
3597 | case TargetOpcode::G_VASTART: |
3598 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) |
3599 | : selectVaStartAAPCS(I, MF, MRI); |
3600 | case TargetOpcode::G_INTRINSIC: |
3601 | return selectIntrinsic(I, MRI); |
3602 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
3603 | return selectIntrinsicWithSideEffects(I, MRI); |
3604 | case TargetOpcode::G_IMPLICIT_DEF: { |
3605 | I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF)); |
3606 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3607 | const Register DstReg = I.getOperand(i: 0).getReg(); |
3608 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
3609 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB); |
3610 | RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI); |
3611 | return true; |
3612 | } |
3613 | case TargetOpcode::G_BLOCK_ADDR: { |
3614 | Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction(); |
3615 | if (std::optional<uint16_t> BADisc = |
3616 | STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) { |
3617 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {}); |
3618 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
3619 | MIB.buildInstr(Opcode: AArch64::MOVaddrPAC) |
3620 | .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress()) |
3621 | .addImm(Val: AArch64PACKey::IA) |
3622 | .addReg(/*AddrDisc=*/RegNo: AArch64::XZR) |
3623 | .addImm(Val: *BADisc) |
3624 | .constrainAllUses(TII, TRI, RBI); |
3625 | MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16)); |
3626 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
3627 | RC: AArch64::GPR64RegClass, MRI); |
3628 | I.eraseFromParent(); |
3629 | return true; |
3630 | } |
3631 | if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) { |
3632 | materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0); |
3633 | I.eraseFromParent(); |
3634 | return true; |
3635 | } else { |
3636 | I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA)); |
3637 | auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA), |
3638 | DestReg: I.getOperand(i: 0).getReg()) |
3639 | .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(), |
3640 | /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE) |
3641 | .addBlockAddress( |
3642 | BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0, |
3643 | TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3644 | I.eraseFromParent(); |
3645 | return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI); |
3646 | } |
3647 | } |
3648 | case AArch64::G_DUP: { |
    // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
    // imported patterns, so do it manually here. Avoiding the s16 GPR in the
    // first place is difficult: adding an anyextend at RegBankSelect could end
    // up pessimizing the FPR case. Manual selection is the most robust
    // solution for now.
3654 | if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() != |
3655 | AArch64::GPRRegBankID) |
3656 | return false; // We expect the fpr regbank case to be imported. |
3657 | LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3658 | if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) |
3659 | I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr)); |
3660 | else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) |
3661 | I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr)); |
3662 | else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) |
3663 | I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr)); |
3664 | else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) |
3665 | I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr)); |
3666 | else |
3667 | return false; |
3668 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); |
3669 | } |
3670 | case TargetOpcode::G_BUILD_VECTOR: |
3671 | return selectBuildVector(I, MRI); |
3672 | case TargetOpcode::G_MERGE_VALUES: |
3673 | return selectMergeValues(I, MRI); |
3674 | case TargetOpcode::G_UNMERGE_VALUES: |
3675 | return selectUnmergeValues(I, MRI); |
3676 | case TargetOpcode::G_SHUFFLE_VECTOR: |
3677 | return selectShuffleVector(I, MRI); |
3678 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: |
3679 | return selectExtractElt(I, MRI); |
3680 | case TargetOpcode::G_CONCAT_VECTORS: |
3681 | return selectConcatVectors(I, MRI); |
3682 | case TargetOpcode::G_JUMP_TABLE: |
3683 | return selectJumpTable(I, MRI); |
3684 | case TargetOpcode::G_MEMCPY: |
3685 | case TargetOpcode::G_MEMCPY_INLINE: |
3686 | case TargetOpcode::G_MEMMOVE: |
3687 | case TargetOpcode::G_MEMSET: |
3688 | assert(STI.hasMOPS() && "Shouldn't get here without +mops feature" ); |
3689 | return selectMOPS(I, MRI); |
3690 | } |
3691 | |
3692 | return false; |
3693 | } |
3694 | |
3695 | bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) { |
3696 | MachineIRBuilderState OldMIBState = MIB.getState(); |
3697 | bool Success = select(I); |
3698 | MIB.setState(OldMIBState); |
3699 | return Success; |
3700 | } |
3701 | |
3702 | bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, |
3703 | MachineRegisterInfo &MRI) { |
3704 | unsigned Mopcode; |
3705 | switch (GI.getOpcode()) { |
3706 | case TargetOpcode::G_MEMCPY: |
3707 | case TargetOpcode::G_MEMCPY_INLINE: |
3708 | Mopcode = AArch64::MOPSMemoryCopyPseudo; |
3709 | break; |
3710 | case TargetOpcode::G_MEMMOVE: |
3711 | Mopcode = AArch64::MOPSMemoryMovePseudo; |
3712 | break; |
3713 | case TargetOpcode::G_MEMSET: |
3714 | // For tagged memset see llvm.aarch64.mops.memset.tag |
3715 | Mopcode = AArch64::MOPSMemorySetPseudo; |
3716 | break; |
3717 | } |
3718 | |
3719 | auto &DstPtr = GI.getOperand(i: 0); |
3720 | auto &SrcOrVal = GI.getOperand(i: 1); |
3721 | auto &Size = GI.getOperand(i: 2); |
3722 | |
3723 | // Create copies of the registers that can be clobbered. |
3724 | const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg()); |
3725 | const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg()); |
3726 | const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg()); |
3727 | |
3728 | const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; |
3729 | const auto &SrcValRegClass = |
3730 | IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; |
3731 | |
3732 | // Constrain to specific registers |
3733 | RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI); |
3734 | RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI); |
3735 | RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI); |
3736 | |
3737 | MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr); |
3738 | MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal); |
3739 | MIB.buildCopy(Res: SizeCopy, Op: Size); |
3740 | |
3741 | // New instruction uses the copied registers because it must update them. |
3742 | // The defs are not used since they don't exist in G_MEM*. They are still |
3743 | // tied. |
3744 | // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE |
3745 | Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass); |
3746 | Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3747 | if (IsSet) { |
3748 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize}, |
3749 | SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy}); |
3750 | } else { |
3751 | Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass); |
3752 | MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize}, |
3753 | SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy}); |
3754 | } |
3755 | |
3756 | GI.eraseFromParent(); |
3757 | return true; |
3758 | } |
3759 | |
3760 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, |
3761 | MachineRegisterInfo &MRI) { |
3762 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT" ); |
3763 | Register JTAddr = I.getOperand(i: 0).getReg(); |
3764 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3765 | Register Index = I.getOperand(i: 2).getReg(); |
3766 | |
3767 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr); |
3768 | |
3769 | // With aarch64-jump-table-hardening, we only expand the jump table dispatch |
3770 | // sequence later, to guarantee the integrity of the intermediate values. |
3771 | if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening" )) { |
3772 | CodeModel::Model CM = TM.getCodeModel(); |
3773 | if (STI.isTargetMachO()) { |
3774 | if (CM != CodeModel::Small && CM != CodeModel::Large) |
3775 | report_fatal_error(reason: "Unsupported code-model for hardened jump-table" ); |
3776 | } else { |
3777 | // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO. |
3778 | assert(STI.isTargetELF() && |
3779 | "jump table hardening only supported on MachO/ELF" ); |
3780 | if (CM != CodeModel::Small) |
3781 | report_fatal_error(reason: "Unsupported code-model for hardened jump-table" ); |
3782 | } |
3783 | |
3784 | MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg()); |
3785 | MIB.buildInstr(Opcode: AArch64::BR_JumpTable) |
3786 | .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex()); |
3787 | I.eraseFromParent(); |
3788 | return true; |
3789 | } |
3790 | |
3791 | Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3792 | Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass); |
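  // TargetReg and ScratchReg are the outputs of the JumpTableDest32 pseudo
  // below; the 32-bit entry size matches the 4 bytes registered with
  // setJumpTableEntryInfo above.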
3793 | |
3794 | auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32, |
3795 | DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index}) |
3796 | .addJumpTableIndex(Idx: JTI); |
3797 | // Save the jump table info. |
3798 | MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {}, |
3799 | SrcOps: {static_cast<int64_t>(JTI)}); |
3800 | // Build the indirect branch. |
3801 | MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg}); |
3802 | I.eraseFromParent(); |
3803 | return constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI); |
3804 | } |
3805 | |
3806 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, |
3807 | MachineRegisterInfo &MRI) { |
3808 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table" ); |
3809 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!" ); |
3810 | |
3811 | Register DstReg = I.getOperand(i: 0).getReg(); |
3812 | unsigned JTI = I.getOperand(i: 1).getIndex(); |
3813 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. |
3814 | auto MovMI = |
3815 | MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {}) |
3816 | .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE) |
3817 | .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF); |
3818 | I.eraseFromParent(); |
3819 | return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI); |
3820 | } |
3821 | |
3822 | bool AArch64InstructionSelector::selectTLSGlobalValue( |
3823 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3824 | if (!STI.isTargetMachO()) |
3825 | return false; |
3826 | MachineFunction &MF = *I.getParent()->getParent(); |
3827 | MF.getFrameInfo().setAdjustsStack(true); |
3828 | |
3829 | const auto &GlobalOp = I.getOperand(i: 1); |
3830 | assert(GlobalOp.getOffset() == 0 && |
3831 | "Shouldn't have an offset on TLS globals!" ); |
3832 | const GlobalValue &GV = *GlobalOp.getGlobal(); |
3833 | |
3834 | auto LoadGOT = |
3835 | MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {}) |
3836 | .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS); |
3837 | |
3838 | auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass}, |
3839 | SrcOps: {LoadGOT.getReg(Idx: 0)}) |
3840 | .addImm(Val: 0); |
3841 | |
3842 | MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0)); |
3843 | // TLS calls preserve all registers except those that absolutely must be |
3844 | // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be |
3845 | // silly). |
3846 | unsigned Opcode = getBLRCallOpcode(MF); |
3847 | |
3848 | // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0). |
3849 | if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls" )) { |
3850 | assert(Opcode == AArch64::BLR); |
3851 | Opcode = AArch64::BLRAAZ; |
3852 | } |
3853 | |
3854 | MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load}) |
3855 | .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit) |
3856 | .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit) |
3857 | .addRegMask(Mask: TRI.getTLSCallPreservedMask()); |
3858 | |
3859 | MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0)); |
3860 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass, |
3861 | MRI); |
3862 | I.eraseFromParent(); |
3863 | return true; |
3864 | } |
3865 | |
3866 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( |
3867 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, |
3868 | MachineIRBuilder &MIRBuilder) const { |
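  // Insert the scalar into lane 0 of an IMPLICIT_DEF vector of the destination
  // class, using the subregister index that matches the element size.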
3869 | auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {}); |
3870 | |
3871 | auto BuildFn = [&](unsigned SubregIndex) { |
3872 | auto Ins = |
3873 | MIRBuilder |
3874 | .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar}) |
3875 | .addImm(Val: SubregIndex); |
3876 | constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI); |
3877 | constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI); |
3878 | return &*Ins; |
3879 | }; |
3880 | |
3881 | switch (EltSize) { |
3882 | case 8: |
3883 | return BuildFn(AArch64::bsub); |
3884 | case 16: |
3885 | return BuildFn(AArch64::hsub); |
3886 | case 32: |
3887 | return BuildFn(AArch64::ssub); |
3888 | case 64: |
3889 | return BuildFn(AArch64::dsub); |
3890 | default: |
3891 | return nullptr; |
3892 | } |
3893 | } |
3894 | |
3895 | MachineInstr * |
3896 | AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg, |
3897 | MachineIRBuilder &MIB, |
3898 | MachineRegisterInfo &MRI) const { |
3899 | LLT DstTy = MRI.getType(Reg: DstReg); |
3900 | const TargetRegisterClass *RC = |
3901 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI)); |
3902 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { |
3903 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n" ); |
3904 | return nullptr; |
3905 | } |
3906 | unsigned SubReg = 0; |
3907 | if (!getSubRegForClass(RC, TRI, SubReg)) |
3908 | return nullptr; |
3909 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { |
3910 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" |
3911 | << DstTy.getSizeInBits() << "\n" ); |
3912 | return nullptr; |
3913 | } |
3914 | auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}) |
3915 | .addReg(RegNo: SrcReg, flags: 0, SubReg); |
3916 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
3917 | return Copy; |
3918 | } |
3919 | |
3920 | bool AArch64InstructionSelector::selectMergeValues( |
3921 | MachineInstr &I, MachineRegisterInfo &MRI) { |
3922 | assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode" ); |
3923 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
3924 | const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
3925 | assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation" ); |
3926 | const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI); |
3927 | |
3928 | if (I.getNumOperands() != 3) |
3929 | return false; |
3930 | |
3931 | // Merging 2 s64s into an s128. |
3932 | if (DstTy == LLT::scalar(SizeInBits: 128)) { |
3933 | if (SrcTy.getSizeInBits() != 64) |
3934 | return false; |
3935 | Register DstReg = I.getOperand(i: 0).getReg(); |
3936 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
3937 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
3938 | auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {}); |
3939 | MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg, |
3940 | /* LaneIdx */ 0, RB, MIRBuilder&: MIB); |
3941 | if (!InsMI) |
3942 | return false; |
3943 | MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(), |
3944 | EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB); |
3945 | if (!Ins2MI) |
3946 | return false; |
3947 | constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI); |
3948 | constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI); |
3949 | I.eraseFromParent(); |
3950 | return true; |
3951 | } |
3952 | |
3953 | if (RB.getID() != AArch64::GPRRegBankID) |
3954 | return false; |
3955 | |
3956 | if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) |
3957 | return false; |
3958 | |
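// Merge two s32s into an s64 on the GPR bank: widen both halves with
// SUBREG_TO_REG, then use BFM to move the second value into bits [63:32] of
// the first. Roughly:
//   %lo:gpr64 = SUBREG_TO_REG 0, %src1, %subreg.sub_32
//   %hi:gpr64 = SUBREG_TO_REG 0, %src2, %subreg.sub_32
//   %dst:gpr64 = BFMXri %lo, %hi, 32, 31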
3959 | auto *DstRC = &AArch64::GPR64RegClass; |
3960 | Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC); |
3961 | MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), |
3962 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
3963 | .addDef(RegNo: SubToRegDef) |
3964 | .addImm(Val: 0) |
3965 | .addUse(RegNo: I.getOperand(i: 1).getReg()) |
3966 | .addImm(Val: AArch64::sub_32); |
3967 | Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC); |
3968 | // Need to anyext the second scalar before we can use bfm |
3969 | MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), |
3970 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
3971 | .addDef(RegNo: SubToRegDef2) |
3972 | .addImm(Val: 0) |
3973 | .addUse(RegNo: I.getOperand(i: 2).getReg()) |
3974 | .addImm(Val: AArch64::sub_32); |
3975 | MachineInstr &BFM = |
3976 | *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri)) |
3977 | .addDef(RegNo: I.getOperand(i: 0).getReg()) |
3978 | .addUse(RegNo: SubToRegDef) |
3979 | .addUse(RegNo: SubToRegDef2) |
3980 | .addImm(Val: 32) |
3981 | .addImm(Val: 31); |
3982 | constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI); |
3983 | constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI); |
3984 | constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI); |
3985 | I.eraseFromParent(); |
3986 | return true; |
3987 | } |
3988 | |
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3990 | const unsigned EltSize) { |
3991 | // Choose a lane copy opcode and subregister based off of the size of the |
3992 | // vector's elements. |
3993 | switch (EltSize) { |
3994 | case 8: |
3995 | CopyOpc = AArch64::DUPi8; |
3996 | ExtractSubReg = AArch64::bsub; |
3997 | break; |
3998 | case 16: |
3999 | CopyOpc = AArch64::DUPi16; |
4000 | ExtractSubReg = AArch64::hsub; |
4001 | break; |
4002 | case 32: |
4003 | CopyOpc = AArch64::DUPi32; |
4004 | ExtractSubReg = AArch64::ssub; |
4005 | break; |
4006 | case 64: |
4007 | CopyOpc = AArch64::DUPi64; |
4008 | ExtractSubReg = AArch64::dsub; |
4009 | break; |
4010 | default: |
4011 | // Unknown size, bail out. |
4012 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n" ); |
4013 | return false; |
4014 | } |
4015 | return true; |
4016 | } |
4017 | |
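// Extracts lane LaneIdx of VecReg into a scalar of type ScalarTy. Lane 0 is a
// plain subregister copy; other lanes use a DUPi* lane copy, widening unpacked
// (sub-128-bit) vectors to FPR128 first via emitScalarToVector.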
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4019 | std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, |
4020 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { |
4021 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
4022 | unsigned CopyOpc = 0; |
unsigned ExtractSubReg = 0;
4024 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) { |
4025 | LLVM_DEBUG( |
4026 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ); |
4027 | return nullptr; |
4028 | } |
4029 | |
4030 | const TargetRegisterClass *DstRC = |
4031 | getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true); |
4032 | if (!DstRC) { |
4033 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n" ); |
4034 | return nullptr; |
4035 | } |
4036 | |
4037 | const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI); |
4038 | const LLT &VecTy = MRI.getType(Reg: VecReg); |
4039 | const TargetRegisterClass *VecRC = |
4040 | getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true); |
4041 | if (!VecRC) { |
4042 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n" ); |
4043 | return nullptr; |
4044 | } |
4045 | |
4046 | // The register that we're going to copy into. |
4047 | Register InsertReg = VecReg; |
4048 | if (!DstReg) |
4049 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
4050 | // If the lane index is 0, we just use a subregister COPY. |
4051 | if (LaneIdx == 0) { |
4052 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {}) |
4053 | .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg); |
4054 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4055 | return &*Copy; |
4056 | } |
4057 | |
4058 | // Lane copies require 128-bit wide registers. If we're dealing with an |
4059 | // unpacked vector, then we need to move up to that width. Insert an implicit |
4060 | // def and a subregister insert to get us there. |
4061 | if (VecTy.getSizeInBits() != 128) { |
4062 | MachineInstr *ScalarToVector = emitScalarToVector( |
4063 | EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder); |
4064 | if (!ScalarToVector) |
4065 | return nullptr; |
4066 | InsertReg = ScalarToVector->getOperand(i: 0).getReg(); |
4067 | } |
4068 | |
4069 | MachineInstr *LaneCopyMI = |
4070 | MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx); |
4071 | constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI); |
4072 | |
4073 | // Make sure that we actually constrain the initial copy. |
4074 | RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI); |
4075 | return LaneCopyMI; |
4076 | } |
4077 | |
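// Selects G_EXTRACT_VECTOR_ELT when the destination is on the FPR bank and the
// lane index is a compile-time constant; other forms are rejected here.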
bool AArch64InstructionSelector::selectExtractElt(
4079 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4080 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && |
4081 | "unexpected opcode!" ); |
4082 | Register DstReg = I.getOperand(i: 0).getReg(); |
4083 | const LLT NarrowTy = MRI.getType(Reg: DstReg); |
4084 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
4085 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4086 | (void)WideTy; |
4087 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && |
4088 | "source register size too small!" ); |
4089 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!" ); |
4090 | |
4091 | // Need the lane index to determine the correct copy opcode. |
4092 | MachineOperand &LaneIdxOp = I.getOperand(i: 2); |
4093 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?" ); |
4094 | |
4095 | if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
4096 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n" ); |
4097 | return false; |
4098 | } |
4099 | |
4100 | // Find the index to extract from. |
4101 | auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI); |
4102 | if (!VRegAndVal) |
4103 | return false; |
4104 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); |
4105 | |
4107 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI); |
MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
4109 | LaneIdx, MIRBuilder&: MIB); |
4110 | if (!Extract) |
4111 | return false; |
4112 | |
4113 | I.eraseFromParent(); |
4114 | return true; |
4115 | } |
4116 | |
4117 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( |
4118 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4119 | unsigned NumElts = I.getNumOperands() - 1; |
4120 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4121 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4122 | const LLT SrcTy = MRI.getType(Reg: SrcReg); |
4123 | |
4124 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors" ); |
4125 | if (SrcTy.getSizeInBits() > 128) { |
4126 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge" ); |
4127 | return false; |
4128 | } |
4129 | |
4130 | // We implement a split vector operation by treating the sub-vectors as |
4131 | // scalars and extracting them. |
4132 | const RegisterBank &DstRB = |
4133 | *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI); |
4134 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { |
4135 | Register Dst = I.getOperand(i: OpIdx).getReg(); |
MachineInstr *Extract =
4137 | emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB); |
4138 | if (!Extract) |
4139 | return false; |
4140 | } |
4141 | I.eraseFromParent(); |
4142 | return true; |
4143 | } |
4144 | |
4145 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, |
4146 | MachineRegisterInfo &MRI) { |
4147 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && |
4148 | "unexpected opcode" ); |
4149 | |
4150 | // TODO: Handle unmerging into GPRs and from scalars to scalars. |
4151 | if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() != |
4152 | AArch64::FPRRegBankID || |
4153 | RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() != |
4154 | AArch64::FPRRegBankID) { |
4155 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " |
4156 | "currently unsupported.\n" ); |
4157 | return false; |
4158 | } |
4159 | |
4160 | // The last operand is the vector source register, and every other operand is |
4161 | // a register to unpack into. |
4162 | unsigned NumElts = I.getNumOperands() - 1; |
4163 | Register SrcReg = I.getOperand(i: NumElts).getReg(); |
4164 | const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
4165 | const LLT WideTy = MRI.getType(Reg: SrcReg); |
4166 | (void)WideTy; |
4167 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && |
4168 | "can only unmerge from vector or s128 types!" ); |
4169 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && |
4170 | "source register size too small!" ); |
4171 | |
4172 | if (!NarrowTy.isScalar()) |
4173 | return selectSplitVectorUnmerge(I, MRI); |
4174 | |
4175 | // Choose a lane copy opcode and subregister based off of the size of the |
4176 | // vector's elements. |
4177 | unsigned CopyOpc = 0; |
unsigned ExtractSubReg = 0;
4179 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits())) |
4180 | return false; |
4181 | |
4182 | // Set up for the lane copies. |
4183 | MachineBasicBlock &MBB = *I.getParent(); |
4184 | |
4185 | // Stores the registers we'll be copying from. |
4186 | SmallVector<Register, 4> InsertRegs; |
4187 | |
4188 | // We'll use the first register twice, so we only need NumElts-1 registers. |
4189 | unsigned NumInsertRegs = NumElts - 1; |
4190 | |
4191 | // If our elements fit into exactly 128 bits, then we can copy from the source |
4192 | // directly. Otherwise, we need to do a bit of setup with some subregister |
4193 | // inserts. |
4194 | if (NarrowTy.getSizeInBits() * NumElts == 128) { |
4195 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); |
4196 | } else { |
4197 | // No. We have to perform subregister inserts. For each insert, create an |
4198 | // implicit def and a subregister insert, and save the register we create. |
4199 | const TargetRegisterClass *RC = getRegClassForTypeOnBank( |
4200 | Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()), |
4201 | RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI)); |
4202 | unsigned SubReg = 0; |
4203 | bool Found = getSubRegForClass(RC, TRI, SubReg); |
4204 | (void)Found; |
assert(Found && "expected to find last operand's subreg idx");
4206 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { |
4207 | Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
4208 | MachineInstr &ImpDefMI = |
4209 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF), |
4210 | DestReg: ImpDefReg); |
4211 | |
4212 | // Now, create the subregister insert from SrcReg. |
4213 | Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
4214 | MachineInstr &InsMI = |
4215 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), |
4216 | MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg) |
4217 | .addUse(RegNo: ImpDefReg) |
4218 | .addUse(RegNo: SrcReg) |
4219 | .addImm(Val: SubReg); |
4220 | |
4221 | constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI); |
4222 | constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI); |
4223 | |
4224 | // Save the register so that we can copy from it after. |
4225 | InsertRegs.push_back(Elt: InsertReg); |
4226 | } |
4227 | } |
4228 | |
4229 | // Now that we've created any necessary subregister inserts, we can |
4230 | // create the copies. |
4231 | // |
4232 | // Perform the first copy separately as a subregister copy. |
4233 | Register CopyTo = I.getOperand(i: 0).getReg(); |
4234 | auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {}) |
4235 | .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg); |
4236 | constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI); |
4237 | |
4238 | // Now, perform the remaining copies as vector lane copies. |
4239 | unsigned LaneIdx = 1; |
4240 | for (Register InsReg : InsertRegs) { |
4241 | Register CopyTo = I.getOperand(i: LaneIdx).getReg(); |
4242 | MachineInstr &CopyInst = |
4243 | *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo) |
4244 | .addUse(RegNo: InsReg) |
4245 | .addImm(Val: LaneIdx); |
4246 | constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI); |
4247 | ++LaneIdx; |
4248 | } |
4249 | |
4250 | // Separately constrain the first copy's destination. Because of the |
4251 | // limitation in constrainOperandRegClass, we can't guarantee that this will |
4252 | // actually be constrained. So, do it ourselves using the second operand. |
4253 | const TargetRegisterClass *RC = |
4254 | MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg()); |
4255 | if (!RC) { |
4256 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n" ); |
4257 | return false; |
4258 | } |
4259 | |
4260 | RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI); |
4261 | I.eraseFromParent(); |
4262 | return true; |
4263 | } |
4264 | |
4265 | bool AArch64InstructionSelector::selectConcatVectors( |
4266 | MachineInstr &I, MachineRegisterInfo &MRI) { |
4267 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && |
4268 | "Unexpected opcode" ); |
4269 | Register Dst = I.getOperand(i: 0).getReg(); |
4270 | Register Op1 = I.getOperand(i: 1).getReg(); |
4271 | Register Op2 = I.getOperand(i: 2).getReg(); |
4272 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB); |
4273 | if (!ConcatMI) |
4274 | return false; |
4275 | I.eraseFromParent(); |
4276 | return true; |
4277 | } |
4278 | |
4279 | unsigned |
4280 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, |
4281 | MachineFunction &MF) const { |
4282 | Type *CPTy = CPVal->getType(); |
4283 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy); |
4284 | |
4285 | MachineConstantPool *MCP = MF.getConstantPool(); |
4286 | return MCP->getConstantPoolIndex(C: CPVal, Alignment); |
4287 | } |
4288 | |
4289 | MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( |
4290 | const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { |
4291 | const TargetRegisterClass *RC; |
4292 | unsigned Opc; |
4293 | bool IsTiny = TM.getCodeModel() == CodeModel::Tiny; |
4294 | unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType()); |
4295 | switch (Size) { |
4296 | case 16: |
4297 | RC = &AArch64::FPR128RegClass; |
4298 | Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui; |
4299 | break; |
4300 | case 8: |
4301 | RC = &AArch64::FPR64RegClass; |
4302 | Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui; |
4303 | break; |
4304 | case 4: |
4305 | RC = &AArch64::FPR32RegClass; |
4306 | Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui; |
4307 | break; |
4308 | case 2: |
4309 | RC = &AArch64::FPR16RegClass; |
4310 | Opc = AArch64::LDRHui; |
4311 | break; |
4312 | default: |
4313 | LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " |
4314 | << *CPVal->getType()); |
4315 | return nullptr; |
4316 | } |
4317 | |
4318 | MachineInstr *LoadMI = nullptr; |
4319 | auto &MF = MIRBuilder.getMF(); |
4320 | unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); |
4321 | if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) { |
4322 | // Use load(literal) for tiny code model. |
4323 | LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx); |
4324 | } else { |
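// Otherwise, form the address as an ADRP of the constant pool entry's page
// plus a load with the low 12 bits of the offset folded into the addressing
// mode, e.g. roughly (label name illustrative):
//   adrp x8, .LCPI0_0
//   ldr  q0, [x8, :lo12:.LCPI0_0]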
4325 | auto Adrp = |
4326 | MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {}) |
4327 | .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE); |
4328 | |
4329 | LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp}) |
4330 | .addConstantPoolIndex( |
4331 | Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
4332 | |
4333 | constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI); |
4334 | } |
4335 | |
4336 | MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); |
4337 | LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo, |
4338 | F: MachineMemOperand::MOLoad, |
4339 | Size, BaseAlignment: Align(Size))); |
4340 | constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI); |
4341 | return LoadMI; |
4342 | } |
4343 | |
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4345 | /// size and RB. |
4346 | static std::pair<unsigned, unsigned> |
4347 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { |
4348 | unsigned Opc, SubregIdx; |
4349 | if (RB.getID() == AArch64::GPRRegBankID) { |
4350 | if (EltSize == 8) { |
4351 | Opc = AArch64::INSvi8gpr; |
4352 | SubregIdx = AArch64::bsub; |
4353 | } else if (EltSize == 16) { |
4354 | Opc = AArch64::INSvi16gpr; |
4355 | SubregIdx = AArch64::ssub; |
4356 | } else if (EltSize == 32) { |
4357 | Opc = AArch64::INSvi32gpr; |
4358 | SubregIdx = AArch64::ssub; |
4359 | } else if (EltSize == 64) { |
4360 | Opc = AArch64::INSvi64gpr; |
4361 | SubregIdx = AArch64::dsub; |
4362 | } else { |
4363 | llvm_unreachable("invalid elt size!" ); |
4364 | } |
4365 | } else { |
4366 | if (EltSize == 8) { |
4367 | Opc = AArch64::INSvi8lane; |
4368 | SubregIdx = AArch64::bsub; |
4369 | } else if (EltSize == 16) { |
4370 | Opc = AArch64::INSvi16lane; |
4371 | SubregIdx = AArch64::hsub; |
4372 | } else if (EltSize == 32) { |
4373 | Opc = AArch64::INSvi32lane; |
4374 | SubregIdx = AArch64::ssub; |
4375 | } else if (EltSize == 64) { |
4376 | Opc = AArch64::INSvi64lane; |
4377 | SubregIdx = AArch64::dsub; |
4378 | } else { |
4379 | llvm_unreachable("invalid elt size!" ); |
4380 | } |
4381 | } |
4382 | return std::make_pair(x&: Opc, y&: SubregIdx); |
4383 | } |
4384 | |
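// Convenience wrapper around MachineIRBuilder::buildInstr that also applies
// any complex renderer functions (e.g. immediates produced by addressing-mode
// matchers) and constrains the result's register operands.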
4385 | MachineInstr *AArch64InstructionSelector::emitInstr( |
4386 | unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, |
4387 | std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, |
4388 | const ComplexRendererFns &RenderFns) const { |
4389 | assert(Opcode && "Expected an opcode?" ); |
4390 | assert(!isPreISelGenericOpcode(Opcode) && |
4391 | "Function should only be used to produce selected instructions!" ); |
4392 | auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps); |
4393 | if (RenderFns) |
4394 | for (auto &Fn : *RenderFns) |
4395 | Fn(MI); |
4396 | constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
4397 | return &*MI; |
4398 | } |
4399 | |
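// Shared helper for ADD/SUB-like operations. AddrModeAndSizeToOpcode is
// indexed as [0]=ri with a positive immediate, [1]=rs (shifted register),
// [2]=rr, [3]=ri of the negated opcode, [4]=rx (extended register); each entry
// holds {64-bit opcode, 32-bit opcode}.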
4400 | MachineInstr *AArch64InstructionSelector::emitAddSub( |
4401 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, |
4402 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, |
4403 | MachineIRBuilder &MIRBuilder) const { |
4404 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4405 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4406 | auto Ty = MRI.getType(Reg: LHS.getReg()); |
4407 | assert(!Ty.isVector() && "Expected a scalar or pointer?" ); |
4408 | unsigned Size = Ty.getSizeInBits(); |
4409 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only" ); |
4410 | bool Is32Bit = Size == 32; |
4411 | |
4412 | // INSTRri form with positive arithmetic immediate. |
4413 | if (auto Fns = selectArithImmed(Root&: RHS)) |
4414 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4415 | MIRBuilder, RenderFns: Fns); |
4416 | |
4417 | // INSTRri form with negative arithmetic immediate. |
4418 | if (auto Fns = selectNegArithImmed(Root&: RHS)) |
4419 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4420 | MIRBuilder, RenderFns: Fns); |
4421 | |
4422 | // INSTRrx form. |
4423 | if (auto Fns = selectArithExtendedRegister(Root&: RHS)) |
4424 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4425 | MIRBuilder, RenderFns: Fns); |
4426 | |
4427 | // INSTRrs form. |
4428 | if (auto Fns = selectShiftedRegister(Root&: RHS)) |
4429 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS}, |
4430 | MIRBuilder, RenderFns: Fns); |
4431 | return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, |
4432 | MIRBuilder); |
4433 | } |
4434 | |
4435 | MachineInstr * |
4436 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, |
4437 | MachineOperand &RHS, |
4438 | MachineIRBuilder &MIRBuilder) const { |
4439 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4440 | ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri}, |
4441 | {AArch64::ADDXrs, AArch64::ADDWrs}, |
4442 | {AArch64::ADDXrr, AArch64::ADDWrr}, |
4443 | {AArch64::SUBXri, AArch64::SUBWri}, |
4444 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; |
4445 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder); |
4446 | } |
4447 | |
4448 | MachineInstr * |
4449 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, |
4450 | MachineOperand &RHS, |
4451 | MachineIRBuilder &MIRBuilder) const { |
4452 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4453 | ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri}, |
4454 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, |
4455 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, |
4456 | {AArch64::SUBSXri, AArch64::SUBSWri}, |
4457 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; |
4458 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4459 | } |
4460 | |
4461 | MachineInstr * |
4462 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, |
4463 | MachineOperand &RHS, |
4464 | MachineIRBuilder &MIRBuilder) const { |
4465 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ |
4466 | ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri}, |
4467 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, |
4468 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, |
4469 | {AArch64::ADDSXri, AArch64::ADDSWri}, |
4470 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; |
4471 | return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder); |
4472 | } |
4473 | |
4474 | MachineInstr * |
4475 | AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, |
4476 | MachineOperand &RHS, |
4477 | MachineIRBuilder &MIRBuilder) const { |
4478 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4479 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4480 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4481 | static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; |
4482 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4483 | } |
4484 | |
4485 | MachineInstr * |
4486 | AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, |
4487 | MachineOperand &RHS, |
4488 | MachineIRBuilder &MIRBuilder) const { |
4489 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4490 | MachineRegisterInfo *MRI = MIRBuilder.getMRI(); |
4491 | bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4492 | static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; |
4493 | return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder); |
4494 | } |
4495 | |
4496 | MachineInstr * |
4497 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, |
4498 | MachineIRBuilder &MIRBuilder) const { |
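// CMN is ADDS with the result discarded; emit ADDS into a scratch virtual
// register of the right width so only NZCV is consumed.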
4499 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4500 | bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32); |
4501 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; |
4502 | return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder); |
4503 | } |
4504 | |
4505 | MachineInstr * |
4506 | AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, |
4507 | MachineIRBuilder &MIRBuilder) const { |
4508 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?" ); |
4509 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4510 | LLT Ty = MRI.getType(Reg: LHS.getReg()); |
4511 | unsigned RegSize = Ty.getSizeInBits(); |
4512 | bool Is32Bit = (RegSize == 32); |
4513 | const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, |
4514 | {AArch64::ANDSXrs, AArch64::ANDSWrs}, |
4515 | {AArch64::ANDSXrr, AArch64::ANDSWrr}}; |
4516 | // ANDS needs a logical immediate for its immediate form. Check if we can |
4517 | // fold one in. |
4518 | if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) { |
4519 | int64_t Imm = ValAndVReg->Value.getSExtValue(); |
4520 | |
4521 | if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) { |
4522 | auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}); |
4523 | TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize)); |
4524 | constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI); |
4525 | return &*TstMI; |
4526 | } |
4527 | } |
4528 | |
4529 | if (auto Fns = selectLogicalShiftedRegister(Root&: RHS)) |
4530 | return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns); |
4531 | return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder); |
4532 | } |
4533 | |
4534 | MachineInstr *AArch64InstructionSelector::emitIntegerCompare( |
4535 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, |
4536 | MachineIRBuilder &MIRBuilder) const { |
4537 | assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!" ); |
4538 | assert(Predicate.isPredicate() && "Expected predicate?" ); |
4539 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4540 | LLT CmpTy = MRI.getType(Reg: LHS.getReg()); |
4541 | assert(!CmpTy.isVector() && "Expected scalar or pointer" ); |
4542 | unsigned Size = CmpTy.getSizeInBits(); |
4543 | (void)Size; |
4544 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?" ); |
4545 | // Fold the compare into a cmn or tst if possible. |
4546 | if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) |
4547 | return FoldCmp; |
4548 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg()); |
4549 | return emitSUBS(Dst, LHS, RHS, MIRBuilder); |
4550 | } |
4551 | |
4552 | MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( |
4553 | Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { |
4554 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
4555 | #ifndef NDEBUG |
4556 | LLT Ty = MRI.getType(Dst); |
4557 | assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && |
4558 | "Expected a 32-bit scalar register?" ); |
4559 | #endif |
4560 | const Register ZReg = AArch64::WZR; |
4561 | AArch64CC::CondCode CC1, CC2; |
4562 | changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2); |
4563 | auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1); |
4564 | if (CC2 == AArch64CC::AL) |
4565 | return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, |
4566 | MIRBuilder); |
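// Predicates such as ONE or UEQ need two condition codes. Materialize each
// with a CSINC of WZR/WZR on the inverted condition (giving 1 when the
// original condition holds) and OR the two results together.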
4567 | const TargetRegisterClass *RC = &AArch64::GPR32RegClass; |
4568 | Register Def1Reg = MRI.createVirtualRegister(RegClass: RC); |
4569 | Register Def2Reg = MRI.createVirtualRegister(RegClass: RC); |
4570 | auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2); |
4571 | emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder); |
4572 | emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder); |
4573 | auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg}); |
4574 | constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI); |
4575 | return &*OrMI; |
4576 | } |
4577 | |
4578 | MachineInstr *AArch64InstructionSelector::emitFPCompare( |
4579 | Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, |
4580 | std::optional<CmpInst::Predicate> Pred) const { |
4581 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
4582 | LLT Ty = MRI.getType(Reg: LHS); |
4583 | if (Ty.isVector()) |
4584 | return nullptr; |
4585 | unsigned OpSize = Ty.getSizeInBits(); |
4586 | assert(OpSize == 16 || OpSize == 32 || OpSize == 64); |
4587 | |
4588 | // If this is a compare against +0.0, then we don't have |
4589 | // to explicitly materialize a constant. |
4590 | const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI); |
4591 | bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); |
4592 | |
4593 | auto IsEqualityPred = [](CmpInst::Predicate P) { |
4594 | return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || |
4595 | P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; |
4596 | }; |
4597 | if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { |
4598 | // Try commutating the operands. |
4599 | const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI); |
4600 | if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { |
4601 | ShouldUseImm = true; |
4602 | std::swap(a&: LHS, b&: RHS); |
4603 | } |
4604 | } |
4605 | unsigned CmpOpcTbl[2][3] = { |
4606 | {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr}, |
4607 | {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}}; |
4608 | unsigned CmpOpc = |
4609 | CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)]; |
4610 | |
4611 | // Partially build the compare. Decide if we need to add a use for the |
4612 | // third operand based off whether or not we're comparing against 0.0. |
4613 | auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS); |
4614 | CmpMI.setMIFlags(MachineInstr::NoFPExcept); |
4615 | if (!ShouldUseImm) |
4616 | CmpMI.addUse(RegNo: RHS); |
4617 | constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI); |
4618 | return &*CmpMI; |
4619 | } |
4620 | |
4621 | MachineInstr *AArch64InstructionSelector::emitVectorConcat( |
4622 | std::optional<Register> Dst, Register Op1, Register Op2, |
4623 | MachineIRBuilder &MIRBuilder) const { |
4624 | // We implement a vector concat by: |
4625 | // 1. Use scalar_to_vector to insert the lower vector into the larger dest |
4626 | // 2. Insert the upper vector into the destination's upper element |
4627 | // TODO: some of this code is common with G_BUILD_VECTOR handling. |
4628 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); |
4629 | |
4630 | const LLT Op1Ty = MRI.getType(Reg: Op1); |
4631 | const LLT Op2Ty = MRI.getType(Reg: Op2); |
4632 | |
4633 | if (Op1Ty != Op2Ty) { |
4634 | LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys" ); |
4635 | return nullptr; |
4636 | } |
4637 | assert(Op1Ty.isVector() && "Expected a vector for vector concat" ); |
4638 | |
4639 | if (Op1Ty.getSizeInBits() >= 128) { |
4640 | LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors" ); |
4641 | return nullptr; |
4642 | } |
4643 | |
4644 | // At the moment we just support 64 bit vector concats. |
4645 | if (Op1Ty.getSizeInBits() != 64) { |
LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4647 | return nullptr; |
4648 | } |
4649 | |
4650 | const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits()); |
4651 | const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI); |
4652 | const TargetRegisterClass *DstRC = |
4653 | getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank); |
4654 | |
4655 | MachineInstr *WidenedOp1 = |
4656 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder); |
4657 | MachineInstr *WidenedOp2 = |
4658 | emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder); |
4659 | if (!WidenedOp1 || !WidenedOp2) { |
4660 | LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value" ); |
4661 | return nullptr; |
4662 | } |
4663 | |
4664 | // Now do the insert of the upper element. |
4665 | unsigned InsertOpc, InsSubRegIdx; |
4666 | std::tie(args&: InsertOpc, args&: InsSubRegIdx) = |
4667 | getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits()); |
4668 | |
4669 | if (!Dst) |
4670 | Dst = MRI.createVirtualRegister(RegClass: DstRC); |
4671 | auto InsElt = |
4672 | MIRBuilder |
4673 | .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()}) |
4674 | .addImm(Val: 1) /* Lane index */ |
4675 | .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg()) |
4676 | .addImm(Val: 0); |
4677 | constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI); |
4678 | return &*InsElt; |
4679 | } |
4680 | |
4681 | MachineInstr * |
4682 | AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, |
4683 | Register Src2, AArch64CC::CondCode Pred, |
4684 | MachineIRBuilder &MIRBuilder) const { |
4685 | auto &MRI = *MIRBuilder.getMRI(); |
4686 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst); |
4687 | // If we used a register class, then this won't necessarily have an LLT. |
4688 | // Compute the size based off whether or not we have a class or bank. |
4689 | unsigned Size; |
4690 | if (const auto *RC = dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank)) |
4691 | Size = TRI.getRegSizeInBits(RC: *RC); |
4692 | else |
4693 | Size = MRI.getType(Reg: Dst).getSizeInBits(); |
4694 | // Some opcodes use s1. |
4695 | assert(Size <= 64 && "Expected 64 bits or less only!" ); |
4696 | static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; |
4697 | unsigned Opc = OpcTable[Size == 64]; |
4698 | auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred); |
4699 | constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI); |
4700 | return &*CSINC; |
4701 | } |
4702 | |
4703 | MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I, |
4704 | Register CarryReg) { |
4705 | MachineRegisterInfo *MRI = MIB.getMRI(); |
4706 | unsigned Opcode = I.getOpcode(); |
4707 | |
4708 | // If the instruction is a SUB, we need to negate the carry, |
4709 | // because borrowing is indicated by carry-flag == 0. |
4710 | bool NeedsNegatedCarry = |
4711 | (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE); |
4712 | |
4713 | // If the previous instruction will already produce the correct carry, do not |
4714 | // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences |
4715 | // generated during legalization of wide add/sub. This optimization depends on |
4716 | // these sequences not being interrupted by other instructions. |
4717 | // We have to select the previous instruction before the carry-using |
4718 | // instruction is deleted by the calling function, otherwise the previous |
4719 | // instruction might become dead and would get deleted. |
4720 | MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg); |
4721 | if (SrcMI == I.getPrevNode()) { |
4722 | if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) { |
4723 | bool ProducesNegatedCarry = CarrySrcMI->isSub(); |
4724 | if (NeedsNegatedCarry == ProducesNegatedCarry && |
4725 | CarrySrcMI->isUnsigned() && |
4726 | CarrySrcMI->getCarryOutReg() == CarryReg && |
4727 | selectAndRestoreState(I&: *SrcMI)) |
4728 | return nullptr; |
4729 | } |
4730 | } |
4731 | |
4732 | Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
4733 | |
4734 | if (NeedsNegatedCarry) { |
4735 | // (0 - Carry) sets !C in NZCV when Carry == 1 |
4736 | Register ZReg = AArch64::WZR; |
4737 | return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB); |
4738 | } |
4739 | |
4740 | // (Carry - 1) sets !C in NZCV when Carry == 0 |
4741 | auto Fns = select12BitValueWithLeftShift(Immed: 1); |
4742 | return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns); |
4743 | } |
4744 | |
4745 | bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I, |
4746 | MachineRegisterInfo &MRI) { |
4747 | auto &CarryMI = cast<GAddSubCarryOut>(Val&: I); |
4748 | |
4749 | if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) { |
4750 | // Set NZCV carry according to carry-in VReg |
4751 | emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg()); |
4752 | } |
4753 | |
4754 | // Emit the operation and get the correct condition code. |
4755 | auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(), |
4756 | LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB); |
4757 | |
4758 | Register CarryOutReg = CarryMI.getCarryOutReg(); |
4759 | |
4760 | // Don't convert carry-out to VReg if it is never used |
4761 | if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) { |
4762 | // Now, put the overflow result in the register given by the first operand |
4763 | // to the overflow op. CSINC increments the result when the predicate is |
4764 | // false, so to get the increment when it's true, we need to use the |
4765 | // inverse. In this case, we want to increment when carry is set. |
4766 | Register ZReg = AArch64::WZR; |
4767 | emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg, |
4768 | Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB); |
4769 | } |
4770 | |
4771 | I.eraseFromParent(); |
4772 | return true; |
4773 | } |
4774 | |
4775 | std::pair<MachineInstr *, AArch64CC::CondCode> |
4776 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, |
4777 | MachineOperand &LHS, |
4778 | MachineOperand &RHS, |
4779 | MachineIRBuilder &MIRBuilder) const { |
4780 | switch (Opcode) { |
4781 | default: |
4782 | llvm_unreachable("Unexpected opcode!" ); |
4783 | case TargetOpcode::G_SADDO: |
4784 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4785 | case TargetOpcode::G_UADDO: |
4786 | return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4787 | case TargetOpcode::G_SSUBO: |
4788 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4789 | case TargetOpcode::G_USUBO: |
4790 | return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4791 | case TargetOpcode::G_SADDE: |
4792 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4793 | case TargetOpcode::G_UADDE: |
4794 | return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS); |
4795 | case TargetOpcode::G_SSUBE: |
4796 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS); |
4797 | case TargetOpcode::G_USUBE: |
4798 | return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO); |
4799 | } |
4800 | } |
4801 | |
4802 | /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be |
4803 | /// expressed as a conjunction. |
4804 | /// \param CanNegate Set to true if we can negate the whole sub-tree just by |
4805 | /// changing the conditions on the CMP tests. |
4806 | /// (this means we can call emitConjunctionRec() with |
4807 | /// Negate==true on this sub-tree) |
4808 | /// \param MustBeFirst Set to true if this subtree needs to be negated and we |
4809 | /// cannot do the negation naturally. We are required to |
4810 | /// emit the subtree first in this case. |
/// \param WillNegate Is true if we are called when the result of this
4812 | /// subexpression must be negated. This happens when the |
4813 | /// outer expression is an OR. We can use this fact to know |
4814 | /// that we have a double negation (or (or ...) ...) that |
4815 | /// can be implemented for free. |
4816 | static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, |
4817 | bool WillNegate, MachineRegisterInfo &MRI, |
4818 | unsigned Depth = 0) { |
4819 | if (!MRI.hasOneNonDBGUse(RegNo: Val)) |
4820 | return false; |
4821 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4822 | unsigned Opcode = ValDef->getOpcode(); |
4823 | if (isa<GAnyCmp>(Val: ValDef)) { |
4824 | CanNegate = true; |
4825 | MustBeFirst = false; |
4826 | return true; |
4827 | } |
4828 | // Protect against exponential runtime and stack overflow. |
4829 | if (Depth > 6) |
4830 | return false; |
4831 | if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) { |
4832 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4833 | Register O0 = ValDef->getOperand(i: 1).getReg(); |
4834 | Register O1 = ValDef->getOperand(i: 2).getReg(); |
4835 | bool CanNegateL; |
4836 | bool MustBeFirstL; |
4837 | if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1)) |
4838 | return false; |
4839 | bool CanNegateR; |
4840 | bool MustBeFirstR; |
4841 | if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1)) |
4842 | return false; |
4843 | |
4844 | if (MustBeFirstL && MustBeFirstR) |
4845 | return false; |
4846 | |
4847 | if (IsOR) { |
4848 | // For an OR expression we need to be able to naturally negate at least |
4849 | // one side or we cannot do the transformation at all. |
4850 | if (!CanNegateL && !CanNegateR) |
4851 | return false; |
// If the result of the OR will be negated and we can naturally negate
4853 | // the leaves, then this sub-tree as a whole negates naturally. |
4854 | CanNegate = WillNegate && CanNegateL && CanNegateR; |
4855 | // If we cannot naturally negate the whole sub-tree, then this must be |
4856 | // emitted first. |
4857 | MustBeFirst = !CanNegate; |
4858 | } else { |
4859 | assert(Opcode == TargetOpcode::G_AND && "Must be G_AND" ); |
4860 | // We cannot naturally negate an AND operation. |
4861 | CanNegate = false; |
4862 | MustBeFirst = MustBeFirstL || MustBeFirstR; |
4863 | } |
4864 | return true; |
4865 | } |
4866 | return false; |
4867 | } |
4868 | |
4869 | MachineInstr *AArch64InstructionSelector::emitConditionalComparison( |
4870 | Register LHS, Register RHS, CmpInst::Predicate CC, |
4871 | AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, |
4872 | MachineIRBuilder &MIB) const { |
4873 | auto &MRI = *MIB.getMRI(); |
4874 | LLT OpTy = MRI.getType(Reg: LHS); |
4875 | unsigned CCmpOpc; |
4876 | std::optional<ValueAndVReg> C; |
4877 | if (CmpInst::isIntPredicate(P: CC)) { |
4878 | assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64); |
4879 | C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
4880 | if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31)) |
4881 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr; |
4882 | else if (C->Value.ule(RHS: 31)) |
4883 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi; |
4884 | else |
4885 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi; |
4886 | } else { |
4887 | assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 || |
4888 | OpTy.getSizeInBits() == 64); |
4889 | switch (OpTy.getSizeInBits()) { |
4890 | case 16: |
4891 | assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons" ); |
4892 | CCmpOpc = AArch64::FCCMPHrr; |
4893 | break; |
4894 | case 32: |
4895 | CCmpOpc = AArch64::FCCMPSrr; |
4896 | break; |
4897 | case 64: |
4898 | CCmpOpc = AArch64::FCCMPDrr; |
4899 | break; |
4900 | default: |
4901 | return nullptr; |
4902 | } |
4903 | } |
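// If Predicate (the condition from the previous compare in the chain) does not
// hold, CCMP sets NZCV to the immediate below instead of comparing, so pick
// the NZCV value satisfying the inverse of OutCC to make the overall test
// fail.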
4904 | AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
4905 | unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC); |
4906 | auto CCmp = |
4907 | MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS}); |
4908 | if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi) |
4909 | CCmp.addImm(Val: C->Value.getZExtValue()); |
4910 | else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi) |
4911 | CCmp.addImm(Val: C->Value.abs().getZExtValue()); |
4912 | else |
4913 | CCmp.addReg(RegNo: RHS); |
4914 | CCmp.addImm(Val: NZCV).addImm(Val: Predicate); |
4915 | constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI); |
4916 | return &*CCmp; |
4917 | } |
4918 | |
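// Recursively emits the compare / conditional-compare chain for a conjunction
// tree previously validated by canEmitConjunction. A valid CCOp register means
// an earlier compare in the chain has already produced flags; OutCC receives
// the condition to test on the final flags.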
4919 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( |
4920 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, |
4921 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { |
4922 | // We're at a tree leaf, produce a conditional comparison operation. |
4923 | auto &MRI = *MIB.getMRI(); |
4924 | MachineInstr *ValDef = MRI.getVRegDef(Reg: Val); |
4925 | unsigned Opcode = ValDef->getOpcode(); |
4926 | if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) { |
4927 | Register LHS = Cmp->getLHSReg(); |
4928 | Register RHS = Cmp->getRHSReg(); |
4929 | CmpInst::Predicate CC = Cmp->getCond(); |
4930 | if (Negate) |
4931 | CC = CmpInst::getInversePredicate(pred: CC); |
4932 | if (isa<GICmp>(Val: Cmp)) { |
4933 | OutCC = changeICMPPredToAArch64CC(P: CC); |
4934 | } else { |
4935 | // Handle special FP cases. |
AArch64CC::CondCode ExtraCC;
4937 | changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC); |
4938 | // Some floating point conditions can't be tested with a single condition |
4939 | // code. Construct an additional comparison in this case. |
4940 | if (ExtraCC != AArch64CC::AL) { |
MachineInstr *ExtraCmp;
4942 | if (!CCOp) |
4943 | ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC); |
4944 | else |
4945 | ExtraCmp = |
4946 | emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB); |
4947 | CCOp = ExtraCmp->getOperand(i: 0).getReg(); |
4948 | Predicate = ExtraCC; |
4949 | } |
4950 | } |
4951 | |
4952 | // Produce a normal comparison if we are first in the chain |
4953 | if (!CCOp) { |
4954 | auto Dst = MRI.cloneVirtualRegister(VReg: LHS); |
4955 | if (isa<GICmp>(Val: Cmp)) |
4956 | return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB); |
4957 | return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(), |
4958 | RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB); |
4959 | } |
4960 | // Otherwise produce a ccmp. |
4961 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); |
4962 | } |
4963 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree" ); |
4964 | |
4965 | bool IsOR = Opcode == TargetOpcode::G_OR; |
4966 | |
4967 | Register LHS = ValDef->getOperand(i: 1).getReg(); |
4968 | bool CanNegateL; |
4969 | bool MustBeFirstL; |
4970 | bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI); |
4971 | assert(ValidL && "Valid conjunction/disjunction tree" ); |
4972 | (void)ValidL; |
4973 | |
4974 | Register RHS = ValDef->getOperand(i: 2).getReg(); |
4975 | bool CanNegateR; |
4976 | bool MustBeFirstR; |
4977 | bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI); |
4978 | assert(ValidR && "Valid conjunction/disjunction tree" ); |
4979 | (void)ValidR; |
4980 | |
4981 | // Swap sub-tree that must come first to the right side. |
4982 | if (MustBeFirstL) { |
4983 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree" ); |
4984 | std::swap(a&: LHS, b&: RHS); |
4985 | std::swap(a&: CanNegateL, b&: CanNegateR); |
4986 | std::swap(a&: MustBeFirstL, b&: MustBeFirstR); |
4987 | } |
4988 | |
4989 | bool NegateR; |
4990 | bool NegateAfterR; |
4991 | bool NegateL; |
4992 | bool NegateAfterAll; |
4993 | if (Opcode == TargetOpcode::G_OR) { |
4994 | // Swap the sub-tree that we can negate naturally to the left. |
4995 | if (!CanNegateL) { |
4996 | assert(CanNegateR && "at least one side must be negatable" ); |
4997 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree" ); |
4998 | assert(!Negate); |
4999 | std::swap(a&: LHS, b&: RHS); |
5000 | NegateR = false; |
5001 | NegateAfterR = true; |
5002 | } else { |
5003 | // Negate the left sub-tree if possible, otherwise negate the result. |
5004 | NegateR = CanNegateR; |
5005 | NegateAfterR = !CanNegateR; |
5006 | } |
5007 | NegateL = true; |
5008 | NegateAfterAll = !Negate; |
5009 | } else { |
5010 | assert(Opcode == TargetOpcode::G_AND && |
5011 | "Valid conjunction/disjunction tree" ); |
5012 | assert(!Negate && "Valid conjunction/disjunction tree" ); |
5013 | |
5014 | NegateL = false; |
5015 | NegateR = false; |
5016 | NegateAfterR = false; |
5017 | NegateAfterAll = false; |
5018 | } |
5019 | |
5020 | // Emit sub-trees. |
5021 | AArch64CC::CondCode RHSCC; |
5022 | MachineInstr *CmpR = |
5023 | emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB); |
5024 | if (NegateAfterR) |
5025 | RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC); |
5026 | MachineInstr *CmpL = emitConjunctionRec( |
5027 | Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB); |
5028 | if (NegateAfterAll) |
5029 | OutCC = AArch64CC::getInvertedCondCode(Code: OutCC); |
5030 | return CmpL; |
5031 | } |
5032 | |
5033 | MachineInstr *AArch64InstructionSelector::emitConjunction( |
5034 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { |
5035 | bool DummyCanNegate; |
5036 | bool DummyMustBeFirst; |
5037 | if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false, |
5038 | MRI&: *MIB.getMRI())) |
5039 | return nullptr; |
5040 | return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB); |
5041 | } |
5042 | |
5043 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, |
5044 | MachineInstr &CondMI) { |
5045 | AArch64CC::CondCode AArch64CC; |
5046 | MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB); |
5047 | if (!ConjMI) |
5048 | return false; |
5049 | |
5050 | emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB); |
5051 | SelI.eraseFromParent(); |
5052 | return true; |
5053 | } |
5054 | |
5055 | bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { |
5056 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
5057 | // We want to recognize this pattern: |
5058 | // |
5059 | // $z = G_FCMP pred, $x, $y |
5060 | // ... |
5061 | // $w = G_SELECT $z, $a, $b |
5062 | // |
5063 | // Where the value of $z is *only* ever used by the G_SELECT (possibly with |
5064 | // some copies/truncs in between.) |
5065 | // |
5066 | // If we see this, then we can emit something like this: |
5067 | // |
5068 | // fcmp $x, $y |
5069 | // fcsel $w, $a, $b, pred |
5070 | // |
5071 | // Rather than emitting both of the rather long sequences in the standard |
5072 | // G_FCMP/G_SELECT select methods. |
5073 | |
5074 | // First, check if the condition is defined by a compare. |
5075 | MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg()); |
5076 | |
5077 | // We can only fold if all of the defs have one use. |
5078 | Register CondDefReg = CondDef->getOperand(i: 0).getReg(); |
5079 | if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) { |
5080 | // Unless it's another select. |
5081 | for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) { |
5082 | if (CondDef == &UI) |
5083 | continue; |
5084 | if (UI.getOpcode() != TargetOpcode::G_SELECT) |
5085 | return false; |
5086 | } |
5087 | } |
5088 | |
5089 | // Is the condition defined by a compare? |
5090 | unsigned CondOpc = CondDef->getOpcode(); |
5091 | if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { |
5092 | if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef)) |
5093 | return true; |
5094 | return false; |
5095 | } |
5096 | |
5097 | AArch64CC::CondCode CondCode; |
5098 | if (CondOpc == TargetOpcode::G_ICMP) { |
5099 | auto Pred = |
5100 | static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate()); |
5101 | CondCode = changeICMPPredToAArch64CC(P: Pred); |
5102 | emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3), |
5103 | Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB); |
5104 | } else { |
5105 | // Get the condition code for the select. |
5106 | auto Pred = |
5107 | static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate()); |
5108 | AArch64CC::CondCode CondCode2; |
5109 | changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2); |
5110 | |
5111 | // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two |
5112 | // instructions to emit the comparison. |
5113 | // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be |
5114 | // unnecessary. |
5115 | if (CondCode2 != AArch64CC::AL) |
5116 | return false; |
5117 | |
5118 | if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(), |
5119 | RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) { |
5120 | LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n" ); |
5121 | return false; |
5122 | } |
5123 | } |
5124 | |
5125 | // Emit the select. |
5126 | emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(), |
5127 | False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB); |
5128 | I.eraseFromParent(); |
5129 | return true; |
5130 | } |
5131 | |
5132 | MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( |
5133 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, |
5134 | MachineIRBuilder &MIRBuilder) const { |
5135 | assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && |
5136 | "Unexpected MachineOperand" ); |
5137 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
5138 | // We want to find this sort of thing: |
5139 | // x = G_SUB 0, y |
5140 | // G_ICMP z, x |
5141 | // |
5142 | // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. |
5143 | // e.g: |
5144 | // |
5145 | // cmn z, y |
5146 | |
5147 | // Check if the RHS or LHS of the G_ICMP is defined by a SUB |
5148 | MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI); |
5149 | MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI); |
5150 | auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); |
5151 | // Given this: |
5152 | // |
5153 | // x = G_SUB 0, y |
5154 | // G_ICMP x, z |
5155 | // |
5156 | // Produce this: |
5157 | // |
5158 | // cmn y, z |
5159 | if (isCMN(MaybeSub: LHSDef, Pred: P, MRI)) |
5160 | return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder); |
5161 | |
5162 | // Same idea here, but with the RHS of the compare instead: |
5163 | // |
5164 | // Given this: |
5165 | // |
5166 | // x = G_SUB 0, y |
5167 | // G_ICMP z, x |
5168 | // |
5169 | // Produce this: |
5170 | // |
5171 | // cmn z, y |
5172 | if (isCMN(MaybeSub: RHSDef, Pred: P, MRI)) |
5173 | return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder); |
5174 | |
5175 | // Given this: |
5176 | // |
5177 | // z = G_AND x, y |
5178 | // G_ICMP z, 0 |
5179 | // |
5180 | // Produce this if the compare is not unsigned (i.e. signed or equality):
5181 | // |
5182 | // tst x, y |
5183 | if (!CmpInst::isUnsigned(predicate: P) && LHSDef && |
5184 | LHSDef->getOpcode() == TargetOpcode::G_AND) { |
5185 | // Make sure that the RHS is 0. |
5186 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI); |
5187 | if (!ValAndVReg || ValAndVReg->Value != 0) |
5188 | return nullptr; |
5189 | |
5190 | return emitTST(LHS&: LHSDef->getOperand(i: 1), |
5191 | RHS&: LHSDef->getOperand(i: 2), MIRBuilder); |
5192 | } |
5193 | |
5194 | return nullptr; |
5195 | } |
5196 | |
5197 | bool AArch64InstructionSelector::selectShuffleVector( |
5198 | MachineInstr &I, MachineRegisterInfo &MRI) { |
5199 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5200 | Register Src1Reg = I.getOperand(i: 1).getReg(); |
5201 | const LLT Src1Ty = MRI.getType(Reg: Src1Reg); |
5202 | Register Src2Reg = I.getOperand(i: 2).getReg(); |
5203 | const LLT Src2Ty = MRI.getType(Reg: Src2Reg); |
5204 | ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask(); |
5205 | |
5206 | MachineBasicBlock &MBB = *I.getParent(); |
5207 | MachineFunction &MF = *MBB.getParent(); |
5208 | LLVMContext &Ctx = MF.getFunction().getContext(); |
5209 | |
5210 | // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if |
5211 | // it's originated from a <1 x T> type. Those should have been lowered into |
5212 | // G_BUILD_VECTOR earlier. |
5213 | if (!Src1Ty.isVector() || !Src2Ty.isVector()) { |
5214 | LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n" ); |
5215 | return false; |
5216 | } |
5217 | |
5218 | unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; |
5219 | |
5220 | SmallVector<Constant *, 64> CstIdxs; |
5221 | for (int Val : Mask) { |
5222 | // For now, treat any undef index as 0. This should be optimized in the
5223 | // future, e.g. to select DUP etc.
5224 | Val = Val < 0 ? 0 : Val; |
5225 | for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { |
5226 | unsigned Offset = Byte + Val * BytesPerElt; |
5227 | CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset)); |
5228 | } |
5229 | } |
5230 | |
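     | // E.g. for <4 x s32> sources and mask <0, 4, 1, 5>, BytesPerElt is 4 and the
     | // byte indices become 0-3, 16-19, 4-7, 20-23, i.e. TBL selects bytes out of
     | // the concatenated {Src1, Src2} table.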
5231 | // Use a constant pool to load the index vector for TBL. |
5232 | Constant *CPVal = ConstantVector::get(V: CstIdxs); |
5233 | MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB); |
5234 | if (!IndexLoad) { |
5235 | LLVM_DEBUG(dbgs() << "Could not load from a constant pool" ); |
5236 | return false; |
5237 | } |
5238 | |
5239 | if (DstTy.getSizeInBits() != 128) { |
5240 | assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty" ); |
5241 | // This case can be done with TBL1. |
5242 | MachineInstr *Concat = |
5243 | emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB); |
5244 | if (!Concat) { |
5245 | LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1" ); |
5246 | return false; |
5247 | } |
5248 | |
5249 | // The constant pool load will be 64 bits, so need to convert to FPR128 reg. |
5250 | IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, |
5251 | Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB); |
5252 | |
5253 | auto TBL1 = MIB.buildInstr( |
5254 | Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass}, |
5255 | SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()}); |
5256 | constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI); |
5257 | |
5258 | auto Copy = |
5259 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {}) |
5260 | .addReg(RegNo: TBL1.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub); |
5261 | RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI); |
5262 | I.eraseFromParent(); |
5263 | return true; |
5264 | } |
5265 | |
5266 | // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive |
5267 | // Q registers for regalloc. |
5268 | SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; |
5269 | auto RegSeq = createQTuple(Regs, MIB); |
5270 | auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)}, |
5271 | SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)}); |
5272 | constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI); |
5273 | I.eraseFromParent(); |
5274 | return true; |
5275 | } |
5276 | |
5277 | MachineInstr *AArch64InstructionSelector::emitLaneInsert( |
5278 | std::optional<Register> DstReg, Register SrcReg, Register EltReg, |
5279 | unsigned LaneIdx, const RegisterBank &RB, |
5280 | MachineIRBuilder &MIRBuilder) const { |
5281 | MachineInstr *InsElt = nullptr; |
5282 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; |
5283 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
5284 | |
5285 | // Create a register to define with the insert if one wasn't passed in. |
5286 | if (!DstReg) |
5287 | DstReg = MRI.createVirtualRegister(RegClass: DstRC); |
5288 | |
5289 | unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits(); |
5290 | unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; |
5291 | |
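     | // FPR elements are inserted with an INSvi*lane instruction, which copies a
     | // lane from another vector register: the scalar is first placed into lane 0
     | // of a 128-bit register and that lane (the trailing 0 immediate) is then
     | // inserted. GPR elements can use the INSvi*gpr form directly.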
5292 | if (RB.getID() == AArch64::FPRRegBankID) { |
5293 | auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder); |
5294 | InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg}) |
5295 | .addImm(Val: LaneIdx) |
5296 | .addUse(RegNo: InsSub->getOperand(i: 0).getReg()) |
5297 | .addImm(Val: 0); |
5298 | } else { |
5299 | InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg}) |
5300 | .addImm(Val: LaneIdx) |
5301 | .addUse(RegNo: EltReg); |
5302 | } |
5303 | |
5304 | constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI); |
5305 | return InsElt; |
5306 | } |
5307 | |
5308 | bool AArch64InstructionSelector::selectUSMovFromExtend( |
5309 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5310 | if (MI.getOpcode() != TargetOpcode::G_SEXT && |
5311 | MI.getOpcode() != TargetOpcode::G_ZEXT && |
5312 | MI.getOpcode() != TargetOpcode::G_ANYEXT) |
5313 | return false; |
5314 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; |
5315 | const Register DefReg = MI.getOperand(i: 0).getReg(); |
5316 | const LLT DstTy = MRI.getType(Reg: DefReg); |
5317 | unsigned DstSize = DstTy.getSizeInBits(); |
5318 | |
5319 | if (DstSize != 32 && DstSize != 64) |
5320 | return false; |
5321 | |
5322 | MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5323 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
5324 | int64_t Lane; |
5325 | if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane))) |
5326 | return false; |
5327 | Register Src0 = Extract->getOperand(i: 1).getReg(); |
5328 | |
5329 | const LLT VecTy = MRI.getType(Reg: Src0); |
5330 | if (VecTy.isScalableVector()) |
5331 | return false; |
5332 | |
5333 | if (VecTy.getSizeInBits() != 128) { |
5334 | const MachineInstr *ScalarToVector = emitScalarToVector( |
5335 | EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB); |
5336 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!" ); |
5337 | Src0 = ScalarToVector->getOperand(i: 0).getReg(); |
5338 | } |
5339 | |
5340 | unsigned Opcode; |
5341 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) |
5342 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; |
5343 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) |
5344 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; |
5345 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) |
5346 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; |
5347 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) |
5348 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; |
5349 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) |
5350 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; |
5351 | else |
5352 | llvm_unreachable("Unexpected type combo for S/UMov!" ); |
5353 | |
5354 | // We may need to generate one of these, depending on the type and sign of the |
5355 | // input: |
5356 | // DstReg = SMOV Src0, Lane; |
5357 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; |
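     | // E.g. a G_ZEXT to s32 of (G_EXTRACT_VECTOR_ELT <8 x s16> %v, 1) becomes
     | // UMOVvi16 %v, 1 ("umov w0, v0.h[1]"); for a 64-bit unsigned result the
     | // 32-bit UMOV is wrapped in a SUBREG_TO_REG, relying on the write to the
     | // W register zeroing the upper 32 bits.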
5358 | MachineInstr *ExtI = nullptr; |
5359 | if (DstSize == 64 && !IsSigned) { |
5360 | Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass); |
5361 | MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5362 | ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {}) |
5363 | .addImm(Val: 0) |
5364 | .addUse(RegNo: NewReg) |
5365 | .addImm(Val: AArch64::sub_32); |
5366 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
5367 | } else |
5368 | ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane); |
5369 | |
5370 | constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI); |
5371 | MI.eraseFromParent(); |
5372 | return true; |
5373 | } |
5374 | |
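     | // Each of the tryAdvSIMDModImm* helpers below checks whether the requested
     | // splat bit pattern fits one of the AdvSIMD "modified immediate" encodings
     | // and, if so, emits the corresponding MOVI/MVNI/FMOV with the encoded
     | // immediate (plus a shift where the encoding has one). They return nullptr
     | // when the pattern does not fit, so emitConstantVector can fall through to
     | // the next candidate encoding.
     | 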
5375 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8( |
5376 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5377 | unsigned int Op; |
5378 | if (DstSize == 128) { |
5379 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5380 | return nullptr; |
5381 | Op = AArch64::MOVIv16b_ns; |
5382 | } else { |
5383 | Op = AArch64::MOVIv8b_ns; |
5384 | } |
5385 | |
5386 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5387 | |
5388 | if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) { |
5389 | Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val); |
5390 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5391 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5392 | return &*Mov; |
5393 | } |
5394 | return nullptr; |
5395 | } |
5396 | |
5397 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16( |
5398 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5399 | bool Inv) { |
5400 | |
5401 | unsigned int Op; |
5402 | if (DstSize == 128) { |
5403 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5404 | return nullptr; |
5405 | Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16; |
5406 | } else { |
5407 | Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16; |
5408 | } |
5409 | |
5410 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5411 | uint64_t Shift; |
5412 | |
5413 | if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) { |
5414 | Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val); |
5415 | Shift = 0; |
5416 | } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) { |
5417 | Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val); |
5418 | Shift = 8; |
5419 | } else |
5420 | return nullptr; |
5421 | |
5422 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5423 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5424 | return &*Mov; |
5425 | } |
5426 | |
5427 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32( |
5428 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5429 | bool Inv) { |
5430 | |
5431 | unsigned int Op; |
5432 | if (DstSize == 128) { |
5433 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5434 | return nullptr; |
5435 | Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32; |
5436 | } else { |
5437 | Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32; |
5438 | } |
5439 | |
5440 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5441 | uint64_t Shift; |
5442 | |
5443 | if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) { |
5444 | Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val); |
5445 | Shift = 0; |
5446 | } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) { |
5447 | Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val); |
5448 | Shift = 8; |
5449 | } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) { |
5450 | Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val); |
5451 | Shift = 16; |
5452 | } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) { |
5453 | Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val); |
5454 | Shift = 24; |
5455 | } else |
5456 | return nullptr; |
5457 | |
5458 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5459 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5460 | return &*Mov; |
5461 | } |
5462 | |
5463 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64( |
5464 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5465 | |
5466 | unsigned int Op; |
5467 | if (DstSize == 128) { |
5468 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5469 | return nullptr; |
5470 | Op = AArch64::MOVIv2d_ns; |
5471 | } else { |
5472 | Op = AArch64::MOVID; |
5473 | } |
5474 | |
5475 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5476 | if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) { |
5477 | Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val); |
5478 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5479 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5480 | return &*Mov; |
5481 | } |
5482 | return nullptr; |
5483 | } |
5484 | |
5485 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s( |
5486 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder, |
5487 | bool Inv) { |
5488 | |
5489 | unsigned int Op; |
5490 | if (DstSize == 128) { |
5491 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5492 | return nullptr; |
5493 | Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl; |
5494 | } else { |
5495 | Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl; |
5496 | } |
5497 | |
5498 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5499 | uint64_t Shift; |
5500 | |
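     | // This handles the MOVI/MVNI forms with an MSL (shifting-ones) modifier.
     | // The shift operand values 264 and 272 select "msl #8" and "msl #16"
     | // respectively, mirroring the equivalent SelectionDAG lowering.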
5501 | if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) { |
5502 | Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val); |
5503 | Shift = 264; |
5504 | } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) { |
5505 | Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val); |
5506 | Shift = 272; |
5507 | } else |
5508 | return nullptr; |
5509 | |
5510 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift); |
5511 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5512 | return &*Mov; |
5513 | } |
5514 | |
5515 | MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP( |
5516 | Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { |
5517 | |
5518 | unsigned int Op; |
5519 | bool IsWide = false; |
5520 | if (DstSize == 128) { |
5521 | if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64)) |
5522 | return nullptr; |
5523 | Op = AArch64::FMOVv4f32_ns; |
5524 | IsWide = true; |
5525 | } else { |
5526 | Op = AArch64::FMOVv2f32_ns; |
5527 | } |
5528 | |
5529 | uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue(); |
5530 | |
5531 | if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) { |
5532 | Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val); |
5533 | } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) { |
5534 | Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val); |
5535 | Op = AArch64::FMOVv2f64_ns; |
5536 | } else |
5537 | return nullptr; |
5538 | |
5539 | auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val); |
5540 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5541 | return &*Mov; |
5542 | } |
5543 | |
5544 | bool AArch64InstructionSelector::selectIndexedExtLoad( |
5545 | MachineInstr &MI, MachineRegisterInfo &MRI) { |
5546 | auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI); |
5547 | Register Dst = ExtLd.getDstReg(); |
5548 | Register WriteBack = ExtLd.getWritebackReg(); |
5549 | Register Base = ExtLd.getBaseReg(); |
5550 | Register Offset = ExtLd.getOffsetReg(); |
5551 | LLT Ty = MRI.getType(Reg: Dst); |
5552 | assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs. |
5553 | unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits(); |
5554 | bool IsPre = ExtLd.isPre(); |
5555 | bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd); |
5556 | unsigned InsertIntoSubReg = 0; |
5557 | bool IsDst64 = Ty.getSizeInBits() == 64; |
5558 | |
5559 | // Sign-extending loads must target a GPR, but any-extending and zero-extending
5560 | // loads can also target an FPR, as long as they are scalar.
5561 | bool IsFPR = RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID; |
5562 | if ((IsSExt && IsFPR) || Ty.isVector()) |
5563 | return false; |
5564 | |
5565 | unsigned Opc = 0; |
5566 | LLT NewLdDstTy; |
5567 | LLT s32 = LLT::scalar(SizeInBits: 32); |
5568 | LLT s64 = LLT::scalar(SizeInBits: 64); |
5569 | |
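     | // Pick a pre/post-indexed opcode based on the memory size. Sign-extending
     | // loads produce the final width directly. Zero/any-extending GPR loads use
     | // the 32-bit form (which implicitly zeroes the upper bits) and are widened
     | // to 64 bits afterwards with SUBREG_TO_REG on sub_32 if needed. FPR
     | // destinations load the memory-sized scalar and are widened by placing it
     | // into the matching b/h/s subregister.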
5570 | if (MemSizeBits == 8) { |
5571 | if (IsSExt) { |
5572 | if (IsDst64) |
5573 | Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; |
5574 | else |
5575 | Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; |
5576 | NewLdDstTy = IsDst64 ? s64 : s32; |
5577 | } else if (IsFPR) { |
5578 | Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost; |
5579 | InsertIntoSubReg = AArch64::bsub; |
5580 | NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits); |
5581 | } else { |
5582 | Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; |
5583 | InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0; |
5584 | NewLdDstTy = s32; |
5585 | } |
5586 | } else if (MemSizeBits == 16) { |
5587 | if (IsSExt) { |
5588 | if (IsDst64) |
5589 | Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; |
5590 | else |
5591 | Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; |
5592 | NewLdDstTy = IsDst64 ? s64 : s32; |
5593 | } else if (IsFPR) { |
5594 | Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; |
5595 | InsertIntoSubReg = AArch64::hsub; |
5596 | NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits); |
5597 | } else { |
5598 | Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; |
5599 | InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0; |
5600 | NewLdDstTy = s32; |
5601 | } |
5602 | } else if (MemSizeBits == 32) { |
5603 | if (IsSExt) { |
5604 | Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; |
5605 | NewLdDstTy = s64; |
5606 | } else if (IsFPR) { |
5607 | Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; |
5608 | InsertIntoSubReg = AArch64::ssub; |
5609 | NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits); |
5610 | } else { |
5611 | Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; |
5612 | InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0; |
5613 | NewLdDstTy = s32; |
5614 | } |
5615 | } else { |
5616 | llvm_unreachable("Unexpected size for indexed load" ); |
5617 | } |
5618 | |
5619 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5620 | if (!Cst) |
5621 | return false; // Shouldn't happen, but just in case. |
5622 | |
5623 | auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base}) |
5624 | .addImm(Val: Cst->getSExtValue()); |
5625 | LdMI.cloneMemRefs(OtherMI: ExtLd); |
5626 | constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI); |
5627 | // Make sure to select the load with the MemTy as the dest type, and then |
5628 | // insert into a larger reg if needed. |
5629 | if (InsertIntoSubReg) { |
5630 | // Generate a SUBREG_TO_REG. |
5631 | auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {}) |
5632 | .addImm(Val: 0) |
5633 | .addUse(RegNo: LdMI.getReg(Idx: 1)) |
5634 | .addImm(Val: InsertIntoSubReg); |
5635 | RBI.constrainGenericRegister( |
5636 | Reg: SubToReg.getReg(Idx: 0), |
5637 | RC: *getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), |
5638 | RB: *RBI.getRegBank(Reg: Dst, MRI, TRI)), |
5639 | MRI); |
5640 | } else { |
5641 | auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1)); |
5642 | selectCopy(I&: *Copy, TII, MRI, TRI, RBI); |
5643 | } |
5644 | MI.eraseFromParent(); |
5645 | |
5646 | return true; |
5647 | } |
5648 | |
5649 | bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI, |
5650 | MachineRegisterInfo &MRI) { |
5651 | auto &Ld = cast<GIndexedLoad>(Val&: MI); |
5652 | Register Dst = Ld.getDstReg(); |
5653 | Register WriteBack = Ld.getWritebackReg(); |
5654 | Register Base = Ld.getBaseReg(); |
5655 | Register Offset = Ld.getOffsetReg(); |
5656 | assert(MRI.getType(Dst).getSizeInBits() <= 128 && |
5657 | "Unexpected type for indexed load" ); |
5658 | unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes(); |
5659 | |
5660 | if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes()) |
5661 | return selectIndexedExtLoad(MI, MRI); |
5662 | |
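     | // Non-extending case: the opcode tables below are indexed by log2 of the
     | // access size in bytes (B, H, W, X for GPR; B, H, S, D, Q for FPR).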
5663 | unsigned Opc = 0; |
5664 | if (Ld.isPre()) { |
5665 | static constexpr unsigned GPROpcodes[] = { |
5666 | AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre, |
5667 | AArch64::LDRXpre}; |
5668 | static constexpr unsigned FPROpcodes[] = { |
5669 | AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre, |
5670 | AArch64::LDRQpre}; |
5671 | if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5672 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5673 | else |
5674 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5675 | } else { |
5676 | static constexpr unsigned GPROpcodes[] = { |
5677 | AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost, |
5678 | AArch64::LDRXpost}; |
5679 | static constexpr unsigned FPROpcodes[] = { |
5680 | AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost, |
5681 | AArch64::LDRDpost, AArch64::LDRQpost}; |
5682 | if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5683 | Opc = FPROpcodes[Log2_32(Value: MemSize)]; |
5684 | else |
5685 | Opc = GPROpcodes[Log2_32(Value: MemSize)]; |
5686 | } |
5687 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5688 | if (!Cst) |
5689 | return false; // Shouldn't happen, but just in case. |
5690 | auto LdMI = |
5691 | MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue()); |
5692 | LdMI.cloneMemRefs(OtherMI: Ld); |
5693 | constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI); |
5694 | MI.eraseFromParent(); |
5695 | return true; |
5696 | } |
5697 | |
5698 | bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I, |
5699 | MachineRegisterInfo &MRI) { |
5700 | Register Dst = I.getWritebackReg(); |
5701 | Register Val = I.getValueReg(); |
5702 | Register Base = I.getBaseReg(); |
5703 | Register Offset = I.getOffsetReg(); |
5704 | LLT ValTy = MRI.getType(Reg: Val); |
5705 | assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store" ); |
5706 | |
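     | // Choose a pre- or post-indexed store opcode, indexed by log2 of the stored
     | // value's size in bytes, from the GPR or FPR table depending on the value's
     | // register bank. E.g. a post-indexed store of a 64-bit GPR value selects
     | // STRXpost, which stores the value and defines the updated base address in
     | // the writeback register.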
5707 | unsigned Opc = 0; |
5708 | if (I.isPre()) { |
5709 | static constexpr unsigned GPROpcodes[] = { |
5710 | AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre, |
5711 | AArch64::STRXpre}; |
5712 | static constexpr unsigned FPROpcodes[] = { |
5713 | AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre, |
5714 | AArch64::STRQpre}; |
5715 | |
5716 | if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5717 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5718 | else |
5719 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5720 | } else { |
5721 | static constexpr unsigned GPROpcodes[] = { |
5722 | AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost, |
5723 | AArch64::STRXpost}; |
5724 | static constexpr unsigned FPROpcodes[] = { |
5725 | AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost, |
5726 | AArch64::STRDpost, AArch64::STRQpost}; |
5727 | |
5728 | if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID) |
5729 | Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5730 | else |
5731 | Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())]; |
5732 | } |
5733 | |
5734 | auto Cst = getIConstantVRegVal(VReg: Offset, MRI); |
5735 | if (!Cst) |
5736 | return false; // Shouldn't happen, but just in case. |
5737 | auto Str = |
5738 | MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue()); |
5739 | Str.cloneMemRefs(OtherMI: I); |
5740 | constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI); |
5741 | I.eraseFromParent(); |
5742 | return true; |
5743 | } |
5744 | |
5745 | MachineInstr * |
5746 | AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, |
5747 | MachineIRBuilder &MIRBuilder, |
5748 | MachineRegisterInfo &MRI) { |
5749 | LLT DstTy = MRI.getType(Reg: Dst); |
5750 | unsigned DstSize = DstTy.getSizeInBits(); |
5751 | if (CV->isNullValue()) { |
5752 | if (DstSize == 128) { |
5753 | auto Mov = |
5754 | MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0); |
5755 | constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI); |
5756 | return &*Mov; |
5757 | } |
5758 | |
5759 | if (DstSize == 64) { |
5760 | auto Mov = |
5761 | MIRBuilder |
5762 | .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {}) |
5763 | .addImm(Val: 0); |
5764 | auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {}) |
5765 | .addReg(RegNo: Mov.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub); |
5766 | RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI); |
5767 | return &*Copy; |
5768 | } |
5769 | } |
5770 | |
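     | // For a splat constant, first try the AdvSIMD modified-immediate forms
     | // (directly, then with the bits inverted for the MVNI variants). Failing
     | // that, try to express the constant as an FNEG of a MOVI-materializable
     | // value. Otherwise fall back to a constant pool load below.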
5771 | if (Constant *SplatValue = CV->getSplatValue()) { |
5772 | APInt SplatValueAsInt = |
5773 | isa<ConstantFP>(Val: SplatValue) |
5774 | ? cast<ConstantFP>(Val: SplatValue)->getValueAPF().bitcastToAPInt() |
5775 | : SplatValue->getUniqueInteger(); |
5776 | APInt DefBits = APInt::getSplat( |
5777 | NewLen: DstSize, V: SplatValueAsInt.trunc(width: DstTy.getScalarSizeInBits())); |
5778 | auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { |
5779 | MachineInstr *NewOp; |
5780 | bool Inv = false; |
5781 | if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) || |
5782 | (NewOp = |
5783 | tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5784 | (NewOp = |
5785 | tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5786 | (NewOp = |
5787 | tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5788 | (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) || |
5789 | (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder))) |
5790 | return NewOp; |
5791 | |
5792 | DefBits = ~DefBits; |
5793 | Inv = true; |
5794 | if ((NewOp = |
5795 | tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5796 | (NewOp = |
5797 | tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) || |
5798 | (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv))) |
5799 | return NewOp; |
5800 | return nullptr; |
5801 | }; |
5802 | |
5803 | if (auto *NewOp = TryMOVIWithBits(DefBits)) |
5804 | return NewOp; |
5805 | |
5806 | // See if a fneg of the constant can be materialized with a MOVI, etc |
5807 | auto TryWithFNeg = [&](APInt DefBits, int NumBits, |
5808 | unsigned NegOpc) -> MachineInstr * { |
5809 | // FNegate each sub-element of the constant |
5810 | APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize); |
5811 | APInt NegBits(DstSize, 0); |
5812 | unsigned NumElts = DstSize / NumBits; |
5813 | for (unsigned i = 0; i < NumElts; i++) |
5814 | NegBits |= Neg << (NumBits * i); |
5815 | NegBits = DefBits ^ NegBits; |
5816 | |
5817 | // Try to create the new constants with MOVI, and if so generate a fneg |
5818 | // for it. |
5819 | if (auto *NewOp = TryMOVIWithBits(NegBits)) { |
5820 | Register NewDst = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass); |
5821 | NewOp->getOperand(i: 0).setReg(NewDst); |
5822 | return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst}); |
5823 | } |
5824 | return nullptr; |
5825 | }; |
5826 | MachineInstr *R; |
5827 | if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) || |
5828 | (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) || |
5829 | (STI.hasFullFP16() && |
5830 | (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16)))) |
5831 | return R; |
5832 | } |
5833 | |
5834 | auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder); |
5835 | if (!CPLoad) { |
5836 | LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!" ); |
5837 | return nullptr; |
5838 | } |
5839 | |
5840 | auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0)); |
5841 | RBI.constrainGenericRegister( |
5842 | Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI); |
5843 | return &*Copy; |
5844 | } |
5845 | |
5846 | bool AArch64InstructionSelector::tryOptConstantBuildVec( |
5847 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { |
5848 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
5849 | unsigned DstSize = DstTy.getSizeInBits(); |
5850 | assert(DstSize <= 128 && "Unexpected build_vec type!" ); |
5851 | if (DstSize < 32) |
5852 | return false; |
5853 | // Check if we're building a constant vector, in which case we want to |
5854 | // generate a constant pool load instead of a vector insert sequence. |
5855 | SmallVector<Constant *, 16> Csts; |
5856 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { |
5857 | // Try to find G_CONSTANT or G_FCONSTANT |
5858 | auto *OpMI = |
5859 | getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI); |
5860 | if (OpMI) |
5861 | Csts.emplace_back( |
5862 | Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm())); |
5863 | else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT, |
5864 | Reg: I.getOperand(i: Idx).getReg(), MRI))) |
5865 | Csts.emplace_back( |
5866 | Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm())); |
5867 | else |
5868 | return false; |
5869 | } |
5870 | Constant *CV = ConstantVector::get(V: Csts); |
5871 | if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI)) |
5872 | return false; |
5873 | I.eraseFromParent(); |
5874 | return true; |
5875 | } |
5876 | |
5877 | bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( |
5878 | MachineInstr &I, MachineRegisterInfo &MRI) { |
5879 | // Given: |
5880 | // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef |
5881 | // |
5882 | // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. |
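     | // E.g. %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %elt:fpr(s64), %undef can become
     | // %vec = SUBREG_TO_REG 0, %elt, dsub, since the remaining lanes are
     | // undefined anyway.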
5883 | Register Dst = I.getOperand(i: 0).getReg(); |
5884 | Register EltReg = I.getOperand(i: 1).getReg(); |
5885 | LLT EltTy = MRI.getType(Reg: EltReg); |
5886 | // If the destination vector isn't on the same register bank as its elements,
5887 | // then this can't be a SUBREG_TO_REG.
5888 | const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI); |
5889 | const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI); |
5890 | if (EltRB != DstRB) |
5891 | return false; |
5892 | if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) { |
5893 | return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI); |
5894 | })) |
5895 | return false; |
5896 | unsigned SubReg; |
5897 | const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB); |
5898 | if (!EltRC) |
5899 | return false; |
5900 | const TargetRegisterClass *DstRC = |
5901 | getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB); |
5902 | if (!DstRC) |
5903 | return false; |
5904 | if (!getSubRegForClass(RC: EltRC, TRI, SubReg)) |
5905 | return false; |
5906 | auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {}) |
5907 | .addImm(Val: 0) |
5908 | .addUse(RegNo: EltReg) |
5909 | .addImm(Val: SubReg); |
5910 | I.eraseFromParent(); |
5911 | constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI); |
5912 | return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI); |
5913 | } |
5914 | |
5915 | bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, |
5916 | MachineRegisterInfo &MRI) { |
5917 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
5918 | // Until we port more of the optimized selections, for now just use a vector |
5919 | // insert sequence. |
5920 | const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
5921 | const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
5922 | unsigned EltSize = EltTy.getSizeInBits(); |
5923 | |
5924 | if (tryOptConstantBuildVec(I, DstTy, MRI)) |
5925 | return true; |
5926 | if (tryOptBuildVecToSubregToReg(I, MRI)) |
5927 | return true; |
5928 | |
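     | // General case: move the first element into a 128-bit vector register, insert
     | // each remaining non-undef element into its lane, and finally copy out the
     | // low subregister if the destination is narrower than 128 bits.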
5929 | if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64) |
5930 | return false; // Don't support all element types yet. |
5931 | const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI); |
5932 | |
5933 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; |
5934 | MachineInstr *ScalarToVec = |
5935 | emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC, |
5936 | Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB); |
5937 | if (!ScalarToVec) |
5938 | return false; |
5939 | |
5940 | Register DstVec = ScalarToVec->getOperand(i: 0).getReg(); |
5941 | unsigned DstSize = DstTy.getSizeInBits(); |
5942 | |
5943 | // Keep track of the last MI we inserted. Later on, we might be able to save |
5944 | // a copy using it. |
5945 | MachineInstr *PrevMI = ScalarToVec; |
5946 | for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { |
5947 | // Note that if we don't do a subregister copy, we can end up making an |
5948 | // extra register. |
5949 | Register OpReg = I.getOperand(i).getReg(); |
5950 | // Do not emit inserts for undefs |
5951 | if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) { |
5952 | PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB); |
5953 | DstVec = PrevMI->getOperand(i: 0).getReg(); |
5954 | } |
5955 | } |
5956 | |
5957 | // If DstTy's size in bits is less than 128, then emit a subregister copy |
5958 | // from DstVec to the last register we've defined. |
5959 | if (DstSize < 128) { |
5960 | // Force this to be FPR using the destination vector. |
5961 | const TargetRegisterClass *RC = |
5962 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI)); |
5963 | if (!RC) |
5964 | return false; |
5965 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { |
5966 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n" ); |
5967 | return false; |
5968 | } |
5969 | |
5970 | unsigned SubReg = 0; |
5971 | if (!getSubRegForClass(RC, TRI, SubReg)) |
5972 | return false; |
5973 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { |
5974 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize |
5975 | << "\n" ); |
5976 | return false; |
5977 | } |
5978 | |
5979 | Register Reg = MRI.createVirtualRegister(RegClass: RC); |
5980 | Register DstReg = I.getOperand(i: 0).getReg(); |
5981 | |
5982 | MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg); |
5983 | MachineOperand &RegOp = I.getOperand(i: 1); |
5984 | RegOp.setReg(Reg); |
5985 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
5986 | } else { |
5987 | // We either have a vector with all elements (except the first one) undef or |
5988 | // at least one non-undef non-first element. In the first case, we need to |
5989 | // constrain the output register ourselves as we may have generated an |
5990 | // INSERT_SUBREG operation which is a generic operation for which the |
5991 | // output regclass cannot be automatically chosen. |
5992 | // |
5993 | // In the second case, there is no need to do this as it may generate an |
5994 | // instruction like INSvi32gpr where the regclass can be automatically |
5995 | // chosen. |
5996 | // |
5997 | // Also, we save a copy by re-using the destination register on the final |
5998 | // insert. |
5999 | PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg()); |
6000 | constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI); |
6001 | |
6002 | Register DstReg = PrevMI->getOperand(i: 0).getReg(); |
6003 | if (PrevMI == ScalarToVec && DstReg.isVirtual()) { |
6004 | const TargetRegisterClass *RC = |
6005 | getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI)); |
6006 | RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI); |
6007 | } |
6008 | } |
6009 | |
6010 | I.eraseFromParent(); |
6011 | return true; |
6012 | } |
6013 | |
6014 | bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, |
6015 | unsigned NumVecs, |
6016 | MachineInstr &I) { |
6017 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
6018 | assert(Opc && "Expected an opcode?" ); |
6019 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" ); |
6020 | auto &MRI = *MIB.getMRI(); |
6021 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6022 | unsigned Size = Ty.getSizeInBits(); |
6023 | assert((Size == 64 || Size == 128) && |
6024 | "Destination must be 64 bits or 128 bits?" ); |
6025 | unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; |
6026 | auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg(); |
6027 | assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?" ); |
6028 | auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr}); |
6029 | Load.cloneMemRefs(OtherMI: I); |
6030 | constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI); |
6031 | Register SelectedLoadDst = Load->getOperand(i: 0).getReg(); |
6032 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
6033 | auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {}) |
6034 | .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx); |
6035 | // Emit the subreg copies and immediately select them. |
6036 | // FIXME: We should refactor our copy code into an emitCopy helper and |
6037 | // clean up uses of this pattern elsewhere in the selector. |
6038 | selectCopy(I&: *Vec, TII, MRI, TRI, RBI); |
6039 | } |
6040 | return true; |
6041 | } |
6042 | |
6043 | bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic( |
6044 | unsigned Opc, unsigned NumVecs, MachineInstr &I) { |
6045 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
6046 | assert(Opc && "Expected an opcode?" ); |
6047 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors" ); |
6048 | auto &MRI = *MIB.getMRI(); |
6049 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6050 | bool Narrow = Ty.getSizeInBits() == 64; |
6051 | |
6052 | auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1; |
6053 | SmallVector<Register, 4> Regs(NumVecs); |
6054 | std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(), |
6055 | unary_op: [](auto MO) { return MO.getReg(); }); |
6056 | |
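     | // The lane-load instructions only operate on Q-register tuples, so 64-bit
     | // source vectors are widened to 128 bits first and the results are narrowed
     | // back to 64 bits after the load (see emitNarrowVector below).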
6057 | if (Narrow) { |
6058 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
6059 | return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB) |
6060 | ->getOperand(i: 0) |
6061 | .getReg(); |
6062 | }); |
6063 | Ty = Ty.multiplyElements(Factor: 2); |
6064 | } |
6065 | |
6066 | Register Tuple = createQTuple(Regs, MIB); |
6067 | auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI); |
6068 | if (!LaneNo) |
6069 | return false; |
6070 | |
6071 | Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg(); |
6072 | auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {}) |
6073 | .addReg(RegNo: Tuple) |
6074 | .addImm(Val: LaneNo->getZExtValue()) |
6075 | .addReg(RegNo: Ptr); |
6076 | Load.cloneMemRefs(OtherMI: I); |
6077 | constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI); |
6078 | Register SelectedLoadDst = Load->getOperand(i: 0).getReg(); |
6079 | unsigned SubReg = AArch64::qsub0; |
6080 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
6081 | auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, |
6082 | DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass) |
6083 | : DstOp(I.getOperand(i: Idx).getReg())}, |
6084 | SrcOps: {}) |
6085 | .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx); |
6086 | Register WideReg = Vec.getReg(Idx: 0); |
6087 | // Emit the subreg copies and immediately select them. |
6088 | selectCopy(I&: *Vec, TII, MRI, TRI, RBI); |
6089 | if (Narrow && |
6090 | !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI)) |
6091 | return false; |
6092 | } |
6093 | return true; |
6094 | } |
6095 | |
6096 | void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I, |
6097 | unsigned NumVecs, |
6098 | unsigned Opc) { |
6099 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
6100 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6101 | Register Ptr = I.getOperand(i: 1 + NumVecs).getReg(); |
6102 | |
6103 | SmallVector<Register, 2> Regs(NumVecs); |
6104 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
6105 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
6106 | |
6107 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) |
6108 | : createDTuple(Regs, MIB); |
6109 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr}); |
6110 | Store.cloneMemRefs(OtherMI: I); |
6111 | constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI); |
6112 | } |
6113 | |
6114 | bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic( |
6115 | MachineInstr &I, unsigned NumVecs, unsigned Opc) { |
6116 | MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo(); |
6117 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6118 | bool Narrow = Ty.getSizeInBits() == 64; |
6119 | |
6120 | SmallVector<Register, 2> Regs(NumVecs); |
6121 | std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs, |
6122 | result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); }); |
6123 | |
6124 | if (Narrow) |
6125 | transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) { |
6126 | return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB) |
6127 | ->getOperand(i: 0) |
6128 | .getReg(); |
6129 | }); |
6130 | |
6131 | Register Tuple = createQTuple(Regs, MIB); |
6132 | |
6133 | auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI); |
6134 | if (!LaneNo) |
6135 | return false; |
6136 | Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg(); |
6137 | auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {}) |
6138 | .addReg(RegNo: Tuple) |
6139 | .addImm(Val: LaneNo->getZExtValue()) |
6140 | .addReg(RegNo: Ptr); |
6141 | Store.cloneMemRefs(OtherMI: I); |
6142 | constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI); |
6143 | return true; |
6144 | } |
6145 | |
6146 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( |
6147 | MachineInstr &I, MachineRegisterInfo &MRI) { |
6148 | // Find the intrinsic ID. |
6149 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
6150 | |
6151 | const LLT S8 = LLT::scalar(SizeInBits: 8); |
6152 | const LLT S16 = LLT::scalar(SizeInBits: 16); |
6153 | const LLT S32 = LLT::scalar(SizeInBits: 32); |
6154 | const LLT S64 = LLT::scalar(SizeInBits: 64); |
6155 | const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
6156 | // Select the instruction. |
6157 | switch (IntrinID) { |
6158 | default: |
6159 | return false; |
6160 | case Intrinsic::aarch64_ldxp: |
6161 | case Intrinsic::aarch64_ldaxp: { |
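     | // LDXPX/LDAXPX load an exclusive pair of 64-bit values; the intrinsic's two
     | // i64 results map directly onto the instruction's two defs, with operand 3
     | // providing the pointer.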
6162 | auto NewI = MIB.buildInstr( |
6163 | Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, |
6164 | DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()}, |
6165 | SrcOps: {I.getOperand(i: 3)}); |
6166 | NewI.cloneMemRefs(OtherMI: I); |
6167 | constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI); |
6168 | break; |
6169 | } |
6170 | case Intrinsic::aarch64_neon_ld1x2: { |
6171 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6172 | unsigned Opc = 0; |
6173 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6174 | Opc = AArch64::LD1Twov8b; |
6175 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6176 | Opc = AArch64::LD1Twov16b; |
6177 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6178 | Opc = AArch64::LD1Twov4h; |
6179 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6180 | Opc = AArch64::LD1Twov8h; |
6181 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6182 | Opc = AArch64::LD1Twov2s; |
6183 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6184 | Opc = AArch64::LD1Twov4s; |
6185 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6186 | Opc = AArch64::LD1Twov2d; |
6187 | else if (Ty == S64 || Ty == P0) |
6188 | Opc = AArch64::LD1Twov1d; |
6189 | else |
6190 | llvm_unreachable("Unexpected type for ld1x2!" ); |
6191 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6192 | break; |
6193 | } |
6194 | case Intrinsic::aarch64_neon_ld1x3: { |
6195 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6196 | unsigned Opc = 0; |
6197 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6198 | Opc = AArch64::LD1Threev8b; |
6199 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6200 | Opc = AArch64::LD1Threev16b; |
6201 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6202 | Opc = AArch64::LD1Threev4h; |
6203 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6204 | Opc = AArch64::LD1Threev8h; |
6205 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6206 | Opc = AArch64::LD1Threev2s; |
6207 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6208 | Opc = AArch64::LD1Threev4s; |
6209 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6210 | Opc = AArch64::LD1Threev2d; |
6211 | else if (Ty == S64 || Ty == P0) |
6212 | Opc = AArch64::LD1Threev1d; |
6213 | else |
6214 | llvm_unreachable("Unexpected type for ld1x3!" ); |
6215 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6216 | break; |
6217 | } |
6218 | case Intrinsic::aarch64_neon_ld1x4: { |
6219 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6220 | unsigned Opc = 0; |
6221 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6222 | Opc = AArch64::LD1Fourv8b; |
6223 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6224 | Opc = AArch64::LD1Fourv16b; |
6225 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6226 | Opc = AArch64::LD1Fourv4h; |
6227 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6228 | Opc = AArch64::LD1Fourv8h; |
6229 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6230 | Opc = AArch64::LD1Fourv2s; |
6231 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6232 | Opc = AArch64::LD1Fourv4s; |
6233 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6234 | Opc = AArch64::LD1Fourv2d; |
6235 | else if (Ty == S64 || Ty == P0) |
6236 | Opc = AArch64::LD1Fourv1d; |
6237 | else |
6238 | llvm_unreachable("Unexpected type for ld1x4!" ); |
6239 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6240 | break; |
6241 | } |
6242 | case Intrinsic::aarch64_neon_ld2: { |
6243 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6244 | unsigned Opc = 0; |
6245 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6246 | Opc = AArch64::LD2Twov8b; |
6247 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6248 | Opc = AArch64::LD2Twov16b; |
6249 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6250 | Opc = AArch64::LD2Twov4h; |
6251 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6252 | Opc = AArch64::LD2Twov8h; |
6253 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6254 | Opc = AArch64::LD2Twov2s; |
6255 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6256 | Opc = AArch64::LD2Twov4s; |
6257 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6258 | Opc = AArch64::LD2Twov2d; |
6259 | else if (Ty == S64 || Ty == P0) |
6260 | Opc = AArch64::LD1Twov1d; |
6261 | else |
6262 | llvm_unreachable("Unexpected type for ld2!" ); |
6263 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6264 | break; |
6265 | } |
6266 | case Intrinsic::aarch64_neon_ld2lane: { |
6267 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6268 | unsigned Opc; |
6269 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6270 | Opc = AArch64::LD2i8; |
6271 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6272 | Opc = AArch64::LD2i16; |
6273 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6274 | Opc = AArch64::LD2i32; |
6275 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6276 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6277 | Opc = AArch64::LD2i64; |
6278 | else |
6279 | llvm_unreachable("Unexpected type for st2lane!" ); |
6280 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I)) |
6281 | return false; |
6282 | break; |
6283 | } |
6284 | case Intrinsic::aarch64_neon_ld2r: { |
6285 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6286 | unsigned Opc = 0; |
6287 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6288 | Opc = AArch64::LD2Rv8b; |
6289 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6290 | Opc = AArch64::LD2Rv16b; |
6291 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6292 | Opc = AArch64::LD2Rv4h; |
6293 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6294 | Opc = AArch64::LD2Rv8h; |
6295 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6296 | Opc = AArch64::LD2Rv2s; |
6297 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6298 | Opc = AArch64::LD2Rv4s; |
6299 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6300 | Opc = AArch64::LD2Rv2d; |
6301 | else if (Ty == S64 || Ty == P0) |
6302 | Opc = AArch64::LD2Rv1d; |
6303 | else |
6304 | llvm_unreachable("Unexpected type for ld2r!" ); |
6305 | selectVectorLoadIntrinsic(Opc, NumVecs: 2, I); |
6306 | break; |
6307 | } |
6308 | case Intrinsic::aarch64_neon_ld3: { |
6309 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6310 | unsigned Opc = 0; |
6311 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6312 | Opc = AArch64::LD3Threev8b; |
6313 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6314 | Opc = AArch64::LD3Threev16b; |
6315 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6316 | Opc = AArch64::LD3Threev4h; |
6317 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6318 | Opc = AArch64::LD3Threev8h; |
6319 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6320 | Opc = AArch64::LD3Threev2s; |
6321 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6322 | Opc = AArch64::LD3Threev4s; |
6323 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6324 | Opc = AArch64::LD3Threev2d; |
6325 | else if (Ty == S64 || Ty == P0) |
6326 | Opc = AArch64::LD1Threev1d; |
6327 | else |
6328 | llvm_unreachable("Unexpected type for ld3!" ); |
6329 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6330 | break; |
6331 | } |
6332 | case Intrinsic::aarch64_neon_ld3lane: { |
6333 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6334 | unsigned Opc; |
6335 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6336 | Opc = AArch64::LD3i8; |
6337 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6338 | Opc = AArch64::LD3i16; |
6339 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6340 | Opc = AArch64::LD3i32; |
6341 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6342 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6343 | Opc = AArch64::LD3i64; |
6344 | else |
6345 | llvm_unreachable("Unexpected type for st3lane!" ); |
6346 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I)) |
6347 | return false; |
6348 | break; |
6349 | } |
6350 | case Intrinsic::aarch64_neon_ld3r: { |
6351 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6352 | unsigned Opc = 0; |
6353 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6354 | Opc = AArch64::LD3Rv8b; |
6355 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6356 | Opc = AArch64::LD3Rv16b; |
6357 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6358 | Opc = AArch64::LD3Rv4h; |
6359 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6360 | Opc = AArch64::LD3Rv8h; |
6361 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6362 | Opc = AArch64::LD3Rv2s; |
6363 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6364 | Opc = AArch64::LD3Rv4s; |
6365 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6366 | Opc = AArch64::LD3Rv2d; |
6367 | else if (Ty == S64 || Ty == P0) |
6368 | Opc = AArch64::LD3Rv1d; |
6369 | else |
6370 | llvm_unreachable("Unexpected type for ld3r!" ); |
6371 | selectVectorLoadIntrinsic(Opc, NumVecs: 3, I); |
6372 | break; |
6373 | } |
6374 | case Intrinsic::aarch64_neon_ld4: { |
6375 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6376 | unsigned Opc = 0; |
6377 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6378 | Opc = AArch64::LD4Fourv8b; |
6379 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6380 | Opc = AArch64::LD4Fourv16b; |
6381 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6382 | Opc = AArch64::LD4Fourv4h; |
6383 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6384 | Opc = AArch64::LD4Fourv8h; |
6385 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6386 | Opc = AArch64::LD4Fourv2s; |
6387 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6388 | Opc = AArch64::LD4Fourv4s; |
6389 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6390 | Opc = AArch64::LD4Fourv2d; |
6391 | else if (Ty == S64 || Ty == P0) |
6392 | Opc = AArch64::LD1Fourv1d; |
6393 | else |
6394 | llvm_unreachable("Unexpected type for ld4!" ); |
6395 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6396 | break; |
6397 | } |
6398 | case Intrinsic::aarch64_neon_ld4lane: { |
6399 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6400 | unsigned Opc; |
6401 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6402 | Opc = AArch64::LD4i8; |
6403 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6404 | Opc = AArch64::LD4i16; |
6405 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6406 | Opc = AArch64::LD4i32; |
6407 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6408 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6409 | Opc = AArch64::LD4i64; |
6410 | else |
6411 | llvm_unreachable("Unexpected type for st4lane!" ); |
6412 | if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I)) |
6413 | return false; |
6414 | break; |
6415 | } |
6416 | case Intrinsic::aarch64_neon_ld4r: { |
6417 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg()); |
6418 | unsigned Opc = 0; |
6419 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6420 | Opc = AArch64::LD4Rv8b; |
6421 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6422 | Opc = AArch64::LD4Rv16b; |
6423 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6424 | Opc = AArch64::LD4Rv4h; |
6425 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6426 | Opc = AArch64::LD4Rv8h; |
6427 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6428 | Opc = AArch64::LD4Rv2s; |
6429 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6430 | Opc = AArch64::LD4Rv4s; |
6431 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6432 | Opc = AArch64::LD4Rv2d; |
6433 | else if (Ty == S64 || Ty == P0) |
6434 | Opc = AArch64::LD4Rv1d; |
6435 | else |
6436 | llvm_unreachable("Unexpected type for ld4r!" ); |
6437 | selectVectorLoadIntrinsic(Opc, NumVecs: 4, I); |
6438 | break; |
6439 | } |
6440 | case Intrinsic::aarch64_neon_st1x2: { |
6441 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6442 | unsigned Opc; |
6443 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6444 | Opc = AArch64::ST1Twov8b; |
6445 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6446 | Opc = AArch64::ST1Twov16b; |
6447 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6448 | Opc = AArch64::ST1Twov4h; |
6449 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6450 | Opc = AArch64::ST1Twov8h; |
6451 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6452 | Opc = AArch64::ST1Twov2s; |
6453 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6454 | Opc = AArch64::ST1Twov4s; |
6455 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6456 | Opc = AArch64::ST1Twov2d; |
6457 | else if (Ty == S64 || Ty == P0) |
6458 | Opc = AArch64::ST1Twov1d; |
6459 | else |
6460 | llvm_unreachable("Unexpected type for st1x2!" ); |
6461 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6462 | break; |
6463 | } |
6464 | case Intrinsic::aarch64_neon_st1x3: { |
6465 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6466 | unsigned Opc; |
6467 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6468 | Opc = AArch64::ST1Threev8b; |
6469 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6470 | Opc = AArch64::ST1Threev16b; |
6471 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6472 | Opc = AArch64::ST1Threev4h; |
6473 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6474 | Opc = AArch64::ST1Threev8h; |
6475 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6476 | Opc = AArch64::ST1Threev2s; |
6477 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6478 | Opc = AArch64::ST1Threev4s; |
6479 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6480 | Opc = AArch64::ST1Threev2d; |
6481 | else if (Ty == S64 || Ty == P0) |
6482 | Opc = AArch64::ST1Threev1d; |
6483 | else |
6484 | llvm_unreachable("Unexpected type for st1x3!" ); |
6485 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6486 | break; |
6487 | } |
6488 | case Intrinsic::aarch64_neon_st1x4: { |
6489 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6490 | unsigned Opc; |
6491 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6492 | Opc = AArch64::ST1Fourv8b; |
6493 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6494 | Opc = AArch64::ST1Fourv16b; |
6495 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6496 | Opc = AArch64::ST1Fourv4h; |
6497 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6498 | Opc = AArch64::ST1Fourv8h; |
6499 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6500 | Opc = AArch64::ST1Fourv2s; |
6501 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6502 | Opc = AArch64::ST1Fourv4s; |
6503 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6504 | Opc = AArch64::ST1Fourv2d; |
6505 | else if (Ty == S64 || Ty == P0) |
6506 | Opc = AArch64::ST1Fourv1d; |
6507 | else |
6508 | llvm_unreachable("Unexpected type for st1x4!" ); |
6509 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6510 | break; |
6511 | } |
6512 | case Intrinsic::aarch64_neon_st2: { |
6513 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6514 | unsigned Opc; |
6515 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6516 | Opc = AArch64::ST2Twov8b; |
6517 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6518 | Opc = AArch64::ST2Twov16b; |
6519 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6520 | Opc = AArch64::ST2Twov4h; |
6521 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6522 | Opc = AArch64::ST2Twov8h; |
6523 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6524 | Opc = AArch64::ST2Twov2s; |
6525 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6526 | Opc = AArch64::ST2Twov4s; |
6527 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6528 | Opc = AArch64::ST2Twov2d; |
6529 | else if (Ty == S64 || Ty == P0) |
6530 | Opc = AArch64::ST1Twov1d; |
6531 | else |
6532 | llvm_unreachable("Unexpected type for st2!" ); |
6533 | selectVectorStoreIntrinsic(I, NumVecs: 2, Opc); |
6534 | break; |
6535 | } |
6536 | case Intrinsic::aarch64_neon_st3: { |
6537 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6538 | unsigned Opc; |
6539 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6540 | Opc = AArch64::ST3Threev8b; |
6541 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6542 | Opc = AArch64::ST3Threev16b; |
6543 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6544 | Opc = AArch64::ST3Threev4h; |
6545 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6546 | Opc = AArch64::ST3Threev8h; |
6547 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6548 | Opc = AArch64::ST3Threev2s; |
6549 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6550 | Opc = AArch64::ST3Threev4s; |
6551 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6552 | Opc = AArch64::ST3Threev2d; |
6553 | else if (Ty == S64 || Ty == P0) |
6554 | Opc = AArch64::ST1Threev1d; |
6555 | else |
6556 | llvm_unreachable("Unexpected type for st3!" ); |
6557 | selectVectorStoreIntrinsic(I, NumVecs: 3, Opc); |
6558 | break; |
6559 | } |
6560 | case Intrinsic::aarch64_neon_st4: { |
6561 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6562 | unsigned Opc; |
6563 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8)) |
6564 | Opc = AArch64::ST4Fourv8b; |
6565 | else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6566 | Opc = AArch64::ST4Fourv16b; |
6567 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16)) |
6568 | Opc = AArch64::ST4Fourv4h; |
6569 | else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6570 | Opc = AArch64::ST4Fourv8h; |
6571 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32)) |
6572 | Opc = AArch64::ST4Fourv2s; |
6573 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6574 | Opc = AArch64::ST4Fourv4s; |
6575 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0)) |
6576 | Opc = AArch64::ST4Fourv2d; |
6577 | else if (Ty == S64 || Ty == P0) |
6578 | Opc = AArch64::ST1Fourv1d; |
6579 | else |
6580 | llvm_unreachable("Unexpected type for st4!" ); |
6581 | selectVectorStoreIntrinsic(I, NumVecs: 4, Opc); |
6582 | break; |
6583 | } |
6584 | case Intrinsic::aarch64_neon_st2lane: { |
6585 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6586 | unsigned Opc; |
6587 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6588 | Opc = AArch64::ST2i8; |
6589 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6590 | Opc = AArch64::ST2i16; |
6591 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6592 | Opc = AArch64::ST2i32; |
6593 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6594 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6595 | Opc = AArch64::ST2i64; |
6596 | else |
6597 | llvm_unreachable("Unexpected type for st2lane!" ); |
6598 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc)) |
6599 | return false; |
6600 | break; |
6601 | } |
6602 | case Intrinsic::aarch64_neon_st3lane: { |
6603 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6604 | unsigned Opc; |
6605 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6606 | Opc = AArch64::ST3i8; |
6607 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6608 | Opc = AArch64::ST3i16; |
6609 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6610 | Opc = AArch64::ST3i32; |
6611 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6612 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6613 | Opc = AArch64::ST3i64; |
6614 | else |
6615 | llvm_unreachable("Unexpected type for st3lane!" ); |
6616 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc)) |
6617 | return false; |
6618 | break; |
6619 | } |
6620 | case Intrinsic::aarch64_neon_st4lane: { |
6621 | LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg()); |
6622 | unsigned Opc; |
6623 | if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8)) |
6624 | Opc = AArch64::ST4i8; |
6625 | else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16)) |
6626 | Opc = AArch64::ST4i16; |
6627 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32)) |
6628 | Opc = AArch64::ST4i32; |
6629 | else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || |
6630 | Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0) |
6631 | Opc = AArch64::ST4i64; |
6632 | else |
6633 | llvm_unreachable("Unexpected type for st4lane!" ); |
6634 | if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc)) |
6635 | return false; |
6636 | break; |
6637 | } |
6638 | case Intrinsic::aarch64_mops_memset_tag: { |
6639 | // Transform
6640 | //   %dst:gpr(p0) =
6641 | //       G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6642 | //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6643 | // where %dst is updated, into
6644 | //   %Rd:GPR64common, %Rn:GPR64 =
6645 | //       MOPSMemorySetTaggingPseudo
6646 | //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6647 | // where Rd and Rn are tied.
6648 | // It is expected that %val has been extended to s64 in legalization.
6649 | // Note that the order of the size/value operands is swapped.
6650 | |
6651 | Register DstDef = I.getOperand(i: 0).getReg(); |
6652 | // I.getOperand(1) is the intrinsic function |
6653 | Register DstUse = I.getOperand(i: 2).getReg(); |
6654 | Register ValUse = I.getOperand(i: 3).getReg(); |
6655 | Register SizeUse = I.getOperand(i: 4).getReg(); |
6656 | |
6657 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. |
6658 | // Therefore an additional virtual register is required for the updated size |
6659 | // operand. This value is not accessible via the semantics of the intrinsic. |
6660 | Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64)); |
6661 | |
6662 | auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo, |
6663 | DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse}); |
6664 | Memset.cloneMemRefs(OtherMI: I); |
6665 | constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI); |
6666 | break; |
6667 | } |
6668 | } |
6669 | |
6670 | I.eraseFromParent(); |
6671 | return true; |
6672 | } |
6673 | |
6674 | bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, |
6675 | MachineRegisterInfo &MRI) { |
6676 | unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID(); |
6677 | |
6678 | switch (IntrinID) { |
6679 | default: |
6680 | break; |
6681 | case Intrinsic::aarch64_crypto_sha1h: { |
6682 | Register DstReg = I.getOperand(i: 0).getReg(); |
6683 | Register SrcReg = I.getOperand(i: 2).getReg(); |
6684 | |
6685 | // FIXME: Should this be an assert? |
6686 | if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 || |
6687 | MRI.getType(Reg: SrcReg).getSizeInBits() != 32) |
6688 | return false; |
6689 | |
6690 | // The operation has to happen on FPRs. Set up some new FPR registers for |
6691 | // the source and destination if they are on GPRs. |
6692 | if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { |
6693 | SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass); |
6694 | MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)}); |
6695 | |
6696 | // Make sure the copy ends up getting constrained properly. |
6697 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(), |
6698 | RC: AArch64::GPR32RegClass, MRI); |
6699 | } |
6700 | |
6701 | if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) |
6702 | DstReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass); |
6703 | |
6704 | // Actually insert the instruction. |
6705 | auto SHA1Inst = MIB.buildInstr(Opc: AArch64::SHA1Hrr, DstOps: {DstReg}, SrcOps: {SrcReg}); |
6706 | constrainSelectedInstRegOperands(I&: *SHA1Inst, TII, TRI, RBI); |
6707 | |
6708 | // Did we create a new register for the destination? |
6709 | if (DstReg != I.getOperand(i: 0).getReg()) { |
6710 | // Yep. Copy the result of the instruction back into the original |
6711 | // destination. |
6712 | MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg}); |
6713 | RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), |
6714 | RC: AArch64::GPR32RegClass, MRI); |
6715 | } |
6716 | |
6717 | I.eraseFromParent(); |
6718 | return true; |
6719 | } |
6720 | case Intrinsic::ptrauth_resign: { |
6721 | Register DstReg = I.getOperand(i: 0).getReg(); |
6722 | Register ValReg = I.getOperand(i: 2).getReg(); |
6723 | uint64_t AUTKey = I.getOperand(i: 3).getImm(); |
6724 | Register AUTDisc = I.getOperand(i: 4).getReg(); |
6725 | uint64_t PACKey = I.getOperand(i: 5).getImm(); |
6726 | Register PACDisc = I.getOperand(i: 6).getReg(); |
6727 | |
6728 | Register AUTAddrDisc = AUTDisc; |
6729 | uint16_t AUTConstDiscC = 0; |
6730 | std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) = |
6731 | extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI); |
6732 | |
6733 | Register PACAddrDisc = PACDisc; |
6734 | uint16_t PACConstDiscC = 0; |
6735 | std::tie(args&: PACConstDiscC, args&: PACAddrDisc) = |
6736 | extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI); |
6737 | |
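// The AUTPAC pseudo takes its input pointer in X16 and leaves the re-signed
// value there; X17 is marked as defined up front since the expansion may use
// it as a scratch register.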
6738 | MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg}); |
6739 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6740 | MIB.buildInstr(Opcode: AArch64::AUTPAC) |
6741 | .addImm(Val: AUTKey) |
6742 | .addImm(Val: AUTConstDiscC) |
6743 | .addUse(RegNo: AUTAddrDisc) |
6744 | .addImm(Val: PACKey) |
6745 | .addImm(Val: PACConstDiscC) |
6746 | .addUse(RegNo: PACAddrDisc) |
6747 | .constrainAllUses(TII, TRI, RBI); |
6748 | MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16)); |
6749 | |
6750 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6751 | I.eraseFromParent(); |
6752 | return true; |
6753 | } |
6754 | case Intrinsic::ptrauth_auth: { |
6755 | Register DstReg = I.getOperand(i: 0).getReg(); |
6756 | Register ValReg = I.getOperand(i: 2).getReg(); |
6757 | uint64_t AUTKey = I.getOperand(i: 3).getImm(); |
6758 | Register AUTDisc = I.getOperand(i: 4).getReg(); |
6759 | |
6760 | Register AUTAddrDisc = AUTDisc; |
6761 | uint16_t AUTConstDiscC = 0; |
6762 | std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) = |
6763 | extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI); |
6764 | |
6765 | MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg}); |
6766 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6767 | MIB.buildInstr(Opcode: AArch64::AUT) |
6768 | .addImm(Val: AUTKey) |
6769 | .addImm(Val: AUTConstDiscC) |
6770 | .addUse(RegNo: AUTAddrDisc) |
6771 | .constrainAllUses(TII, TRI, RBI); |
6772 | MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16)); |
6773 | |
6774 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6775 | I.eraseFromParent(); |
6776 | return true; |
6777 | } |
6778 | case Intrinsic::frameaddress: |
6779 | case Intrinsic::returnaddress: { |
6780 | MachineFunction &MF = *I.getParent()->getParent(); |
6781 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6782 | |
6783 | unsigned Depth = I.getOperand(i: 2).getImm(); |
6784 | Register DstReg = I.getOperand(i: 0).getReg(); |
6785 | RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI); |
6786 | |
6787 | if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { |
6788 | if (!MFReturnAddr) { |
6789 | // Insert the copy from LR/X30 into the entry block, before it can be |
6790 | // clobbered by anything. |
6791 | MFI.setReturnAddressIsTaken(true); |
6792 | MFReturnAddr = getFunctionLiveInPhysReg( |
6793 | MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc()); |
6794 | } |
6795 | |
6796 | if (STI.hasPAuth()) { |
6797 | MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr}); |
6798 | } else { |
6799 | MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr}); |
6800 | MIB.buildInstr(Opcode: AArch64::XPACLRI); |
6801 | MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)}); |
6802 | } |
6803 | |
6804 | I.eraseFromParent(); |
6805 | return true; |
6806 | } |
6807 | |
6808 | MFI.setFrameAddressIsTaken(true); |
6809 | Register FrameAddr(AArch64::FP); |
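// Walk up the chain of frame records: each saved frame pointer sits at
// offset 0 of its frame record, so one zero-offset load per level follows
// the chain.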
6810 | while (Depth--) { |
6811 | Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass); |
6812 | auto Ldr = |
6813 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0); |
6814 | constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI); |
6815 | FrameAddr = NextFrame; |
6816 | } |
6817 | |
6818 | if (IntrinID == Intrinsic::frameaddress) |
6819 | MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr}); |
6820 | else { |
6821 | MFI.setReturnAddressIsTaken(true); |
6822 | |
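// The saved LR sits 8 bytes above the frame pointer. LDRXui scales its
// immediate by 8, so an immediate of 1 addresses [FrameAddr, #8].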
6823 | if (STI.hasPAuth()) { |
6824 | Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
6825 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1); |
6826 | MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg}); |
6827 | } else { |
6828 | MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr}) |
6829 | .addImm(Val: 1); |
6830 | MIB.buildInstr(Opcode: AArch64::XPACLRI); |
6831 | MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)}); |
6832 | } |
6833 | } |
6834 | |
6835 | I.eraseFromParent(); |
6836 | return true; |
6837 | } |
6838 | case Intrinsic::aarch64_neon_tbl2: |
6839 | SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false); |
6840 | return true; |
6841 | case Intrinsic::aarch64_neon_tbl3: |
6842 | SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three, |
6843 | isExt: false); |
6844 | return true; |
6845 | case Intrinsic::aarch64_neon_tbl4: |
6846 | SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false); |
6847 | return true; |
6848 | case Intrinsic::aarch64_neon_tbx2: |
6849 | SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true); |
6850 | return true; |
6851 | case Intrinsic::aarch64_neon_tbx3: |
6852 | SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true); |
6853 | return true; |
6854 | case Intrinsic::aarch64_neon_tbx4: |
6855 | SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true); |
6856 | return true; |
6857 | case Intrinsic::swift_async_context_addr: |
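// The Swift async context slot sits 8 bytes below the frame pointer, so its
// address is computed here as FP - 8.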
6858 | auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()}, |
6859 | SrcOps: {Register(AArch64::FP)}) |
6860 | .addImm(Val: 8) |
6861 | .addImm(Val: 0); |
6862 | constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI); |
6863 | |
6864 | MF->getFrameInfo().setFrameAddressIsTaken(true); |
6865 | MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
6866 | I.eraseFromParent(); |
6867 | return true; |
6868 | } |
6869 | return false; |
6870 | } |
6871 | |
6872 | // G_PTRAUTH_GLOBAL_VALUE lowering |
6873 | // |
6874 | // We have 3 lowering alternatives to choose from: |
6875 | // - MOVaddrPAC: similar to MOVaddr, with added PAC. |
6876 | // If the GV doesn't need a GOT load (i.e., is locally defined),
6877 | // materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC. |
6878 | // |
6879 | // - LOADgotPAC: similar to LOADgot, with added PAC. |
6880 | // If the GV needs a GOT load, materialize the pointer using the usual |
6881 | // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed, and
6882 | // the GOT section is assumed to be read-only (for example, via the relro
6883 | // mechanism). See LowerMOVaddrPAC.
6884 | // |
6885 | // - LOADauthptrstatic: similar to LOADgot, but uses a
6886 | // special stub slot instead of a GOT slot.
6887 | // Load a signed pointer for symbol 'sym' from a stub slot named
6888 | // 'sym$auth_ptr$key$disc', filled by the dynamic linker during
6889 | // relocation resolution. This usually lowers to adrp+ldr, but also
6890 | // emits an entry into .data with an @AUTH relocation. See
6891 | // LowerLOADauthptrstatic.
6892 | // |
6893 | // All 3 are pseudos that are expanded late to longer sequences: this lets us
6894 | // provide integrity guarantees on the to-be-signed intermediate values. |
6895 | // |
6896 | // LOADauthptrstatic is undesirable because it requires a large section filled
6897 | // with often similarly-signed pointers, making it a good harvesting target.
6898 | // Thus, it's only used for ptrauth references to extern_weak symbols, which
6899 | // may be null and would otherwise require explicit null checks before signing.
6900 | |
6901 | bool AArch64InstructionSelector::selectPtrAuthGlobalValue( |
6902 | MachineInstr &I, MachineRegisterInfo &MRI) const { |
6903 | Register DefReg = I.getOperand(i: 0).getReg(); |
6904 | Register Addr = I.getOperand(i: 1).getReg(); |
6905 | uint64_t Key = I.getOperand(i: 2).getImm(); |
6906 | Register AddrDisc = I.getOperand(i: 3).getReg(); |
6907 | uint64_t Disc = I.getOperand(i: 4).getImm(); |
6908 | int64_t Offset = 0; |
6909 | |
6910 | if (Key > AArch64PACKey::LAST) |
6911 | report_fatal_error(reason: "key in ptrauth global out of range [0, " + |
6912 | Twine((int)AArch64PACKey::LAST) + "]" ); |
6913 | |
6914 | // Blend only works if the integer discriminator is 16-bit wide. |
6915 | if (!isUInt<16>(x: Disc)) |
6916 | report_fatal_error( |
6917 | reason: "constant discriminator in ptrauth global out of range [0, 0xffff]" ); |
6918 | |
6919 | // Choosing between 3 lowering alternatives is target-specific. |
6920 | if (!STI.isTargetELF() && !STI.isTargetMachO()) |
6921 | report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF" ); |
6922 | |
6923 | if (!MRI.hasOneDef(RegNo: Addr)) |
6924 | return false; |
6925 | |
6926 | // First match any offset we take from the real global. |
6927 | const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr); |
6928 | if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) { |
6929 | Register OffsetReg = DefMI->getOperand(i: 2).getReg(); |
6930 | if (!MRI.hasOneDef(RegNo: OffsetReg)) |
6931 | return false; |
6932 | const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg); |
6933 | if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT) |
6934 | return false; |
6935 | |
6936 | Addr = DefMI->getOperand(i: 1).getReg(); |
6937 | if (!MRI.hasOneDef(RegNo: Addr)) |
6938 | return false; |
6939 | |
6940 | DefMI = &*MRI.def_instr_begin(RegNo: Addr); |
6941 | Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue(); |
6942 | } |
6943 | |
6944 | // We should be left with a genuine unauthenticated GlobalValue. |
6945 | const GlobalValue *GV; |
6946 | if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { |
6947 | GV = DefMI->getOperand(i: 1).getGlobal(); |
6948 | Offset += DefMI->getOperand(i: 1).getOffset(); |
6949 | } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) { |
6950 | GV = DefMI->getOperand(i: 2).getGlobal(); |
6951 | Offset += DefMI->getOperand(i: 2).getOffset(); |
6952 | } else { |
6953 | return false; |
6954 | } |
6955 | |
6956 | MachineIRBuilder MIB(I); |
6957 | |
6958 | // Classify the reference to determine whether it needs a GOT load. |
6959 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); |
6960 | const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0); |
6961 | assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) && |
6962 | "unsupported non-GOT op flags on ptrauth global reference" ); |
6963 | assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) && |
6964 | "unsupported non-GOT reference to weak ptrauth global" ); |
6965 | |
6966 | std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI); |
6967 | bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0; |
6968 | |
6969 | // Non-extern_weak: |
6970 | // - No GOT load needed -> MOVaddrPAC |
6971 | // - GOT load for non-extern_weak -> LOADgotPAC |
6972 | // Note that we disallow extern_weak refs to avoid null checks later. |
6973 | if (!GV->hasExternalWeakLinkage()) { |
6974 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {}); |
6975 | MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {}); |
6976 | MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC) |
6977 | .addGlobalAddress(GV, Offset) |
6978 | .addImm(Val: Key) |
6979 | .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR) |
6980 | .addImm(Val: Disc) |
6981 | .constrainAllUses(TII, TRI, RBI); |
6982 | MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16)); |
6983 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
6984 | I.eraseFromParent(); |
6985 | return true; |
6986 | } |
6987 | |
6988 | // extern_weak -> LOADauthptrstatic |
6989 | |
6990 | // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the |
6991 | // offset alone as a pointer if the symbol wasn't available, which would |
6992 | // probably break null checks in users. Ptrauth complicates things further: |
6993 | // error out. |
6994 | if (Offset != 0) |
6995 | report_fatal_error( |
6996 | reason: "unsupported non-zero offset in weak ptrauth global reference" ); |
6997 | |
6998 | if (HasAddrDisc) |
6999 | report_fatal_error(reason: "unsupported weak addr-div ptrauth global" ); |
7000 | |
7001 | MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {}) |
7002 | .addGlobalAddress(GV, Offset) |
7003 | .addImm(Val: Key) |
7004 | .addImm(Val: Disc); |
7005 | RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI); |
7006 | |
7007 | I.eraseFromParent(); |
7008 | return true; |
7009 | } |
7010 | |
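/// Select a TBL/TBX table-lookup intrinsic. The NumVec source vectors are
/// packed into a Q-register tuple with a REG_SEQUENCE, the 8b or 16b
/// instruction variant is chosen from the destination type, and for TBX
/// (isExt) the extra pass-through operand is placed first.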
7011 | void AArch64InstructionSelector::SelectTable(MachineInstr &I, |
7012 | MachineRegisterInfo &MRI, |
7013 | unsigned NumVec, unsigned Opc1, |
7014 | unsigned Opc2, bool isExt) { |
7015 | Register DstReg = I.getOperand(i: 0).getReg(); |
7016 | unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2; |
7017 | |
7018 | // Create the REG_SEQUENCE |
7019 | SmallVector<Register, 4> Regs; |
7020 | for (unsigned i = 0; i < NumVec; i++) |
7021 | Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg()); |
7022 | Register RegSeq = createQTuple(Regs, MIB); |
7023 | |
7024 | Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg(); |
7025 | MachineInstrBuilder Instr; |
7026 | if (isExt) { |
7027 | Register Reg = I.getOperand(i: 2).getReg(); |
7028 | Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg}); |
7029 | } else |
7030 | Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg}); |
7031 | constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI); |
7032 | I.eraseFromParent(); |
7033 | } |
7034 | |
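// The following renderers encode shift-amount immediates for the imported
// TableGen patterns: the *_A variants render (BitWidth - Imm) masked to the
// operand width, while the *_B variants render (BitWidth - 1 - Imm).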
7035 | InstructionSelector::ComplexRendererFns |
7036 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { |
7037 | auto MaybeImmed = getImmedFromMO(Root); |
7038 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
7039 | return std::nullopt; |
7040 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; |
7041 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
7042 | } |
7043 | |
7044 | InstructionSelector::ComplexRendererFns |
7045 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { |
7046 | auto MaybeImmed = getImmedFromMO(Root); |
7047 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) |
7048 | return std::nullopt; |
7049 | uint64_t Enc = 31 - *MaybeImmed; |
7050 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
7051 | } |
7052 | |
7053 | InstructionSelector::ComplexRendererFns |
7054 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { |
7055 | auto MaybeImmed = getImmedFromMO(Root); |
7056 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
7057 | return std::nullopt; |
7058 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; |
7059 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
7060 | } |
7061 | |
7062 | InstructionSelector::ComplexRendererFns |
7063 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { |
7064 | auto MaybeImmed = getImmedFromMO(Root); |
7065 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) |
7066 | return std::nullopt; |
7067 | uint64_t Enc = 63 - *MaybeImmed; |
7068 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}}; |
7069 | } |
7070 | |
7071 | /// Helper to select an immediate value that can be represented as a 12-bit |
7072 | /// value shifted left by either 0 or 12. If it is possible to do so, return |
7073 | /// the immediate and shift value. If not, return std::nullopt. |
7074 | /// |
7075 | /// Used by selectArithImmed and selectNegArithImmed. |
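/// For example, 0xabc is returned as (0xabc, LSL #0) and 0xabc000 as
/// (0xabc, LSL #12), while 0x1234 needs both halves and yields std::nullopt.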
7076 | InstructionSelector::ComplexRendererFns |
7077 | AArch64InstructionSelector::select12BitValueWithLeftShift( |
7078 | uint64_t Immed) const { |
7079 | unsigned ShiftAmt; |
7080 | if (Immed >> 12 == 0) { |
7081 | ShiftAmt = 0; |
7082 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { |
7083 | ShiftAmt = 12; |
7084 | Immed = Immed >> 12; |
7085 | } else |
7086 | return std::nullopt; |
7087 | |
7088 | unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt); |
7089 | return {{ |
7090 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); }, |
7091 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); }, |
7092 | }}; |
7093 | } |
7094 | |
7095 | /// SelectArithImmed - Select an immediate value that can be represented as |
7096 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with |
7097 | /// Val set to the 12-bit value and Shift set to the shifter operand. |
7098 | InstructionSelector::ComplexRendererFns |
7099 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { |
7100 | // This function is called from the addsub_shifted_imm ComplexPattern, |
7101 | // which lists [imm] as the list of opcodes it's interested in; however,
7102 | // we still need to check whether the operand is actually an immediate |
7103 | // here because the ComplexPattern opcode list is only used in |
7104 | // root-level opcode matching. |
7105 | auto MaybeImmed = getImmedFromMO(Root); |
7106 | if (MaybeImmed == std::nullopt) |
7107 | return std::nullopt; |
7108 | return select12BitValueWithLeftShift(Immed: *MaybeImmed); |
7109 | } |
7110 | |
7111 | /// SelectNegArithImmed - As above, but negates the value before trying to |
7112 | /// select it. |
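/// For example, a 32-bit immediate of -16 is negated to 16, which fits the
/// 12-bit form; this is what allows a compare against a negative constant to
/// be selected as CMN rather than CMP.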
7113 | InstructionSelector::ComplexRendererFns |
7114 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { |
7115 | // We need a register here, because we need to know if we have a 64 or 32 |
7116 | // bit immediate. |
7117 | if (!Root.isReg()) |
7118 | return std::nullopt; |
7119 | auto MaybeImmed = getImmedFromMO(Root); |
7120 | if (MaybeImmed == std::nullopt) |
7121 | return std::nullopt; |
7122 | uint64_t Immed = *MaybeImmed; |
7123 | |
7124 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" |
7125 | // have the opposite effect on the C flag, so this pattern mustn't match under |
7126 | // those circumstances. |
7127 | if (Immed == 0) |
7128 | return std::nullopt; |
7129 | |
7130 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on |
7131 | // the root. |
7132 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7133 | if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32) |
7134 | Immed = ~((uint32_t)Immed) + 1; |
7135 | else |
7136 | Immed = ~Immed + 1ULL; |
7137 | |
7138 | if (Immed & 0xFFFFFFFFFF000000ULL) |
7139 | return std::nullopt; |
7140 | |
7141 | Immed &= 0xFFFFFFULL; |
7142 | return select12BitValueWithLeftShift(Immed); |
7143 | } |
7144 | |
7145 | /// Checks if we are sure that folding MI into load/store addressing mode is |
7146 | /// beneficial or not. |
7147 | /// |
7148 | /// Returns: |
7149 | /// - true if folding MI would be beneficial. |
7150 | /// - false if folding MI would be bad. |
7151 | /// - std::nullopt if it is not sure whether folding MI is beneficial. |
7152 | /// |
7153 | /// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example: |
7154 | /// |
7155 | /// %13:gpr(s64) = G_CONSTANT i64 1 |
7156 | /// %8:gpr(s64) = G_SHL %6, %13(s64) |
7157 | /// %9:gpr(p0) = G_PTR_ADD %0, %8(s64) |
7158 | /// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16)) |
7159 | std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode( |
7160 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { |
7161 | if (MI.getOpcode() == AArch64::G_SHL) { |
7162 | // Address operands with shifts are free, except when running on subtargets
7163 | // with AddrLSLSlow14. |
7164 | if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7165 | VReg: MI.getOperand(i: 2).getReg(), MRI)) { |
7166 | const APInt ShiftVal = ValAndVReg->Value;
7167 | |
7168 | // Don't fold if we know this will be slow. |
7169 | return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4)); |
7170 | } |
7171 | } |
7172 | return std::nullopt; |
7173 | } |
7174 | |
7175 | /// Return true if it is worth folding MI into an extended register. That is, |
7176 | /// if it's safe to pull it into the addressing mode of a load or store as a |
7177 | /// shift. |
7178 | /// \p IsAddrOperand whether the def of MI is used as an address operand |
7179 | /// (e.g. feeding into an LDR/STR). |
7180 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( |
7181 | MachineInstr &MI, const MachineRegisterInfo &MRI, |
7182 | bool IsAddrOperand) const { |
7183 | |
7184 | // Always fold if there is one use, or if we're optimizing for size. |
7185 | Register DefReg = MI.getOperand(i: 0).getReg(); |
7186 | if (MRI.hasOneNonDBGUse(RegNo: DefReg) || |
7187 | MI.getParent()->getParent()->getFunction().hasOptSize()) |
7188 | return true; |
7189 | |
7190 | if (IsAddrOperand) { |
7191 | // If we already know whether folding MI is good or bad, return the result.
7192 | if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI)) |
7193 | return *Worth; |
7194 | |
7195 | // Fold G_PTR_ADD if its offset operand can be folded |
7196 | if (MI.getOpcode() == AArch64::G_PTR_ADD) { |
7197 | MachineInstr *OffsetInst = |
7198 | getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
7199 | |
7200 | // Note, we already know G_PTR_ADD is used by at least two instructions. |
7201 | // If we are also sure about whether folding is beneficial or not, |
7202 | // return the result. |
7203 | if (const auto Worth = isWorthFoldingIntoAddrMode(MI&: *OffsetInst, MRI)) |
7204 | return *Worth; |
7205 | } |
7206 | } |
7207 | |
7208 | // FIXME: Consider checking HasALULSLFast as appropriate. |
7209 | |
7210 | // We have a fastpath, so folding a shift in and potentially computing it |
7211 | // many times may be beneficial. Check if this is only used in memory ops. |
7212 | // If it is, then we should fold. |
7213 | return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg), |
7214 | P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); |
7215 | } |
7216 | |
7217 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { |
7218 | switch (Type) { |
7219 | case AArch64_AM::SXTB: |
7220 | case AArch64_AM::SXTH: |
7221 | case AArch64_AM::SXTW: |
7222 | return true; |
7223 | default: |
7224 | return false; |
7225 | } |
7226 | } |
7227 | |
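/// Match an offset of the form (G_SHL Reg, log2(SizeInBytes)) or the
/// equivalent G_MUL, optionally combined with an extend of the offset
/// register when WantsExt is set, so it can be folded into a register-offset
/// addressing mode such as [Base, Offset, lsl #log2(SizeInBytes)].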
7228 | InstructionSelector::ComplexRendererFns |
7229 | AArch64InstructionSelector::selectExtendedSHL( |
7230 | MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, |
7231 | unsigned SizeInBytes, bool WantsExt) const { |
7232 | assert(Base.isReg() && "Expected base to be a register operand" ); |
7233 | assert(Offset.isReg() && "Expected offset to be a register operand" ); |
7234 | |
7235 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7236 | MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg()); |
7237 | |
7238 | unsigned OffsetOpc = OffsetInst->getOpcode(); |
7239 | bool LookedThroughZExt = false; |
7240 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { |
7241 | // Try to look through a ZEXT. |
7242 | if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) |
7243 | return std::nullopt; |
7244 | |
7245 | OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg()); |
7246 | OffsetOpc = OffsetInst->getOpcode(); |
7247 | LookedThroughZExt = true; |
7248 | |
7249 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) |
7250 | return std::nullopt; |
7251 | } |
7252 | // Make sure that the memory op is a valid size. |
7253 | int64_t LegalShiftVal = Log2_32(Value: SizeInBytes); |
7254 | if (LegalShiftVal == 0) |
7255 | return std::nullopt; |
7256 | if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true)) |
7257 | return std::nullopt; |
7258 | |
7259 | // Now, try to find the specific G_CONSTANT. Start by assuming that the |
7260 | // register we will offset is the LHS, and the register containing the |
7261 | // constant is the RHS. |
7262 | Register OffsetReg = OffsetInst->getOperand(i: 1).getReg(); |
7263 | Register ConstantReg = OffsetInst->getOperand(i: 2).getReg(); |
7264 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
7265 | if (!ValAndVReg) { |
7266 | // We didn't get a constant on the RHS. If the opcode is a shift, then |
7267 | // we're done. |
7268 | if (OffsetOpc == TargetOpcode::G_SHL) |
7269 | return std::nullopt; |
7270 | |
7271 | // If we have a G_MUL, we can use either register. Try looking at the RHS. |
7272 | std::swap(a&: OffsetReg, b&: ConstantReg); |
7273 | ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI); |
7274 | if (!ValAndVReg) |
7275 | return std::nullopt; |
7276 | } |
7277 | |
7278 | // The value must fit into 3 bits, and must be positive. Make sure that is |
7279 | // true. |
7280 | int64_t ImmVal = ValAndVReg->Value.getSExtValue(); |
7281 | |
7282 | // Since we're going to pull this into a shift, the constant value must be |
7283 | // a power of 2. If we got a multiply, then we need to check this. |
7284 | if (OffsetOpc == TargetOpcode::G_MUL) { |
7285 | if (!llvm::has_single_bit<uint32_t>(Value: ImmVal)) |
7286 | return std::nullopt; |
7287 | |
7288 | // Got a power of 2. So, the amount we'll shift is the log base-2 of that. |
7289 | ImmVal = Log2_32(Value: ImmVal); |
7290 | } |
7291 | |
7292 | if ((ImmVal & 0x7) != ImmVal) |
7293 | return std::nullopt; |
7294 | |
7295 | // We are only allowed to shift by LegalShiftVal. This shift value is built |
7296 | // into the instruction, so we can't just use whatever we want. |
7297 | if (ImmVal != LegalShiftVal) |
7298 | return std::nullopt; |
7299 | |
7300 | unsigned SignExtend = 0; |
7301 | if (WantsExt) { |
7302 | // Check if the offset is defined by an extend, unless we looked through a |
7303 | // G_ZEXT earlier. |
7304 | if (!LookedThroughZExt) { |
7305 | MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI); |
7306 | auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true); |
7307 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7308 | return std::nullopt; |
7309 | |
7310 | SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0; |
7311 | // We only support SXTW for signed extension here. |
7312 | if (SignExtend && Ext != AArch64_AM::SXTW) |
7313 | return std::nullopt; |
7314 | OffsetReg = ExtInst->getOperand(i: 1).getReg(); |
7315 | } |
7316 | |
7317 | // Need a 32-bit wide register here. |
7318 | MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg())); |
7319 | OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB); |
7320 | } |
7321 | |
7322 | // We can use the LHS of the GEP as the base, and the LHS of the shift as an |
7323 | // offset. Signify that we are shifting by setting the shift flag to 1. |
7324 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); }, |
7325 | [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); }, |
7326 | [=](MachineInstrBuilder &MIB) { |
7327 | // Need to add both immediates here to make sure that they are both |
7328 | // added to the instruction. |
7329 | MIB.addImm(Val: SignExtend); |
7330 | MIB.addImm(Val: 1); |
7331 | }}}; |
7332 | } |
7333 | |
7334 | /// This is used for computing addresses like this: |
7335 | /// |
7336 | /// ldr x1, [x2, x3, lsl #3] |
7337 | /// |
7338 | /// Where x2 is the base register, and x3 is an offset register. The shift-left |
7339 | /// is a constant value specific to this load instruction. That is, we'll never |
7340 | /// see anything other than a 3 here (which corresponds to the size of the |
7341 | /// element being loaded.) |
7342 | InstructionSelector::ComplexRendererFns |
7343 | AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( |
7344 | MachineOperand &Root, unsigned SizeInBytes) const { |
7345 | if (!Root.isReg()) |
7346 | return std::nullopt; |
7347 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7348 | |
7349 | // We want to find something like this: |
7350 | // |
7351 | // val = G_CONSTANT LegalShiftVal |
7352 | // shift = G_SHL off_reg val |
7353 | // ptr = G_PTR_ADD base_reg shift |
7354 | // x = G_LOAD ptr |
7355 | // |
7356 | // And fold it into this addressing mode: |
7357 | // |
7358 | // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] |
7359 | |
7360 | // Check if we can find the G_PTR_ADD. |
7361 | MachineInstr *PtrAdd = |
7362 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7363 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true)) |
7364 | return std::nullopt; |
7365 | |
7366 | // Now, try to match an opcode which will match our specific offset. |
7367 | // We want a G_SHL or a G_MUL. |
7368 | MachineInstr *OffsetInst = |
7369 | getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI); |
7370 | return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1), |
7371 | Offset&: OffsetInst->getOperand(i: 0), SizeInBytes, |
7372 | /*WantsExt=*/false); |
7373 | } |
7374 | |
7375 | /// This is used for computing addresses like this: |
7376 | /// |
7377 | /// ldr x1, [x2, x3] |
7378 | /// |
7379 | /// Where x2 is the base register, and x3 is an offset register. |
7380 | /// |
7381 | /// When possible (or profitable) to fold a G_PTR_ADD into the address |
7382 | /// calculation, this will do so. Otherwise, it will return std::nullopt. |
7383 | InstructionSelector::ComplexRendererFns |
7384 | AArch64InstructionSelector::selectAddrModeRegisterOffset( |
7385 | MachineOperand &Root) const { |
7386 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7387 | |
7388 | // We need a GEP. |
7389 | MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg()); |
7390 | if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD) |
7391 | return std::nullopt; |
7392 | |
7393 | // If this is used more than once, let's not bother folding. |
7394 | // TODO: Check if they are memory ops. If they are, then we can still fold |
7395 | // without having to recompute anything. |
7396 | if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg())) |
7397 | return std::nullopt; |
7398 | |
7399 | // Base is the GEP's LHS, offset is its RHS. |
7400 | return {{[=](MachineInstrBuilder &MIB) { |
7401 | MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg()); |
7402 | }, |
7403 | [=](MachineInstrBuilder &MIB) { |
7404 | MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg()); |
7405 | }, |
7406 | [=](MachineInstrBuilder &MIB) { |
7407 | // Need to add both immediates here to make sure that they are both |
7408 | // added to the instruction. |
7409 | MIB.addImm(Val: 0); |
7410 | MIB.addImm(Val: 0); |
7411 | }}}; |
7412 | } |
7413 | |
7414 | /// This is intended to be equivalent to selectAddrModeXRO in |
7415 | /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. |
7416 | InstructionSelector::ComplexRendererFns |
7417 | AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, |
7418 | unsigned SizeInBytes) const { |
7419 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7420 | if (!Root.isReg()) |
7421 | return std::nullopt; |
7422 | MachineInstr *PtrAdd = |
7423 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7424 | if (!PtrAdd) |
7425 | return std::nullopt; |
7426 | |
7427 | // Check for immediates which cannot be encoded in the [base + imm]
7428 | // addressing mode, and can't be encoded in an add/sub. If this happens, we'll |
7429 | // end up with code like: |
7430 | // |
7431 | // mov x0, wide |
7432 | // add x1 base, x0 |
7433 | // ldr x2, [x1, x0] |
7434 | // |
7435 | // In this situation, we can use the [base, xreg] addressing mode to save an |
7436 | // add/sub: |
7437 | // |
7438 | // mov x0, wide |
7439 | // ldr x2, [base, x0] |
7440 | auto ValAndVReg = |
7441 | getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI); |
7442 | if (ValAndVReg) { |
7443 | unsigned Scale = Log2_32(Value: SizeInBytes); |
7444 | int64_t ImmOff = ValAndVReg->Value.getSExtValue(); |
7445 | |
7446 | // Skip immediates that can be selected in the load/store addressing |
7447 | // mode. |
7448 | if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && |
7449 | ImmOff < (0x1000 << Scale)) |
7450 | return std::nullopt; |
7451 | |
7452 | // Helper lambda to decide whether or not it is preferable to emit an add. |
7453 | auto isPreferredADD = [](int64_t ImmOff) { |
7454 | // Constants in [0x0, 0xfff] can be encoded in an add. |
7455 | if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) |
7456 | return true; |
7457 | |
7458 | // Can it be encoded in an add lsl #12? |
7459 | if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) |
7460 | return false; |
7461 | |
7462 | // It can be encoded in an add lsl #12, but we may not want to. If it is |
7463 | // possible to select this as a single movz, then prefer that. A single |
7464 | // movz is faster than an add with a shift. |
7465 | return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && |
7466 | (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; |
7467 | }; |
7468 | |
7469 | // If the immediate can be encoded in a single add/sub, then bail out. |
7470 | if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) |
7471 | return std::nullopt; |
7472 | } |
7473 | |
7474 | // Try to fold shifts into the addressing mode. |
7475 | auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); |
7476 | if (AddrModeFns) |
7477 | return AddrModeFns; |
7478 | |
7479 | // If that doesn't work, see if it's possible to fold in registers from |
7480 | // a GEP. |
7481 | return selectAddrModeRegisterOffset(Root); |
7482 | } |
7483 | |
7484 | /// This is used for computing addresses like this: |
7485 | /// |
7486 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] |
7487 | /// |
7488 | /// Where we have a 64-bit base register, a 32-bit offset register, and an |
7489 | /// extend (which may or may not be signed). |
7490 | InstructionSelector::ComplexRendererFns |
7491 | AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, |
7492 | unsigned SizeInBytes) const { |
7493 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); |
7494 | |
7495 | MachineInstr *PtrAdd = |
7496 | getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI); |
7497 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true)) |
7498 | return std::nullopt; |
7499 | |
7500 | MachineOperand &LHS = PtrAdd->getOperand(i: 1); |
7501 | MachineOperand &RHS = PtrAdd->getOperand(i: 2); |
7502 | MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI); |
7503 | |
7504 | // The first case is the same as selectAddrModeXRO, except we need an extend. |
7505 | // In this case, we try to find a shift and extend, and fold them into the |
7506 | // addressing mode. |
7507 | // |
7508 | // E.g. |
7509 | // |
7510 | // off_reg = G_Z/S/ANYEXT ext_reg |
7511 | // val = G_CONSTANT LegalShiftVal |
7512 | // shift = G_SHL off_reg val |
7513 | // ptr = G_PTR_ADD base_reg shift |
7514 | // x = G_LOAD ptr |
7515 | // |
7516 | // In this case we can get a load like this: |
7517 | // |
7518 | // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] |
7519 | auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0), |
7520 | SizeInBytes, /*WantsExt=*/true); |
7521 | if (ExtendedShl) |
7522 | return ExtendedShl; |
7523 | |
7524 | // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. |
7525 | // |
7526 | // e.g. |
7527 | // ldr something, [base_reg, ext_reg, sxtw] |
7528 | if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true)) |
7529 | return std::nullopt; |
7530 | |
7531 | // Check if this is an extend. We'll get an extend type if it is. |
7532 | AArch64_AM::ShiftExtendType Ext = |
7533 | getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true); |
7534 | if (Ext == AArch64_AM::InvalidShiftExtend) |
7535 | return std::nullopt; |
7536 | |
7537 | // Need a 32-bit wide register. |
7538 | MachineIRBuilder MIB(*PtrAdd); |
7539 | Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(), |
7540 | RC: AArch64::GPR32RegClass, MIB); |
7541 | unsigned SignExtend = Ext == AArch64_AM::SXTW; |
7542 | |
7543 | // Base is LHS, offset is ExtReg. |
7544 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); }, |
7545 | [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }, |
7546 | [=](MachineInstrBuilder &MIB) { |
7547 | MIB.addImm(Val: SignExtend); |
7548 | MIB.addImm(Val: 0); |
7549 | }}}; |
7550 | } |
7551 | |
7552 | /// Select a "register plus unscaled signed 9-bit immediate" address. This |
7553 | /// should only match when there is an offset that is not valid for a scaled |
7554 | /// immediate addressing mode. The "Size" argument is the size in bytes of the |
7555 | /// memory reference, which is needed here to know what is valid for a scaled |
7556 | /// immediate. |
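/// For example, with Size == 8 an offset of -8 (negative) or 12 (not a
/// multiple of 8) cannot use the scaled [reg, #imm] form, but both fit the
/// signed 9-bit unscaled (LDUR/STUR-style) encoding matched here.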
7557 | InstructionSelector::ComplexRendererFns |
7558 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, |
7559 | unsigned Size) const { |
7560 | MachineRegisterInfo &MRI = |
7561 | Root.getParent()->getParent()->getParent()->getRegInfo(); |
7562 | |
7563 | if (!Root.isReg()) |
7564 | return std::nullopt; |
7565 | |
7566 | if (!isBaseWithConstantOffset(Root, MRI)) |
7567 | return std::nullopt; |
7568 | |
7569 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7570 | |
7571 | MachineOperand &OffImm = RootDef->getOperand(i: 2); |
7572 | if (!OffImm.isReg()) |
7573 | return std::nullopt; |
7574 | MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg()); |
7575 | if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) |
7576 | return std::nullopt; |
7577 | int64_t RHSC; |
7578 | MachineOperand &RHSOp1 = RHS->getOperand(i: 1); |
7579 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) |
7580 | return std::nullopt; |
7581 | RHSC = RHSOp1.getCImm()->getSExtValue(); |
7582 | |
7583 | if (RHSC >= -256 && RHSC < 256) { |
7584 | MachineOperand &Base = RootDef->getOperand(i: 1); |
7585 | return {{ |
7586 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); }, |
7587 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); }, |
7588 | }}; |
7589 | } |
7590 | return std::nullopt; |
7591 | } |
7592 | |
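/// Try to fold a G_ADD_LOW fed by an ADRP into the [reg, #imm] operand, so
/// that the low 12 bits of the symbol address become the immediate, e.g.
///   adrp x8, sym
///   ldr  x0, [x8, :lo12:sym]
/// This is only done when the global's alignment (and any constant offset)
/// keeps the :lo12: part a multiple of the access size.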
7593 | InstructionSelector::ComplexRendererFns |
7594 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, |
7595 | unsigned Size, |
7596 | MachineRegisterInfo &MRI) const { |
7597 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) |
7598 | return std::nullopt; |
7599 | MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg()); |
7600 | if (Adrp.getOpcode() != AArch64::ADRP) |
7601 | return std::nullopt; |
7602 | |
7603 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. |
7604 | auto Offset = Adrp.getOperand(i: 1).getOffset(); |
7605 | if (Offset % Size != 0) |
7606 | return std::nullopt; |
7607 | |
7608 | auto GV = Adrp.getOperand(i: 1).getGlobal(); |
7609 | if (GV->isThreadLocal()) |
7610 | return std::nullopt; |
7611 | |
7612 | auto &MF = *RootDef.getParent()->getParent(); |
7613 | if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size) |
7614 | return std::nullopt; |
7615 | |
7616 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget()); |
7617 | MachineIRBuilder MIRBuilder(RootDef); |
7618 | Register AdrpReg = Adrp.getOperand(i: 0).getReg(); |
7619 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); }, |
7620 | [=](MachineInstrBuilder &MIB) { |
7621 | MIB.addGlobalAddress(GV, Offset, |
7622 | TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | |
7623 | AArch64II::MO_NC); |
7624 | }}}; |
7625 | } |
7626 | |
7627 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The |
7628 | /// "Size" argument is the size in bytes of the memory reference, which |
7629 | /// determines the scale. |
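/// For example, with Size == 8 a constant offset of 24 is rendered as
/// immediate 3 (the byte offset divided by the scale), matching
/// 'ldr x0, [xN, #24]'.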
7630 | InstructionSelector::ComplexRendererFns |
7631 | AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, |
7632 | unsigned Size) const { |
7633 | MachineFunction &MF = *Root.getParent()->getParent()->getParent(); |
7634 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
7635 | |
7636 | if (!Root.isReg()) |
7637 | return std::nullopt; |
7638 | |
7639 | MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg()); |
7640 | if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { |
7641 | return {{ |
7642 | [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); }, |
7643 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); }, |
7644 | }}; |
7645 | } |
7646 | |
7647 | CodeModel::Model CM = MF.getTarget().getCodeModel(); |
7648 | // Check if we can fold in the ADD of small code model ADRP + ADD address. |
7649 | // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold |
7650 | // globals into the offset. |
7651 | MachineInstr *RootParent = Root.getParent(); |
7652 | if (CM == CodeModel::Small && |
7653 | !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH && |
7654 | STI.isTargetDarwin())) { |
7655 | auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI); |
7656 | if (OpFns) |
7657 | return OpFns; |
7658 | } |
7659 | |
7660 | if (isBaseWithConstantOffset(Root, MRI)) { |
7661 | MachineOperand &LHS = RootDef->getOperand(i: 1); |
7662 | MachineOperand &RHS = RootDef->getOperand(i: 2); |
7663 | MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg()); |
7664 | MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg()); |
7665 | |
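    // The scaled (register + uimm12) form requires the offset to be a
    // non-negative multiple of the access size whose scaled value fits in
    // 12 bits, i.e. 0 <= offset < 4096 * Size.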
    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Size);
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
    return std::nullopt;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return std::nullopt;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

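  // The shift amount is encoded modulo the width of the shifted register.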
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

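  // When folding into an address operand (IsLoadStore), only word-sized
  // extends are usable: the register-offset addressing mode has no byte or
  // halfword extend forms, so those cases return InvalidShiftExtend below.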
  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
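  // If the value is already the same width as the destination register class,
  // no copy is needed.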
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  // FIXME: We should have an emitCopy function?
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
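    // The arithmetic extended-register form only allows a left shift of 0-4.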
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

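/// Match an operand that is the high half of a (typically 128-bit) vector and
/// render the underlying full-width source register: either the second result
/// of a G_UNMERGE_VALUES or a G_EXTRACT_VECTOR_ELT of lane 1 from a <2 x s64>,
/// looking through bitcasts on little-endian targets.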
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    Extract =
        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
  if (!Extract)
    return std::nullopt;

  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
      Register ExtReg = Extract->MI->getOperand(2).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
    auto LaneIdx = getIConstantVRegValWithLookThrough(
        Extract->MI->getOperand(2).getReg(), MRI);
    if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
        LaneIdx->Value.getSExtValue() == 1) {
      Register ExtReg = Extract->MI->getOperand(1).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }

  return std::nullopt;
}

void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(*CstVal);
}

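// The next two renderers re-encode a G_CONSTANT in the bitmask-immediate
// (N:immr:imms) form expected by the 32- and 64-bit logical instructions.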
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
         "Expected G_UBSANTRAP");
  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
}

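// The renderers below emit FP constants in the 8-bit immediate encoding used
// by FMOV (immediate); the selection patterns are expected to have already
// checked that the constant is representable in that form.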
void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
                                                      .getFPImm()
                                                      ->getValueAPF()
                                                      .bitcastToAPInt()
                                                      .getZExtValue()));
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogenous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    // =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogenous regbanks, need to fixup.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 const AArch64Subtarget &Subtarget,
                                 const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm