1//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64SelectionDAGInfo.h"
14#include "AArch64MachineFunctionInfo.h"
15
16#define GET_SDNODE_DESC
17#include "AArch64GenSDNodeInfo.inc"
18#undef GET_SDNODE_DESC
19
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64-selectiondag-info"
23
24static cl::opt<bool>
25 LowerToSMERoutines("aarch64-lower-to-sme-routines", cl::Hidden,
26 cl::desc("Enable AArch64 SME memory operations "
27 "to lower to librt functions"),
28 cl::init(Val: true));
29
30static cl::opt<bool> UseMOPS("aarch64-use-mops", cl::Hidden,
31 cl::desc("Enable AArch64 MOPS instructions "
32 "for memcpy/memset/memmove"),
33 cl::init(Val: true));
34
35AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
36 : SelectionDAGGenTargetInfo(AArch64GenSDNodeInfo) {}
37
38void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
39 const SDNode *N) const {
40 switch (N->getOpcode()) {
41 case AArch64ISD::WrapperLarge:
42 // operand #0 must have type i32, but has type i64
43 return;
44 }
45
46 SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
47
48#ifndef NDEBUG
49 // Some additional checks not yet implemented by verifyTargetNode.
50 switch (N->getOpcode()) {
51 case AArch64ISD::SADDWT:
52 case AArch64ISD::SADDWB:
53 case AArch64ISD::UADDWT:
54 case AArch64ISD::UADDWB: {
55 EVT VT = N->getValueType(0);
56 EVT Op0VT = N->getOperand(0).getValueType();
57 EVT Op1VT = N->getOperand(1).getValueType();
58 assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
59 VT.isInteger() && Op0VT.isInteger() && Op1VT.isInteger() &&
60 "Expected integer vectors!");
61 assert(VT == Op0VT &&
62 "Expected result and first input to have the same type!");
63 assert(Op0VT.getSizeInBits() == Op1VT.getSizeInBits() &&
64 "Expected vectors of equal size!");
65 assert(Op0VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount() &&
66 "Expected result vector and first input vector to have half the "
67 "lanes of the second input vector!");
68 break;
69 }
70 case AArch64ISD::SUNPKLO:
71 case AArch64ISD::SUNPKHI:
72 case AArch64ISD::UUNPKLO:
73 case AArch64ISD::UUNPKHI: {
74 EVT VT = N->getValueType(0);
75 EVT OpVT = N->getOperand(0).getValueType();
76 assert(OpVT.isVector() && VT.isVector() && OpVT.isInteger() &&
77 VT.isInteger() && "Expected integer vectors!");
78 assert(OpVT.getSizeInBits() == VT.getSizeInBits() &&
79 "Expected vectors of equal size!");
80 assert(OpVT.getVectorElementCount() == VT.getVectorElementCount() * 2 &&
81 "Expected result vector with half the lanes of its input!");
82 break;
83 }
84 case AArch64ISD::TRN1:
85 case AArch64ISD::TRN2:
86 case AArch64ISD::UZP1:
87 case AArch64ISD::UZP2:
88 case AArch64ISD::ZIP1:
89 case AArch64ISD::ZIP2: {
90 EVT VT = N->getValueType(0);
91 EVT Op0VT = N->getOperand(0).getValueType();
92 EVT Op1VT = N->getOperand(1).getValueType();
93 assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
94 "Expected vectors!");
95 assert(VT == Op0VT && VT == Op1VT && "Expected matching vectors!");
96 break;
97 }
98 case AArch64ISD::RSHRNB_I: {
99 EVT VT = N->getValueType(0);
100 EVT Op0VT = N->getOperand(0).getValueType();
101 assert(VT.isVector() && VT.isInteger() &&
102 "Expected integer vector result type!");
103 assert(Op0VT.isVector() && Op0VT.isInteger() &&
104 "Expected first operand to be an integer vector!");
105 assert(VT.getSizeInBits() == Op0VT.getSizeInBits() &&
106 "Expected vectors of equal size!");
107 assert(VT.getVectorElementCount() == Op0VT.getVectorElementCount() * 2 &&
108 "Expected input vector with half the lanes of its result!");
109 assert(isa<ConstantSDNode>(N->getOperand(1)) &&
110 "Expected second operand to be a constant!");
111 break;
112 }
113 }
114#endif
115}
116
117SDValue AArch64SelectionDAGInfo::EmitMOPS(unsigned Opcode, SelectionDAG &DAG,
118 const SDLoc &DL, SDValue Chain,
119 SDValue Dst, SDValue SrcOrValue,
120 SDValue Size, Align Alignment,
121 bool isVolatile,
122 MachinePointerInfo DstPtrInfo,
123 MachinePointerInfo SrcPtrInfo) const {
124
125 // Get the constant size of the copy/set.
126 uint64_t ConstSize = 0;
127 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Size))
128 ConstSize = C->getZExtValue();
129
130 const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
131 Opcode == AArch64::MOPSMemorySetTaggingPseudo;
132
133 MachineFunction &MF = DAG.getMachineFunction();
134
135 auto Vol =
136 isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
137 auto DstFlags = MachineMemOperand::MOStore | Vol;
138 auto *DstOp =
139 MF.getMachineMemOperand(PtrInfo: DstPtrInfo, F: DstFlags, Size: ConstSize, BaseAlignment: Alignment);
140
141 if (IsSet) {
142 // Extend value to i64, if required.
143 if (SrcOrValue.getValueType() != MVT::i64)
144 SrcOrValue = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: SrcOrValue);
145 SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
146 const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other};
147 MachineSDNode *Node = DAG.getMachineNode(Opcode, dl: DL, ResultTys, Ops);
148 DAG.setNodeMemRefs(N: Node, NewMemRefs: {DstOp});
149 return SDValue(Node, 2);
150 } else {
151 SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
152 const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
153 MachineSDNode *Node = DAG.getMachineNode(Opcode, dl: DL, ResultTys, Ops);
154
155 auto SrcFlags = MachineMemOperand::MOLoad | Vol;
156 auto *SrcOp =
157 MF.getMachineMemOperand(PtrInfo: SrcPtrInfo, F: SrcFlags, Size: ConstSize, BaseAlignment: Alignment);
158 DAG.setNodeMemRefs(N: Node, NewMemRefs: {DstOp, SrcOp});
159 return SDValue(Node, 3);
160 }
161}
162
163SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall(
164 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op0, SDValue Op1,
165 SDValue Size, RTLIB::Libcall LC) const {
166 const AArch64Subtarget &STI =
167 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
168 const AArch64TargetLowering *TLI = STI.getTargetLowering();
169 TargetLowering::ArgListTy Args;
170 Args.emplace_back(args&: Op0, args: PointerType::getUnqual(C&: *DAG.getContext()));
171
172 bool UsesResult = false;
173 RTLIB::Libcall NewLC;
174 switch (LC) {
175 case RTLIB::MEMCPY: {
176 NewLC = RTLIB::SC_MEMCPY;
177 Args.emplace_back(args&: Op1, args: PointerType::getUnqual(C&: *DAG.getContext()));
178 break;
179 }
180 case RTLIB::MEMMOVE: {
181 NewLC = RTLIB::SC_MEMMOVE;
182 Args.emplace_back(args&: Op1, args: PointerType::getUnqual(C&: *DAG.getContext()));
183 break;
184 }
185 case RTLIB::MEMSET: {
186 NewLC = RTLIB::SC_MEMSET;
187 Args.emplace_back(args: DAG.getZExtOrTrunc(Op: Op1, DL, VT: MVT::i32),
188 args: Type::getInt32Ty(C&: *DAG.getContext()));
189 break;
190 }
191 case RTLIB::MEMCHR: {
192 UsesResult = true;
193 NewLC = RTLIB::SC_MEMCHR;
194 Args.emplace_back(args: DAG.getZExtOrTrunc(Op: Op1, DL, VT: MVT::i32),
195 args: Type::getInt32Ty(C&: *DAG.getContext()));
196 break;
197 }
198 default:
199 return SDValue();
200 }
201
202 RTLIB::LibcallImpl NewLCImpl = DAG.getLibcalls().getLibcallImpl(Call: NewLC);
203 if (NewLCImpl == RTLIB::Unsupported)
204 return SDValue();
205
206 EVT PointerVT = TLI->getPointerTy(DL: DAG.getDataLayout());
207 SDValue Symbol = DAG.getExternalSymbol(LCImpl: NewLCImpl, VT: PointerVT);
208 Args.emplace_back(args&: Size, args: DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext()));
209
210 TargetLowering::CallLoweringInfo CLI(DAG);
211 PointerType *RetTy = PointerType::getUnqual(C&: *DAG.getContext());
212 CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
213 CC: DAG.getLibcalls().getLibcallImplCallingConv(Call: NewLCImpl), ResultType: RetTy, Target: Symbol,
214 ArgsList: std::move(Args));
215
216 auto [Result, ChainOut] = TLI->LowerCallTo(CLI);
217 return UsesResult ? DAG.getMergeValues(Ops: {Result, ChainOut}, dl: DL) : ChainOut;
218}
219
220SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
221 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
222 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
223 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
224 const AArch64Subtarget &STI =
225 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
226
227 if (UseMOPS && STI.hasMOPS())
228 return EmitMOPS(Opcode: AArch64::MOPSMemoryCopyPseudo, DAG, DL, Chain, Dst, SrcOrValue: Src,
229 Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
230
231 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
232 SMEAttrs Attrs = AFI->getSMEFnAttrs();
233 if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody())
234 return EmitStreamingCompatibleMemLibCall(DAG, DL, Chain, Op0: Dst, Op1: Src, Size,
235 LC: RTLIB::MEMCPY);
236 return SDValue();
237}
238
239SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
240 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
241 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
242 MachinePointerInfo DstPtrInfo) const {
243 const AArch64Subtarget &STI =
244 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
245
246 if (UseMOPS && STI.hasMOPS())
247 return EmitMOPS(Opcode: AArch64::MOPSMemorySetPseudo, DAG, DL: dl, Chain, Dst, SrcOrValue: Src,
248 Size, Alignment, isVolatile, DstPtrInfo,
249 SrcPtrInfo: MachinePointerInfo{});
250
251 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
252 SMEAttrs Attrs = AFI->getSMEFnAttrs();
253 if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody())
254 return EmitStreamingCompatibleMemLibCall(DAG, DL: dl, Chain, Op0: Dst, Op1: Src, Size,
255 LC: RTLIB::MEMSET);
256 return SDValue();
257}
258
259SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
260 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
261 SDValue Size, Align Alignment, bool isVolatile,
262 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
263 const AArch64Subtarget &STI =
264 DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
265
266 if (UseMOPS && STI.hasMOPS())
267 return EmitMOPS(Opcode: AArch64::MOPSMemoryMovePseudo, DAG, DL: dl, Chain, Dst, SrcOrValue: Src,
268 Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
269
270 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
271 SMEAttrs Attrs = AFI->getSMEFnAttrs();
272 if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody())
273 return EmitStreamingCompatibleMemLibCall(DAG, DL: dl, Chain, Op0: Dst, Op1: Src, Size,
274 LC: RTLIB::MEMMOVE);
275 return SDValue();
276}
277
278std::pair<SDValue, SDValue> AArch64SelectionDAGInfo::EmitTargetCodeForMemchr(
279 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Src,
280 SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
281 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
282 SMEAttrs Attrs = AFI->getSMEFnAttrs();
283 if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) {
284 SDValue Result = EmitStreamingCompatibleMemLibCall(
285 DAG, DL: dl, Chain, Op0: Src, Op1: Char, Size: Length, LC: RTLIB::MEMCHR);
286 return std::make_pair(x: Result.getValue(R: 0), y: Result.getValue(R: 1));
287 }
288 return std::make_pair(x: SDValue(), y: SDValue());
289}
290
// Object size, in bytes, at or above which EmitTargetCodeForSetTag emits a
// tag-store loop pseudo instead of an unrolled sequence of tag stores. That
// function also checks for a negative value, in which case the loop form is
// never chosen.
static constexpr int kSetTagLoopThreshold = 176;
292
293static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
294 SDValue Chain, SDValue Ptr, uint64_t ObjSize,
295 const MachineMemOperand *BaseMemOperand,
296 bool ZeroData) {
297 MachineFunction &MF = DAG.getMachineFunction();
298 unsigned ObjSizeScaled = ObjSize / 16;
299
300 SDValue TagSrc = Ptr;
301 if (Ptr.getOpcode() == ISD::FrameIndex) {
302 int FI = cast<FrameIndexSDNode>(Val&: Ptr)->getIndex();
303 Ptr = DAG.getTargetFrameIndex(FI, VT: MVT::i64);
304 // A frame index operand may end up as [SP + offset] => it is fine to use SP
305 // register as the tag source.
306 TagSrc = DAG.getRegister(Reg: AArch64::SP, VT: MVT::i64);
307 }
308
309 const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
310 const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
311
312 SmallVector<SDValue, 8> OutChains;
313 unsigned OffsetScaled = 0;
314 while (OffsetScaled < ObjSizeScaled) {
315 if (ObjSizeScaled - OffsetScaled >= 2) {
316 SDValue AddrNode = DAG.getMemBasePlusOffset(
317 Base: Ptr, Offset: TypeSize::getFixed(ExactSize: OffsetScaled * 16), DL: dl);
318 SDValue St = DAG.getMemIntrinsicNode(
319 Opcode: OpCode2, dl, VTList: DAG.getVTList(VT: MVT::Other),
320 Ops: {Chain, TagSrc, AddrNode},
321 MemVT: MVT::v4i64,
322 MMO: MF.getMachineMemOperand(MMO: BaseMemOperand, Offset: OffsetScaled * 16, Size: 16 * 2));
323 OffsetScaled += 2;
324 OutChains.push_back(Elt: St);
325 continue;
326 }
327
328 if (ObjSizeScaled - OffsetScaled > 0) {
329 SDValue AddrNode = DAG.getMemBasePlusOffset(
330 Base: Ptr, Offset: TypeSize::getFixed(ExactSize: OffsetScaled * 16), DL: dl);
331 SDValue St = DAG.getMemIntrinsicNode(
332 Opcode: OpCode1, dl, VTList: DAG.getVTList(VT: MVT::Other),
333 Ops: {Chain, TagSrc, AddrNode},
334 MemVT: MVT::v2i64,
335 MMO: MF.getMachineMemOperand(MMO: BaseMemOperand, Offset: OffsetScaled * 16, Size: 16));
336 OffsetScaled += 1;
337 OutChains.push_back(Elt: St);
338 }
339 }
340
341 SDValue Res = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OutChains);
342 return Res;
343}
344
345SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
346 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
347 SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
348 uint64_t ObjSize = Size->getAsZExtVal();
349 assert(ObjSize % 16 == 0);
350
351 MachineFunction &MF = DAG.getMachineFunction();
352 MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
353 PtrInfo: DstPtrInfo, F: MachineMemOperand::MOStore, Size: ObjSize, BaseAlignment: Align(16));
354
355 bool UseSetTagRangeLoop =
356 kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
357 if (!UseSetTagRangeLoop)
358 return EmitUnrolledSetTag(DAG, dl, Chain, Ptr: Addr, ObjSize, BaseMemOperand,
359 ZeroData);
360
361 const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
362
363 unsigned Opcode;
364 if (Addr.getOpcode() == ISD::FrameIndex) {
365 int FI = cast<FrameIndexSDNode>(Val&: Addr)->getIndex();
366 Addr = DAG.getTargetFrameIndex(FI, VT: MVT::i64);
367 Opcode = ZeroData ? AArch64::STZGloop : AArch64::STGloop;
368 } else {
369 Opcode = ZeroData ? AArch64::STZGloop_wback : AArch64::STGloop_wback;
370 }
371 SDValue Ops[] = {DAG.getTargetConstant(Val: ObjSize, DL: dl, VT: MVT::i64), Addr, Chain};
372 SDNode *St = DAG.getMachineNode(Opcode, dl, ResultTys: ResTys, Ops);
373
374 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {BaseMemOperand});
375 return SDValue(St, 2);
376}
377