ARMSelectionDAGInfo.cpp source code [llvm_projects/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp]

1	//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the ARMSelectionDAGInfo class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "ARMTargetTransformInfo.h"
14	#include "llvm/CodeGen/SelectionDAG.h"
15	#include "llvm/Support/CommandLine.h"
16	using namespace llvm;
17
18	#define DEBUG_TYPE "arm-selectiondag-info"
19
20	static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
21	"arm-memtransfer-tploop", cl::Hidden,
22	cl::desc ("Control conversion of memcpy to "
23	"Tail predicated loops (WLSTP)"),
24	cl::init(Val: TPLoop::ForceDisabled),
25	cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled",
26	"Don't convert memcpy to TP loop."),
27	clEnumValN(TPLoop::ForceEnabled, "force-enabled",
28	"Always convert memcpy to TP loop."),
29	clEnumValN(TPLoop::Allow, "allow",
30	"Allow (may be subject to certain conditions) "
31	"conversion of memcpy to TP loop.")));
32
33	bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
34	return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
35	Opcode <= ARMISD::LAST_MEMORY_OPCODE;
36	}
37
38	// Emit, if possible, a specialized version of the given Libcall. Typically this
39	// means selecting the appropriately aligned version, but we also convert memset
40	// of 0 into memclr.
41	SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
42	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
43	SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
44	const ARMSubtarget &Subtarget =
45	DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
46	const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
47
48	// Only use a specialized AEABI function if the default version of this
49	// Libcall is an AEABI function.
50	if (std::strncmp(s1: TLI->getLibcallName(Call: LC), s2: "__aeabi", n: `7`) != `0`)
51	return SDValue ();
52
53	// Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
54	// able to translate memset to memclr and use the value to index the function
55	// name array.
56	enum {
57	AEABI_MEMCPY = `0`,
58	AEABI_MEMMOVE,
59	AEABI_MEMSET,
60	AEABI_MEMCLR
61	} AEABILibcall;
62	switch (LC) {
63	case RTLIB::MEMCPY:
64	AEABILibcall = AEABI_MEMCPY;
65	break;
66	case RTLIB::MEMMOVE:
67	AEABILibcall = AEABI_MEMMOVE;
68	break;
69	case RTLIB::MEMSET:
70	AEABILibcall = AEABI_MEMSET;
71	if (isNullConstant(V: Src))
72	AEABILibcall = AEABI_MEMCLR;
73	break;
74	default:
75	return SDValue ();
76	}
77
78	// Choose the most-aligned libcall variant that we can
79	enum {
80	ALIGN1 = `0`,
81	ALIGN4,
82	ALIGN8
83	} AlignVariant;
84	if ((Align & `7`) == `0`)
85	AlignVariant = ALIGN8;
86	else if ((Align & `3`) == `0`)
87	AlignVariant = ALIGN4;
88	else
89	AlignVariant = ALIGN1;
90
91	TargetLowering::ArgListTy Args;
92	TargetLowering::ArgListEntry Entry;
93	Entry.Ty = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());
94	Entry.Node = Dst;
95	Args.push_back(x: Entry);
96	if (AEABILibcall == AEABI_MEMCLR) {
97	Entry.Node = Size;
98	Args.push_back(x: Entry);
99	} else if (AEABILibcall == AEABI_MEMSET) {
100	// Adjust parameters for memset, EABI uses format (ptr, size, value),
101	// GNU library uses (ptr, value, size)
102	// See RTABI section 4.3.4
103	Entry.Node = Size;
104	Args.push_back(x: Entry);
105
106	// Extend or truncate the argument to be an i32 value for the call.
107	if (Src.getValueType().bitsGT(VT: MVT::i32))
108	Src = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Src);
109	else if (Src.getValueType().bitsLT(VT: MVT::i32))
110	Src = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i32, Operand: Src);
111
112	Entry.Node = Src;
113	Entry.Ty = Type::getInt32Ty(C&: *DAG.getContext());
114	Entry.IsSExt = false;
115	Args.push_back(x: Entry);
116	} else {
117	Entry.Node = Src;
118	Args.push_back(x: Entry);
119
120	Entry.Node = Size;
121	Args.push_back(x: Entry);
122	}
123
124	static const RTLIB::Libcall FunctionImpls[`4`][`3`] = {
125	{RTLIB::MEMCPY, RTLIB::AEABI_MEMCPY4, RTLIB::AEABI_MEMCPY8},
126	{RTLIB::MEMMOVE, RTLIB::AEABI_MEMMOVE4, RTLIB::AEABI_MEMMOVE8},
127	{RTLIB::MEMSET, RTLIB::AEABI_MEMSET4, RTLIB::AEABI_MEMSET8},
128	{RTLIB::AEABI_MEMCLR, RTLIB::AEABI_MEMCLR4, RTLIB::AEABI_MEMCLR8}};
129
130	RTLIB::Libcall NewLC = FunctionImpls[AEABILibcall][AlignVariant];
131
132	TargetLowering::CallLoweringInfo CLI(DAG);
133	CLI.setDebugLoc(dl)
134	.setChain(Chain)
135	.setLibCallee(
136	CC: TLI->getLibcallCallingConv(Call: NewLC), ResultType: Type::getVoidTy(C&: *DAG.getContext()),
137	Target: DAG.getExternalSymbol(Sym: TLI->getLibcallName(Call: NewLC),
138	VT: TLI->getPointerTy(DL: DAG.getDataLayout())),
139	ArgsList: std::move(Args))
140	.setDiscardResult();
141	std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
142
143	return CallResult.second;
144	}
145
146	static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
147	const SelectionDAG &DAG,
148	ConstantSDNode *ConstantSize,
149	Align Alignment, bool IsMemcpy) {
150	auto &F = DAG.getMachineFunction().getFunction();
151	if (!EnableMemtransferTPLoop)
152	return false;
153	if (EnableMemtransferTPLoop == TPLoop::ForceEnabled)
154	return true;
155	// Do not generate inline TP loop if optimizations is disabled,
156	// or if optimization for size (-Os or -Oz) is on.
157	if (F.hasOptNone() \|\| F.hasOptSize())
158	return false;
159	// If cli option is unset, for memset always generate inline TP.
160	// For memcpy, check some conditions
161	if (!IsMemcpy)
162	return true;
163	if (!ConstantSize && Alignment >= Align (`4`))
164	return true;
165	if (ConstantSize &&
166	ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
167	ConstantSize->getZExtValue() <
168	Subtarget.getMaxMemcpyTPInlineSizeThreshold())
169	return true;
170	return false;
171	}
172
173	SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
174	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
175	SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
176	MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
177	const ARMSubtarget &Subtarget =
178	DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
179	ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
180
181	if (Subtarget.hasMVEIntegerOps() &&
182	shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, IsMemcpy: true))
183	return DAG.getNode(Opcode: ARMISD::MEMCPYLOOP, DL: dl, VT: MVT::Other, N1: Chain, N2: Dst, N3: Src,
184	N4: DAG.getZExtOrTrunc(Op: Size, DL: dl, VT: MVT::i32));
185
186	// Do repeated 4-byte loads and stores. To be improved.
187	// This requires 4-byte alignment.
188	if (Alignment < Align (`4`))
189	return SDValue ();
190	// This requires the copy size to be a constant, preferably
191	// within a subtarget-specific limit.
192	if (!ConstantSize)
193	return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
194	Align: Alignment.value(), LC: RTLIB::MEMCPY);
195	uint64_t SizeVal = ConstantSize->getZExtValue();
196	if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
197	return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
198	Align: Alignment.value(), LC: RTLIB::MEMCPY);
199
200	unsigned BytesLeft = SizeVal & `3`;
201	unsigned NumMemOps = SizeVal >> `2`;
202	unsigned EmittedNumMemOps = `0`;
203	EVT VT = MVT::i32;
204	unsigned VTSize = `4`;
205	unsigned i = `0`;
206	// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
207	const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? `4` : `6`;
208	SDValue TFOps[`6`];
209	SDValue Loads[`6`];
210	uint64_t SrcOff = `0`, DstOff = `0`;
211
212	// FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
213	// VLDM/VSTM and make this code emit it when appropriate. This would reduce
214	// pressure on the general purpose registers. However this seems harder to map
215	// onto the register allocator's view of the world.
216
217	// The number of MEMCPY pseudo-instructions to emit. We use up to
218	// MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
219	// later on. This is a lower bound on the number of MEMCPY operations we must
220	// emit.
221	unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - `1`) / MaxLoadsInLDM;
222
223	// Code size optimisation: do not inline memcpy if expansion results in
224	// more instructions than the libary call.
225	if (NumMEMCPYs > `1` && Subtarget.hasMinSize()) {
226	return SDValue ();
227	}
228
229	SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other, VT4: MVT::Glue);
230
231	for (unsigned I = `0`; I != NumMEMCPYs; ++I) {
232	// Evenly distribute registers among MEMCPY operations to reduce register
233	// pressure.
234	unsigned NextEmittedNumMemOps = NumMemOps * (I + `1`) / NumMEMCPYs;
235	unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
236
237	Dst = DAG.getNode(Opcode: ARMISD::MEMCPY, DL: dl, VTList: VTs, N1: Chain, N2: Dst, N3: Src,
238	N4: DAG.getConstant(Val: NumRegs, DL: dl, VT: MVT::i32));
239	Src = Dst.getValue(R: `1`);
240	Chain = Dst.getValue(R: `2`);
241
242	DstPtrInfo = DstPtrInfo.getWithOffset(O: NumRegs * VTSize);
243	SrcPtrInfo = SrcPtrInfo.getWithOffset(O: NumRegs * VTSize);
244
245	EmittedNumMemOps = NextEmittedNumMemOps;
246	}
247
248	if (BytesLeft == `0`)
249	return Chain;
250
251	// Issue loads / stores for the trailing (1 - 3) bytes.
252	auto getRemainingValueType = [](unsigned BytesLeft) {
253	return (BytesLeft >= `2`) ? MVT::i16 : MVT::i8;
254	};
255	auto getRemainingSize = [](unsigned BytesLeft) {
256	return (BytesLeft >= `2`) ? `2` : `1`;
257	};
258
259	unsigned BytesLeftSave = BytesLeft;
260	i = `0`;
261	while (BytesLeft) {
262	VT = getRemainingValueType (BytesLeft);
263	VTSize = getRemainingSize (BytesLeft);
264	Loads[i] = DAG.getLoad(VT, dl, Chain,
265	Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: Src,
266	N2: DAG.getConstant(Val: SrcOff, DL: dl, VT: MVT::i32)),
267	PtrInfo: SrcPtrInfo.getWithOffset(O: SrcOff));
268	TFOps[i] = Loads[i].getValue(R: `1`);
269	++i;
270	SrcOff += VTSize;
271	BytesLeft -= VTSize;
272	}
273	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: ArrayRef(TFOps, i));
274
275	i = `0`;
276	BytesLeft = BytesLeftSave;
277	while (BytesLeft) {
278	VT = getRemainingValueType (BytesLeft);
279	VTSize = getRemainingSize (BytesLeft);
280	TFOps[i] = DAG.getStore(Chain, dl, Val: Loads[i],
281	Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: Dst,
282	N2: DAG.getConstant(Val: DstOff, DL: dl, VT: MVT::i32)),
283	PtrInfo: DstPtrInfo.getWithOffset(O: DstOff));
284	++i;
285	DstOff += VTSize;
286	BytesLeft -= VTSize;
287	}
288	return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: ArrayRef(TFOps, i));
289	}
290
291	SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
292	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
293	SDValue Size, Align Alignment, bool isVolatile,
294	MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
295	return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
296	Align: Alignment.value(), LC: RTLIB::MEMMOVE);
297	}
298
299	SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
300	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
301	SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
302	MachinePointerInfo DstPtrInfo) const {
303
304	const ARMSubtarget &Subtarget =
305	DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
306
307	ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
308
309	// Generate TP loop for llvm.memset
310	if (Subtarget.hasMVEIntegerOps() &&
311	shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
312	IsMemcpy: false)) {
313	Src = DAG.getSplatBuildVector(VT: MVT::v16i8, DL: dl,
314	Op: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i8, Operand: Src));
315	return DAG.getNode(Opcode: ARMISD::MEMSETLOOP, DL: dl, VT: MVT::Other, N1: Chain, N2: Dst, N3: Src,
316	N4: DAG.getZExtOrTrunc(Op: Size, DL: dl, VT: MVT::i32));
317	}
318
319	if (!AlwaysInline)
320	return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
321	Align: Alignment.value(), LC: RTLIB::MEMSET);
322
323	return SDValue ();
324	}
325

Browse the source code of llvm_projects/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp