X86SelectionDAGInfo.cpp source code [llvm_projects/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp]

1	//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the X86SelectionDAGInfo class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "X86SelectionDAGInfo.h"
14	#include "X86ISelLowering.h"
15	#include "X86InstrInfo.h"
16	#include "X86RegisterInfo.h"
17	#include "X86Subtarget.h"
18	#include "llvm/CodeGen/MachineFrameInfo.h"
19	#include "llvm/CodeGen/SelectionDAG.h"
20	#include "llvm/CodeGen/TargetLowering.h"
21
22	using namespace llvm;
23
24	#define DEBUG_TYPE "x86-selectiondag-info"
25
26	static cl::opt<bool>
27	UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(Val: false),
28	cl::desc ("Use fast short rep mov in memcpy lowering"));
29
30	bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
31	return Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
32	Opcode <= X86ISD::LAST_MEMORY_OPCODE;
33	}
34
35	bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
36	return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE &&
37	Opcode <= X86ISD::LAST_STRICTFP_OPCODE;
38	}
39
40	/// Returns the best type to use with repmovs/repstos depending on alignment.
41	static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
42	uint64_t Align = Alignment.value();
43	assert((Align != `0`) && "Align is normalized");
44	assert(isPowerOf2_64(Align) && "Align is a power of 2");
45	switch (Align) {
46	case `1`:
47	return MVT::i8;
48	case `2`:
49	return MVT::i16;
50	case `4`:
51	return MVT::i32;
52	default:
53	return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
54	}
55	}
56
57	bool X86SelectionDAGInfo::isBaseRegConflictPossible(
58	SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
59	// We cannot use TRI->hasBasePointer() until after* we select all basic*
60	// blocks. Legalization may introduce new stack temporaries with large
61	// alignment requirements. Fall back to generic code if there are any
62	// dynamic stack adjustments (hopefully rare) and the base pointer would
63	// conflict if we had to use it.
64	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
65	if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
66	return false;
67
68	const X86RegisterInfo TRI = static_cast<const* X86RegisterInfo *>(
69	DAG.getSubtarget().getRegisterInfo());
70	return llvm::is_contained(Range&: ClobberSet, Element: TRI->getBaseRegister());
71	}
72
73	/// Emit a single REP STOSB instruction for a particular constant size.
74	static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
75	const SDLoc &dl, SDValue Chain, SDValue Dst,
76	SDValue Val, SDValue Size, MVT AVT) {
77	const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
78	unsigned AX = X86::AL;
79	switch (AVT.getSizeInBits()) {
80	case `8`:
81	AX = X86::AL;
82	break;
83	case `16`:
84	AX = X86::AX;
85	break;
86	case `32`:
87	AX = X86::EAX;
88	break;
89	default:
90	AX = X86::RAX;
91	break;
92	}
93
94	const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
95	const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
96
97	SDValue InGlue;
98	Chain = DAG.getCopyToReg(Chain, dl, Reg: AX, N: Val, Glue: InGlue);
99	InGlue = Chain.getValue(R: `1`);
100	Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
101	InGlue = Chain.getValue(R: `1`);
102	Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
103	InGlue = Chain.getValue(R: `1`);
104
105	SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
106	SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
107	return DAG.getNode(Opcode: X86ISD::REP_STOS, DL: dl, VTList: Tys, Ops);
108	}
109
110	/// Emit a single REP STOSB instruction for a particular constant size.
111	static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
112	const SDLoc &dl, SDValue Chain, SDValue Dst,
113	SDValue Val, uint64_t Size) {
114	return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
115	Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
116	}
117
118	/// Returns a REP STOS instruction, possibly with a few load/stores to implement
119	/// a constant size memory set. In some cases where we know REP MOVS is
120	/// inefficient we return an empty SDValue so the calling code can either
121	/// generate a store sequence or call the runtime memset function.
122	static SDValue emitConstantSizeRepstos(SelectionDAG &DAG,
123	const X86Subtarget &Subtarget,
124	const SDLoc &dl, SDValue Chain,
125	SDValue Dst, SDValue Val, uint64_t Size,
126	EVT SizeVT, Align Alignment,
127	bool isVolatile, bool AlwaysInline,
128	MachinePointerInfo DstPtrInfo) {
129	/// In case we optimize for size, we use repstosb even if it's less efficient
130	/// so we can save the loads/stores of the leftover.
131	if (DAG.getMachineFunction().getFunction().hasMinSize()) {
132	if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
133	// Special case 0 because otherwise we get large literals,
134	// which causes larger encoding.
135	if ((Size & `31`) == `0` && (ValC->getZExtValue() & `255`) == `0`) {
136	MVT BlockType = MVT::i32;
137	const uint64_t BlockBits = BlockType.getSizeInBits();
138	const uint64_t BlockBytes = BlockBits / `8`;
139	const uint64_t BlockCount = Size / BlockBytes;
140
141	Val = DAG.getConstant(Val: `0`, DL: dl, VT: BlockType);
142	// repstosd is same size as repstosb
143	return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
144	Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
145	}
146	}
147	return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
148	}
149
150	if (Size > Subtarget.getMaxInlineSizeThreshold())
151	return SDValue ();
152
153	// If not DWORD aligned or size is more than the threshold, call the library.
154	// The libc version is likely to be faster for these cases. It can use the
155	// address value and run time information about the CPU.
156	if (Alignment < Align (`4`))
157	return SDValue ();
158
159	MVT BlockType = MVT::i8;
160	uint64_t BlockCount = Size;
161	uint64_t BytesLeft = `0`;
162
163	SDValue OriginalVal = Val;
164	if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
165	BlockType = getOptimalRepType(Subtarget, Alignment);
166	uint64_t Value = ValC->getZExtValue() & `255`;
167	const uint64_t BlockBits = BlockType.getSizeInBits();
168
169	if (BlockBits >= `16`)
170	Value = (Value << `8`) \| Value;
171
172	if (BlockBits >= `32`)
173	Value = (Value << `16`) \| Value;
174
175	if (BlockBits >= `64`)
176	Value = (Value << `32`) \| Value;
177
178	const uint64_t BlockBytes = BlockBits / `8`;
179	BlockCount = Size / BlockBytes;
180	BytesLeft = Size % BlockBytes;
181	Val = DAG.getConstant(Val: Value, DL: dl, VT: BlockType);
182	}
183
184	SDValue RepStos =
185	emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
186	Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
187	/// RepStos can process the whole length.
188	if (BytesLeft == `0`)
189	return RepStos;
190
191	// Handle the last 1 - 7 bytes.
192	SmallVector<SDValue, `4`> Results;
193	Results.push_back(Elt: RepStos);
194	unsigned Offset = Size - BytesLeft;
195	EVT AddrVT = Dst.getValueType();
196
197	Results.push_back(
198	Elt: DAG.getMemset(Chain, dl,
199	Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AddrVT, N1: Dst,
200	N2: DAG.getConstant(Val: Offset, DL: dl, VT: AddrVT)),
201	Src: OriginalVal, Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT),
202	Alignment, isVol: isVolatile, AlwaysInline,
203	/ CI / nullptr, DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset)));
204
205	return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
206	}
207
208	SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
209	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
210	SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
211	MachinePointerInfo DstPtrInfo) const {
212	// If to a segment-relative address space, use the default lowering.
213	if (DstPtrInfo.getAddrSpace() >= `256`)
214	return SDValue ();
215
216	// If the base register might conflict with our physical registers, bail out.
217	const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
218	X86::ECX, X86::EAX, X86::EDI};
219	if (isBaseRegConflictPossible(DAG, ClobberSet))
220	return SDValue ();
221
222	ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
223	if (!ConstantSize)
224	return SDValue ();
225
226	const X86Subtarget &Subtarget =
227	DAG.getMachineFunction().getSubtarget<X86Subtarget>();
228	return emitConstantSizeRepstos(
229	DAG, Subtarget, dl, Chain, Dst, Val, Size: ConstantSize->getZExtValue(),
230	SizeVT: Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
231	}
232
233	/// Emit a single REP MOVS{B,W,D,Q} instruction.
234	static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
235	const SDLoc &dl, SDValue Chain, SDValue Dst,
236	SDValue Src, SDValue Size, MVT AVT) {
237	const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
238	const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
239	const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
240	const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
241
242	SDValue InGlue;
243	Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
244	InGlue = Chain.getValue(R: `1`);
245	Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
246	InGlue = Chain.getValue(R: `1`);
247	Chain = DAG.getCopyToReg(Chain, dl, Reg: SI, N: Src, Glue: InGlue);
248	InGlue = Chain.getValue(R: `1`);
249
250	SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
251	SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
252	return DAG.getNode(Opcode: X86ISD::REP_MOVS, DL: dl, VTList: Tys, Ops);
253	}
254
255	/// Emit a single REP MOVSB instruction for a particular constant size.
256	static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
257	const SDLoc &dl, SDValue Chain, SDValue Dst,
258	SDValue Src, uint64_t Size) {
259	return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
260	Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
261	}
262
263	/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
264	/// a constant size memory copy. In some cases where we know REP MOVS is
265	/// inefficient we return an empty SDValue so the calling code can either
266	/// generate a load/store sequence or call the runtime memcpy function.
267	static SDValue emitConstantSizeRepmov(
268	SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
269	SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
270	Align Alignment, bool isVolatile, bool AlwaysInline,
271	MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
272	/// In case we optimize for size, we use repmovsb even if it's less efficient
273	/// so we can save the loads/stores of the leftover.
274	if (DAG.getMachineFunction().getFunction().hasMinSize())
275	return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
276
277	/// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
278	/// efficient.
279	if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
280	return SDValue ();
281
282	/// If we have enhanced repmovs we use it.
283	if (Subtarget.hasERMSB())
284	return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
285
286	assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
287	/// We assume runtime memcpy will do a better job for unaligned copies when
288	/// ERMS is not present.
289	if (!AlwaysInline && (Alignment < Align (`4`)))
290	return SDValue ();
291
292	const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
293	const uint64_t BlockBytes = BlockType.getSizeInBits() / `8`;
294	const uint64_t BlockCount = Size / BlockBytes;
295	const uint64_t BytesLeft = Size % BlockBytes;
296	SDValue RepMovs =
297	emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
298	Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
299
300	/// RepMov can process the whole length.
301	if (BytesLeft == `0`)
302	return RepMovs;
303
304	assert(BytesLeft && "We have leftover at this point");
305
306	// Handle the last 1 - 7 bytes.
307	SmallVector<SDValue, `4`> Results;
308	Results.push_back(Elt: RepMovs);
309	unsigned Offset = Size - BytesLeft;
310	EVT DstVT = Dst.getValueType();
311	EVT SrcVT = Src.getValueType();
312	Results.push_back(Elt: DAG.getMemcpy(
313	Chain, dl,
314	Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: DstVT, N1: Dst, N2: DAG.getConstant(Val: Offset, DL: dl, VT: DstVT)),
315	Src: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: SrcVT, N1: Src, N2: DAG.getConstant(Val: Offset, DL: dl, VT: SrcVT)),
316	Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT), Alignment, isVol: isVolatile,
317	/AlwaysInline/ true, /CI=/nullptr, OverrideTailCall: std::nullopt,
318	DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset), SrcPtrInfo: SrcPtrInfo.getWithOffset(O: Offset)));
319	return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
320	}
321
322	SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
323	SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
324	SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
325	MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
326	// If to a segment-relative address space, use the default lowering.
327	if (DstPtrInfo.getAddrSpace() >= `256` \|\| SrcPtrInfo.getAddrSpace() >= `256`)
328	return SDValue ();
329
330	// If the base registers conflict with our physical registers, use the default
331	// lowering.
332	const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
333	X86::ECX, X86::ESI, X86::EDI};
334	if (isBaseRegConflictPossible(DAG, ClobberSet))
335	return SDValue ();
336
337	const X86Subtarget &Subtarget =
338	DAG.getMachineFunction().getSubtarget<X86Subtarget>();
339
340	// If enabled and available, use fast short rep mov.
341	if (UseFSRMForMemcpy && Subtarget.hasFSRM())
342	return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, AVT: MVT::i8);
343
344	/// Handle constant sizes
345	if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size))
346	return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
347	Size: ConstantSize->getZExtValue(),
348	SizeVT: Size.getValueType(), Alignment, isVolatile,
349	AlwaysInline, DstPtrInfo, SrcPtrInfo);
350
351	return SDValue ();
352	}
353

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp