1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86InstrInfo.h"
15#include "X86RegisterInfo.h"
16#include "X86Subtarget.h"
17#include "llvm/CodeGen/MachineFrameInfo.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLowering.h"
20
21#define GET_SDNODE_DESC
22#include "X86GenSDNodeInfo.inc"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "x86-selectiondag-info"
27
28static cl::opt<bool>
29 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(Val: false),
30 cl::desc("Use fast short rep mov in memcpy lowering"));
31
// Seed the generated base class with the TableGen-emitted table of
// X86-specific SDNode descriptions (from X86GenSDNodeInfo.inc above).
X86SelectionDAGInfo::X86SelectionDAGInfo()
    : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {}
34
/// Returns a printable name for the given target-specific opcode.
/// The switch covers only opcodes that have no *.td entry (and therefore no
/// generated name); everything else is resolved by the base-class table.
const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case X86ISD::NODE:                                                           \
    return "X86ISD::" #NODE;

  // These nodes don't have corresponding entries in *.td files yet.
  switch (static_cast<X86ISD::NodeType>(Opcode)) {
    NODE_NAME_CASE(POP_FROM_X87_REG)
    NODE_NAME_CASE(GlobalBaseReg)
    NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
    NODE_NAME_CASE(PCMPESTR)
    NODE_NAME_CASE(PCMPISTR)
    NODE_NAME_CASE(MGATHER)
    NODE_NAME_CASE(MSCATTER)
    NODE_NAME_CASE(AESENCWIDE128KL)
    NODE_NAME_CASE(AESDECWIDE128KL)
    NODE_NAME_CASE(AESENCWIDE256KL)
    NODE_NAME_CASE(AESDECWIDE256KL)
  }
#undef NODE_NAME_CASE

  // Fall back to the TableGen-generated name lookup.
  return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
}
58
59bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
60 // These nodes don't have corresponding entries in *.td files yet.
61 if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
62 Opcode <= X86ISD::LAST_MEMORY_OPCODE)
63 return true;
64
65 return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
66}
67
/// Structural verification of target nodes. The opcodes listed below are
/// known to deviate from their TableGen descriptions (see the inline notes
/// for the exact mismatch), so they are exempted from checking; all other
/// opcodes are verified by the generated base-class implementation.
void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
                                           const SDNode *N) const {
  switch (N->getOpcode()) {
  default:
    break;
  case X86ISD::VP2INTERSECT:
    // invalid number of results; expected 1, got 2
  case X86ISD::FSETCCM_SAE:
    // invalid number of operands; expected 3, got 4
  case X86ISD::CVTTP2SI_SAE:
  case X86ISD::CVTTP2UI_SAE:
  case X86ISD::CVTTP2IBS_SAE:
    // invalid number of operands; expected 1, got 2
  case X86ISD::CMPMM_SAE:
    // invalid number of operands; expected 4, got 5
  case X86ISD::CALL:
  case X86ISD::NT_BRIND:
    // operand #1 must have type i32 (iPTR), but has type i64
  case X86ISD::INSERTQI:
  case X86ISD::EXTRQI:
    // result #0 must have type v2i64, but has type v16i8/v8i16
    return;
  }

  SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
94
95/// Returns the best type to use with repmovs/repstos depending on alignment.
96static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
97 uint64_t Align = Alignment.value();
98 assert((Align != 0) && "Align is normalized");
99 assert(isPowerOf2_64(Align) && "Align is a power of 2");
100 switch (Align) {
101 case 1:
102 return MVT::i8;
103 case 2:
104 return MVT::i16;
105 case 4:
106 return MVT::i32;
107 default:
108 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
109 }
110}
111
112bool X86SelectionDAGInfo::isBaseRegConflictPossible(
113 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
114 // We cannot use TRI->hasBasePointer() until *after* we select all basic
115 // blocks. Legalization may introduce new stack temporaries with large
116 // alignment requirements. Fall back to generic code if there are any
117 // dynamic stack adjustments (hopefully rare) and the base pointer would
118 // conflict if we had to use it.
119 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
120 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
121 return false;
122
123 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
124 DAG.getSubtarget().getRegisterInfo());
125 return llvm::is_contained(Range&: ClobberSet, Element: TRI->getBaseRegister());
126}
127
128/// Emit a single REP STOSB instruction for a particular constant size.
129static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
130 const SDLoc &dl, SDValue Chain, SDValue Dst,
131 SDValue Val, SDValue Size, MVT AVT) {
132 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
133 unsigned AX = X86::AL;
134 switch (AVT.getSizeInBits()) {
135 case 8:
136 AX = X86::AL;
137 break;
138 case 16:
139 AX = X86::AX;
140 break;
141 case 32:
142 AX = X86::EAX;
143 break;
144 default:
145 AX = X86::RAX;
146 break;
147 }
148
149 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
150 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
151
152 SDValue InGlue;
153 Chain = DAG.getCopyToReg(Chain, dl, Reg: AX, N: Val, Glue: InGlue);
154 InGlue = Chain.getValue(R: 1);
155 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
156 InGlue = Chain.getValue(R: 1);
157 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
158 InGlue = Chain.getValue(R: 1);
159
160 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
161 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
162 return DAG.getNode(Opcode: X86ISD::REP_STOS, DL: dl, VTList: Tys, Ops);
163}
164
165/// Emit a single REP STOSB instruction for a particular constant size.
166static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
167 const SDLoc &dl, SDValue Chain, SDValue Dst,
168 SDValue Val, uint64_t Size) {
169 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
170 Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
171}
172
173/// Returns a REP STOS instruction, possibly with a few load/stores to implement
174/// a constant size memory set. In some cases where we know REP MOVS is
175/// inefficient we return an empty SDValue so the calling code can either
176/// generate a store sequence or call the runtime memset function.
177static SDValue emitConstantSizeRepstos(SelectionDAG &DAG,
178 const X86Subtarget &Subtarget,
179 const SDLoc &dl, SDValue Chain,
180 SDValue Dst, SDValue Val, uint64_t Size,
181 EVT SizeVT, Align Alignment,
182 bool isVolatile, bool AlwaysInline,
183 MachinePointerInfo DstPtrInfo) {
184 /// In case we optimize for size, we use repstosb even if it's less efficient
185 /// so we can save the loads/stores of the leftover.
186 if (DAG.getMachineFunction().getFunction().hasMinSize()) {
187 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
188 // Special case 0 because otherwise we get large literals,
189 // which causes larger encoding.
190 if ((Size & 31) == 0 && (ValC->getZExtValue() & 255) == 0) {
191 MVT BlockType = MVT::i32;
192 const uint64_t BlockBits = BlockType.getSizeInBits();
193 const uint64_t BlockBytes = BlockBits / 8;
194 const uint64_t BlockCount = Size / BlockBytes;
195
196 Val = DAG.getConstant(Val: 0, DL: dl, VT: BlockType);
197 // repstosd is same size as repstosb
198 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
199 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
200 }
201 }
202 return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
203 }
204
205 if (Size > Subtarget.getMaxInlineSizeThreshold())
206 return SDValue();
207
208 // If not DWORD aligned or size is more than the threshold, call the library.
209 // The libc version is likely to be faster for these cases. It can use the
210 // address value and run time information about the CPU.
211 if (Alignment < Align(4))
212 return SDValue();
213
214 MVT BlockType = MVT::i8;
215 uint64_t BlockCount = Size;
216 uint64_t BytesLeft = 0;
217
218 SDValue OriginalVal = Val;
219 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
220 BlockType = getOptimalRepType(Subtarget, Alignment);
221 uint64_t Value = ValC->getZExtValue() & 255;
222 const uint64_t BlockBits = BlockType.getSizeInBits();
223
224 if (BlockBits >= 16)
225 Value = (Value << 8) | Value;
226
227 if (BlockBits >= 32)
228 Value = (Value << 16) | Value;
229
230 if (BlockBits >= 64)
231 Value = (Value << 32) | Value;
232
233 const uint64_t BlockBytes = BlockBits / 8;
234 BlockCount = Size / BlockBytes;
235 BytesLeft = Size % BlockBytes;
236 Val = DAG.getConstant(Val: Value, DL: dl, VT: BlockType);
237 }
238
239 SDValue RepStos =
240 emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
241 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
242 /// RepStos can process the whole length.
243 if (BytesLeft == 0)
244 return RepStos;
245
246 // Handle the last 1 - 7 bytes.
247 SmallVector<SDValue, 4> Results;
248 Results.push_back(Elt: RepStos);
249 unsigned Offset = Size - BytesLeft;
250 EVT AddrVT = Dst.getValueType();
251
252 Results.push_back(
253 Elt: DAG.getMemset(Chain, dl,
254 Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AddrVT, N1: Dst,
255 N2: DAG.getConstant(Val: Offset, DL: dl, VT: AddrVT)),
256 Src: OriginalVal, Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT),
257 Alignment, isVol: isVolatile, AlwaysInline,
258 /* CI */ nullptr, DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset)));
259
260 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
261}
262
263SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
264 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
265 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
266 MachinePointerInfo DstPtrInfo) const {
267 // If to a segment-relative address space, use the default lowering.
268 if (DstPtrInfo.getAddrSpace() >= 256)
269 return SDValue();
270
271 // If the base register might conflict with our physical registers, bail out.
272 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
273 X86::ECX, X86::EAX, X86::EDI};
274 if (isBaseRegConflictPossible(DAG, ClobberSet))
275 return SDValue();
276
277 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
278 if (!ConstantSize)
279 return SDValue();
280
281 const X86Subtarget &Subtarget =
282 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
283 return emitConstantSizeRepstos(
284 DAG, Subtarget, dl, Chain, Dst, Val, Size: ConstantSize->getZExtValue(),
285 SizeVT: Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
286}
287
288/// Emit a single REP MOVS{B,W,D,Q} instruction.
289static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
290 const SDLoc &dl, SDValue Chain, SDValue Dst,
291 SDValue Src, SDValue Size, MVT AVT) {
292 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
293 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
294 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
295 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
296
297 SDValue InGlue;
298 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
299 InGlue = Chain.getValue(R: 1);
300 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
301 InGlue = Chain.getValue(R: 1);
302 Chain = DAG.getCopyToReg(Chain, dl, Reg: SI, N: Src, Glue: InGlue);
303 InGlue = Chain.getValue(R: 1);
304
305 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
306 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
307 return DAG.getNode(Opcode: X86ISD::REP_MOVS, DL: dl, VTList: Tys, Ops);
308}
309
310/// Emit a single REP MOVSB instruction for a particular constant size.
311static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
312 const SDLoc &dl, SDValue Chain, SDValue Dst,
313 SDValue Src, uint64_t Size) {
314 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
315 Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
316}
317
318/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
319/// a constant size memory copy. In some cases where we know REP MOVS is
320/// inefficient we return an empty SDValue so the calling code can either
321/// generate a load/store sequence or call the runtime memcpy function.
322static SDValue emitConstantSizeRepmov(
323 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
324 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
325 Align Alignment, bool isVolatile, bool AlwaysInline,
326 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
327 /// In case we optimize for size, we use repmovsb even if it's less efficient
328 /// so we can save the loads/stores of the leftover.
329 if (DAG.getMachineFunction().getFunction().hasMinSize())
330 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
331
332 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
333 /// efficient.
334 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
335 return SDValue();
336
337 /// If we have enhanced repmovs we use it.
338 if (Subtarget.hasERMSB())
339 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
340
341 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
342 /// We assume runtime memcpy will do a better job for unaligned copies when
343 /// ERMS is not present.
344 if (!AlwaysInline && (Alignment < Align(4)))
345 return SDValue();
346
347 const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
348 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
349 const uint64_t BlockCount = Size / BlockBytes;
350 const uint64_t BytesLeft = Size % BlockBytes;
351 SDValue RepMovs =
352 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
353 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
354
355 /// RepMov can process the whole length.
356 if (BytesLeft == 0)
357 return RepMovs;
358
359 assert(BytesLeft && "We have leftover at this point");
360
361 // Handle the last 1 - 7 bytes.
362 SmallVector<SDValue, 4> Results;
363 Results.push_back(Elt: RepMovs);
364 unsigned Offset = Size - BytesLeft;
365 EVT DstVT = Dst.getValueType();
366 EVT SrcVT = Src.getValueType();
367 Results.push_back(Elt: DAG.getMemcpy(
368 Chain, dl,
369 Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: DstVT, N1: Dst, N2: DAG.getConstant(Val: Offset, DL: dl, VT: DstVT)),
370 Src: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: SrcVT, N1: Src, N2: DAG.getConstant(Val: Offset, DL: dl, VT: SrcVT)),
371 Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT), Alignment, isVol: isVolatile,
372 /*AlwaysInline*/ true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
373 DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset), SrcPtrInfo: SrcPtrInfo.getWithOffset(O: Offset)));
374 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
375}
376
377SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
378 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
379 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
380 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
381 // If to a segment-relative address space, use the default lowering.
382 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
383 return SDValue();
384
385 // If the base registers conflict with our physical registers, use the default
386 // lowering.
387 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
388 X86::ECX, X86::ESI, X86::EDI};
389 if (isBaseRegConflictPossible(DAG, ClobberSet))
390 return SDValue();
391
392 const X86Subtarget &Subtarget =
393 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
394
395 // If enabled and available, use fast short rep mov.
396 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
397 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, AVT: MVT::i8);
398
399 /// Handle constant sizes
400 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size))
401 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
402 Size: ConstantSize->getZExtValue(),
403 SizeVT: Size.getValueType(), Alignment, isVolatile,
404 AlwaysInline, DstPtrInfo, SrcPtrInfo);
405
406 return SDValue();
407}
408