1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86InstrInfo.h"
15#include "X86RegisterInfo.h"
16#include "X86Subtarget.h"
17#include "llvm/CodeGen/MachineFrameInfo.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLowering.h"
20
21#define GET_SDNODE_DESC
22#include "X86GenSDNodeInfo.inc"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "x86-selectiondag-info"
27
// Command-line override for memcpy lowering: when enabled (and the subtarget
// reports FSRM), EmitTargetCodeForMemcpy emits a plain "rep movsb" instead of
// choosing among the constant-size expansions below. Hidden; off by default.
static cl::opt<bool>
    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
                     cl::desc("Use fast short rep mov in memcpy lowering"));
31
// Seed the generated SelectionDAG target info with the TableGen'erated X86
// SDNode descriptor table (X86GenSDNodeInfo, from X86GenSDNodeInfo.inc above).
X86SelectionDAGInfo::X86SelectionDAGInfo()
    : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {}
34
35const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
36#define NODE_NAME_CASE(NODE) \
37 case X86ISD::NODE: \
38 return "X86ISD::" #NODE;
39
40 // These nodes don't have corresponding entries in *.td files yet.
41 switch (static_cast<X86ISD::NodeType>(Opcode)) {
42 NODE_NAME_CASE(POP_FROM_X87_REG)
43 NODE_NAME_CASE(GlobalBaseReg)
44 NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
45 NODE_NAME_CASE(PCMPESTR)
46 NODE_NAME_CASE(PCMPISTR)
47 NODE_NAME_CASE(MGATHER)
48 NODE_NAME_CASE(MSCATTER)
49 NODE_NAME_CASE(AESENCWIDE128KL)
50 NODE_NAME_CASE(AESDECWIDE128KL)
51 NODE_NAME_CASE(AESENCWIDE256KL)
52 NODE_NAME_CASE(AESDECWIDE256KL)
53 }
54#undef NODE_NAME_CASE
55
56 return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
57}
58
59bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
60 // These nodes don't have corresponding entries in *.td files yet.
61 if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
62 Opcode <= X86ISD::LAST_MEMORY_OPCODE)
63 return true;
64
65 return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
66}
67
// Verify a target node against its generated descriptor. The opcodes listed
// below are known to deviate from their *.td descriptions (see the inline
// notes for the exact mismatch the verifier would report); they are skipped
// here and simply returned as valid until the descriptors are corrected.
void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
                                           const SDNode *N) const {
  switch (N->getOpcode()) {
  default:
    break;
  case X86ISD::VP2INTERSECT:
    // invalid number of results; expected 1, got 2
  case X86ISD::FSETCCM_SAE:
    // invalid number of operands; expected 3, got 4
  case X86ISD::CVTTP2SI_SAE:
  case X86ISD::CVTTP2UI_SAE:
  case X86ISD::CVTTP2IBS_SAE:
    // invalid number of operands; expected 1, got 2
  case X86ISD::CMPMM_SAE:
    // invalid number of operands; expected 4, got 5
  case X86ISD::CALL:
  case X86ISD::NT_BRIND:
    // operand #1 must have type i32 (iPTR), but has type i64
  case X86ISD::INSERTQI:
  case X86ISD::EXTRQI:
    // result #0 must have type v2i64, but has type v16i8/v8i16
    return;
  }

  // All other opcodes go through the generated verification.
  SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
94
95/// Returns the best type to use with repmovs/repstos depending on alignment.
96static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
97 uint64_t Align = Alignment.value();
98 assert((Align != 0) && "Align is normalized");
99 assert(isPowerOf2_64(Align) && "Align is a power of 2");
100 switch (Align) {
101 case 1:
102 return MVT::i8;
103 case 2:
104 return MVT::i16;
105 case 4:
106 return MVT::i32;
107 default:
108 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
109 }
110}
111
112bool X86SelectionDAGInfo::isBaseRegConflictPossible(
113 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
114 // We cannot use TRI->hasBasePointer() until *after* we select all basic
115 // blocks. Legalization may introduce new stack temporaries with large
116 // alignment requirements. Fall back to generic code if there are any
117 // dynamic stack adjustments (hopefully rare) and the base pointer would
118 // conflict if we had to use it.
119 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
120 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
121 return false;
122
123 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
124 DAG.getSubtarget().getRegisterInfo());
125 return llvm::is_contained(Range&: ClobberSet, Element: TRI->getBaseRegister());
126}
127
128/// Emit a single REP STOSB instruction for a particular constant size.
129static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
130 const SDLoc &dl, SDValue Chain, SDValue Dst,
131 SDValue Val, SDValue Size, MVT AVT) {
132 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
133 unsigned AX = X86::AL;
134 switch (AVT.getSizeInBits()) {
135 case 8:
136 AX = X86::AL;
137 break;
138 case 16:
139 AX = X86::AX;
140 break;
141 case 32:
142 AX = X86::EAX;
143 break;
144 default:
145 AX = X86::RAX;
146 break;
147 }
148
149 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
150 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
151
152 SDValue InGlue;
153 Chain = DAG.getCopyToReg(Chain, dl, Reg: AX, N: Val, Glue: InGlue);
154 InGlue = Chain.getValue(R: 1);
155 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
156 InGlue = Chain.getValue(R: 1);
157 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
158 InGlue = Chain.getValue(R: 1);
159
160 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
161 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
162 return DAG.getNode(Opcode: X86ISD::REP_STOS, DL: dl, VTList: Tys, Ops);
163}
164
/// Emit a single REP STOSB instruction for a particular constant size.
/// Convenience wrapper around emitRepstos with a byte element type (MVT::i8),
/// so the element count equals the byte count.
static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Val, uint64_t Size) {
  return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}
172
/// Returns a REP STOS instruction, possibly with a few load/stores to implement
/// a constant size memory set. In some cases where we know REP STOS is
/// inefficient we return an empty SDValue so the calling code can either
/// generate a store sequence or call the runtime memset function.
static SDValue emitConstantSizeRepstos(SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget,
                                       const SDLoc &dl, SDValue Chain,
                                       SDValue Dst, SDValue Val, uint64_t Size,
                                       EVT SizeVT, Align Alignment,
                                       bool isVolatile, bool AlwaysInline,
                                       MachinePointerInfo DstPtrInfo) {
  /// In case we optimize for size, we use repstosb even if it's less efficient
  /// so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize()) {
    if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
      // Special case 0 because otherwise we get large literals,
      // which causes larger encoding.
      // Only taken when the size is a multiple of 32 bytes, so the i32 block
      // count below is exact with no leftover.
      if ((Size & 31) == 0 && (ValC->getZExtValue() & 255) == 0) {
        MVT BlockType = MVT::i32;
        const uint64_t BlockBits = BlockType.getSizeInBits();
        const uint64_t BlockBytes = BlockBits / 8;
        const uint64_t BlockCount = Size / BlockBytes;

        Val = DAG.getConstant(0, dl, BlockType);
        // repstosd is same size as repstosb
        return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
                           DAG.getIntPtrConstant(BlockCount, dl), BlockType);
      }
    }
    return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
  }

  // Beyond the subtarget's inline threshold, let the caller emit a libcall.
  if (Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if (Alignment < Align(4))
    return SDValue();

  // Defaults cover the non-constant-Val case: byte-wise rep stos, no leftover.
  MVT BlockType = MVT::i8;
  uint64_t BlockCount = Size;
  uint64_t BytesLeft = 0;

  // Keep the unsplatted value around for the tail memset below.
  SDValue OriginalVal = Val;
  if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
    BlockType = getOptimalRepType(Subtarget, Alignment);
    uint64_t Value = ValC->getZExtValue() & 255;
    const uint64_t BlockBits = BlockType.getSizeInBits();

    // Splat the low byte across the chosen element width.
    if (BlockBits >= 16)
      Value = (Value << 8) | Value;

    if (BlockBits >= 32)
      Value = (Value << 16) | Value;

    if (BlockBits >= 64)
      Value = (Value << 32) | Value;

    const uint64_t BlockBytes = BlockBits / 8;
    BlockCount = Size / BlockBytes;
    BytesLeft = Size % BlockBytes;
    Val = DAG.getConstant(Value, dl, BlockType);
  }

  SDValue RepStos =
      emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
  /// RepStos can process the whole length.
  if (BytesLeft == 0)
    return RepStos;

  // Handle the last 1 - 7 bytes with a small memset at Dst + Offset, then
  // join both chains with a TokenFactor.
  SmallVector<SDValue, 4> Results;
  Results.push_back(RepStos);
  unsigned Offset = Size - BytesLeft;
  EVT AddrVT = Dst.getValueType();

  Results.push_back(
      DAG.getMemset(Chain, dl,
                    DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                DAG.getConstant(Offset, dl, AddrVT)),
                    OriginalVal, DAG.getConstant(BytesLeft, dl, SizeVT),
                    Alignment, isVolatile, AlwaysInline,
                    /* CI */ nullptr, DstPtrInfo.getWithOffset(Offset)));

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
262
263SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
264 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
265 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
266 MachinePointerInfo DstPtrInfo) const {
267 const X86Subtarget &Subtarget =
268 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
269
270 // If to a segment-relative address space, use the default lowering.
271 if (DstPtrInfo.getAddrSpace() >= 256)
272 return SDValue();
273
274 // REP STOS uses EDI on x86-32. Fall back if the user reserved EDI, so the
275 // generic expander can avoid emitting REP STOS.
276 if (!Subtarget.is64Bit() && Subtarget.isRegisterReservedByUser(i: X86::EDI))
277 return SDValue();
278
279 // If the base register might conflict with our physical registers, bail out.
280 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
281 X86::ECX, X86::EAX, X86::EDI};
282 if (isBaseRegConflictPossible(DAG, ClobberSet))
283 return SDValue();
284
285 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
286 if (!ConstantSize)
287 return SDValue();
288
289 return emitConstantSizeRepstos(
290 DAG, Subtarget, dl, Chain, Dst, Val, Size: ConstantSize->getZExtValue(),
291 SizeVT: Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
292}
293
294/// Emit a single REP MOVS{B,W,D,Q} instruction.
295static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
296 const SDLoc &dl, SDValue Chain, SDValue Dst,
297 SDValue Src, SDValue Size, MVT AVT) {
298 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
299 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
300 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
301 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
302
303 SDValue InGlue;
304 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
305 InGlue = Chain.getValue(R: 1);
306 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
307 InGlue = Chain.getValue(R: 1);
308 Chain = DAG.getCopyToReg(Chain, dl, Reg: SI, N: Src, Glue: InGlue);
309 InGlue = Chain.getValue(R: 1);
310
311 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
312 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
313 return DAG.getNode(Opcode: X86ISD::REP_MOVS, DL: dl, VTList: Tys, Ops);
314}
315
/// Emit a single REP MOVSB instruction for a particular constant size.
/// Convenience wrapper around emitRepmovs with a byte element type (MVT::i8),
/// so the element count equals the byte count.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Src, uint64_t Size) {
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}
323
/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
/// a constant size memory copy. In some cases where we know REP MOVS is
/// inefficient we return an empty SDValue so the calling code can either
/// generate a load/store sequence or call the runtime memcpy function.
static SDValue emitConstantSizeRepmov(
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
    Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
  /// In case we optimize for size, we use repmovsb even if it's less efficient
  /// so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
  /// efficient.
  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  /// If we have enhanced repmovs we use it.
  if (Subtarget.hasERMSB())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  // Trivially true after the early return above; kept as documentation that
  // the remainder of this function is the non-ERMSB path.
  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
  /// We assume runtime memcpy will do a better job for unaligned copies when
  /// ERMS is not present.
  if (!AlwaysInline && (Alignment < Align(4)))
    return SDValue();

  // Copy in the widest element the alignment allows; any remainder smaller
  // than one element is handled by the inline memcpy below.
  const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
  const uint64_t BlockCount = Size / BlockBytes;
  const uint64_t BytesLeft = Size % BlockBytes;
  SDValue RepMovs =
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);

  /// RepMov can process the whole length.
  if (BytesLeft == 0)
    return RepMovs;

  assert(BytesLeft && "We have leftover at this point");

  // Handle the last 1 - 7 bytes with an always-inline memcpy at Offset into
  // both pointers, then join the two chains with a TokenFactor.
  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  unsigned Offset = Size - BytesLeft;
  EVT DstVT = Dst.getValueType();
  EVT SrcVT = Src.getValueType();
  Results.push_back(DAG.getMemcpy(
      Chain, dl,
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
      DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
      /*AlwaysInline*/ true, /*CI=*/nullptr, std::nullopt,
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
382
383SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
384 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
385 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
386 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
387 const X86Subtarget &Subtarget =
388 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
389
390 // If to a segment-relative address space, use the default lowering.
391 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
392 return SDValue();
393
394 // REP MOVS uses EDI/ESI on x86-32. fall back only when EDI is
395 // reserved so the generic expander can avoid emitting REP MOVS.
396 if (!Subtarget.is64Bit() && Subtarget.isRegisterReservedByUser(i: X86::EDI))
397 return SDValue();
398
399 // If the base registers conflict with our physical registers, use the default
400 // lowering.
401 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
402 X86::ECX, X86::ESI, X86::EDI};
403 if (isBaseRegConflictPossible(DAG, ClobberSet))
404 return SDValue();
405
406 // If enabled and available, use fast short rep mov.
407 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
408 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, AVT: MVT::i8);
409
410 /// Handle constant sizes
411 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size))
412 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
413 Size: ConstantSize->getZExtValue(),
414 SizeVT: Size.getValueType(), Alignment, isVolatile,
415 AlwaysInline, DstPtrInfo, SrcPtrInfo);
416
417 return SDValue();
418}
419