1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86ISelLowering.h"
15#include "X86InstrInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/SelectionDAG.h"
20#include "llvm/CodeGen/TargetLowering.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "x86-selectiondag-info"
25
26static cl::opt<bool>
27 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(Val: false),
28 cl::desc("Use fast short rep mov in memcpy lowering"));
29
30bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
31 return Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
32 Opcode <= X86ISD::LAST_MEMORY_OPCODE;
33}
34
35bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
36 return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE &&
37 Opcode <= X86ISD::LAST_STRICTFP_OPCODE;
38}
39
40/// Returns the best type to use with repmovs/repstos depending on alignment.
41static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
42 uint64_t Align = Alignment.value();
43 assert((Align != 0) && "Align is normalized");
44 assert(isPowerOf2_64(Align) && "Align is a power of 2");
45 switch (Align) {
46 case 1:
47 return MVT::i8;
48 case 2:
49 return MVT::i16;
50 case 4:
51 return MVT::i32;
52 default:
53 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
54 }
55}
56
57bool X86SelectionDAGInfo::isBaseRegConflictPossible(
58 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
59 // We cannot use TRI->hasBasePointer() until *after* we select all basic
60 // blocks. Legalization may introduce new stack temporaries with large
61 // alignment requirements. Fall back to generic code if there are any
62 // dynamic stack adjustments (hopefully rare) and the base pointer would
63 // conflict if we had to use it.
64 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
65 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
66 return false;
67
68 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
69 DAG.getSubtarget().getRegisterInfo());
70 return llvm::is_contained(Range&: ClobberSet, Element: TRI->getBaseRegister());
71}
72
73/// Emit a single REP STOSB instruction for a particular constant size.
74static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
75 const SDLoc &dl, SDValue Chain, SDValue Dst,
76 SDValue Val, SDValue Size, MVT AVT) {
77 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
78 unsigned AX = X86::AL;
79 switch (AVT.getSizeInBits()) {
80 case 8:
81 AX = X86::AL;
82 break;
83 case 16:
84 AX = X86::AX;
85 break;
86 case 32:
87 AX = X86::EAX;
88 break;
89 default:
90 AX = X86::RAX;
91 break;
92 }
93
94 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
95 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
96
97 SDValue InGlue;
98 Chain = DAG.getCopyToReg(Chain, dl, Reg: AX, N: Val, Glue: InGlue);
99 InGlue = Chain.getValue(R: 1);
100 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
101 InGlue = Chain.getValue(R: 1);
102 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
103 InGlue = Chain.getValue(R: 1);
104
105 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
106 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
107 return DAG.getNode(Opcode: X86ISD::REP_STOS, DL: dl, VTList: Tys, Ops);
108}
109
110/// Emit a single REP STOSB instruction for a particular constant size.
111static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
112 const SDLoc &dl, SDValue Chain, SDValue Dst,
113 SDValue Val, uint64_t Size) {
114 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
115 Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
116}
117
118/// Returns a REP STOS instruction, possibly with a few load/stores to implement
119/// a constant size memory set. In some cases where we know REP MOVS is
120/// inefficient we return an empty SDValue so the calling code can either
121/// generate a store sequence or call the runtime memset function.
122static SDValue emitConstantSizeRepstos(SelectionDAG &DAG,
123 const X86Subtarget &Subtarget,
124 const SDLoc &dl, SDValue Chain,
125 SDValue Dst, SDValue Val, uint64_t Size,
126 EVT SizeVT, Align Alignment,
127 bool isVolatile, bool AlwaysInline,
128 MachinePointerInfo DstPtrInfo) {
129 /// In case we optimize for size, we use repstosb even if it's less efficient
130 /// so we can save the loads/stores of the leftover.
131 if (DAG.getMachineFunction().getFunction().hasMinSize()) {
132 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
133 // Special case 0 because otherwise we get large literals,
134 // which causes larger encoding.
135 if ((Size & 31) == 0 && (ValC->getZExtValue() & 255) == 0) {
136 MVT BlockType = MVT::i32;
137 const uint64_t BlockBits = BlockType.getSizeInBits();
138 const uint64_t BlockBytes = BlockBits / 8;
139 const uint64_t BlockCount = Size / BlockBytes;
140
141 Val = DAG.getConstant(Val: 0, DL: dl, VT: BlockType);
142 // repstosd is same size as repstosb
143 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
144 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
145 }
146 }
147 return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
148 }
149
150 if (Size > Subtarget.getMaxInlineSizeThreshold())
151 return SDValue();
152
153 // If not DWORD aligned or size is more than the threshold, call the library.
154 // The libc version is likely to be faster for these cases. It can use the
155 // address value and run time information about the CPU.
156 if (Alignment < Align(4))
157 return SDValue();
158
159 MVT BlockType = MVT::i8;
160 uint64_t BlockCount = Size;
161 uint64_t BytesLeft = 0;
162
163 SDValue OriginalVal = Val;
164 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
165 BlockType = getOptimalRepType(Subtarget, Alignment);
166 uint64_t Value = ValC->getZExtValue() & 255;
167 const uint64_t BlockBits = BlockType.getSizeInBits();
168
169 if (BlockBits >= 16)
170 Value = (Value << 8) | Value;
171
172 if (BlockBits >= 32)
173 Value = (Value << 16) | Value;
174
175 if (BlockBits >= 64)
176 Value = (Value << 32) | Value;
177
178 const uint64_t BlockBytes = BlockBits / 8;
179 BlockCount = Size / BlockBytes;
180 BytesLeft = Size % BlockBytes;
181 Val = DAG.getConstant(Val: Value, DL: dl, VT: BlockType);
182 }
183
184 SDValue RepStos =
185 emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
186 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
187 /// RepStos can process the whole length.
188 if (BytesLeft == 0)
189 return RepStos;
190
191 // Handle the last 1 - 7 bytes.
192 SmallVector<SDValue, 4> Results;
193 Results.push_back(Elt: RepStos);
194 unsigned Offset = Size - BytesLeft;
195 EVT AddrVT = Dst.getValueType();
196
197 Results.push_back(
198 Elt: DAG.getMemset(Chain, dl,
199 Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AddrVT, N1: Dst,
200 N2: DAG.getConstant(Val: Offset, DL: dl, VT: AddrVT)),
201 Src: OriginalVal, Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT),
202 Alignment, isVol: isVolatile, AlwaysInline,
203 /* CI */ nullptr, DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset)));
204
205 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
206}
207
208SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
209 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
210 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
211 MachinePointerInfo DstPtrInfo) const {
212 // If to a segment-relative address space, use the default lowering.
213 if (DstPtrInfo.getAddrSpace() >= 256)
214 return SDValue();
215
216 // If the base register might conflict with our physical registers, bail out.
217 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
218 X86::ECX, X86::EAX, X86::EDI};
219 if (isBaseRegConflictPossible(DAG, ClobberSet))
220 return SDValue();
221
222 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size);
223 if (!ConstantSize)
224 return SDValue();
225
226 const X86Subtarget &Subtarget =
227 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
228 return emitConstantSizeRepstos(
229 DAG, Subtarget, dl, Chain, Dst, Val, Size: ConstantSize->getZExtValue(),
230 SizeVT: Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
231}
232
233/// Emit a single REP MOVS{B,W,D,Q} instruction.
234static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
235 const SDLoc &dl, SDValue Chain, SDValue Dst,
236 SDValue Src, SDValue Size, MVT AVT) {
237 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
238 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
239 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
240 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
241
242 SDValue InGlue;
243 Chain = DAG.getCopyToReg(Chain, dl, Reg: CX, N: Size, Glue: InGlue);
244 InGlue = Chain.getValue(R: 1);
245 Chain = DAG.getCopyToReg(Chain, dl, Reg: DI, N: Dst, Glue: InGlue);
246 InGlue = Chain.getValue(R: 1);
247 Chain = DAG.getCopyToReg(Chain, dl, Reg: SI, N: Src, Glue: InGlue);
248 InGlue = Chain.getValue(R: 1);
249
250 SDVTList Tys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
251 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
252 return DAG.getNode(Opcode: X86ISD::REP_MOVS, DL: dl, VTList: Tys, Ops);
253}
254
255/// Emit a single REP MOVSB instruction for a particular constant size.
256static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
257 const SDLoc &dl, SDValue Chain, SDValue Dst,
258 SDValue Src, uint64_t Size) {
259 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
260 Size: DAG.getIntPtrConstant(Val: Size, DL: dl), AVT: MVT::i8);
261}
262
263/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
264/// a constant size memory copy. In some cases where we know REP MOVS is
265/// inefficient we return an empty SDValue so the calling code can either
266/// generate a load/store sequence or call the runtime memcpy function.
267static SDValue emitConstantSizeRepmov(
268 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
269 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
270 Align Alignment, bool isVolatile, bool AlwaysInline,
271 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
272 /// In case we optimize for size, we use repmovsb even if it's less efficient
273 /// so we can save the loads/stores of the leftover.
274 if (DAG.getMachineFunction().getFunction().hasMinSize())
275 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
276
277 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
278 /// efficient.
279 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
280 return SDValue();
281
282 /// If we have enhanced repmovs we use it.
283 if (Subtarget.hasERMSB())
284 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
285
286 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
287 /// We assume runtime memcpy will do a better job for unaligned copies when
288 /// ERMS is not present.
289 if (!AlwaysInline && (Alignment < Align(4)))
290 return SDValue();
291
292 const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
293 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
294 const uint64_t BlockCount = Size / BlockBytes;
295 const uint64_t BytesLeft = Size % BlockBytes;
296 SDValue RepMovs =
297 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
298 Size: DAG.getIntPtrConstant(Val: BlockCount, DL: dl), AVT: BlockType);
299
300 /// RepMov can process the whole length.
301 if (BytesLeft == 0)
302 return RepMovs;
303
304 assert(BytesLeft && "We have leftover at this point");
305
306 // Handle the last 1 - 7 bytes.
307 SmallVector<SDValue, 4> Results;
308 Results.push_back(Elt: RepMovs);
309 unsigned Offset = Size - BytesLeft;
310 EVT DstVT = Dst.getValueType();
311 EVT SrcVT = Src.getValueType();
312 Results.push_back(Elt: DAG.getMemcpy(
313 Chain, dl,
314 Dst: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: DstVT, N1: Dst, N2: DAG.getConstant(Val: Offset, DL: dl, VT: DstVT)),
315 Src: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: SrcVT, N1: Src, N2: DAG.getConstant(Val: Offset, DL: dl, VT: SrcVT)),
316 Size: DAG.getConstant(Val: BytesLeft, DL: dl, VT: SizeVT), Alignment, isVol: isVolatile,
317 /*AlwaysInline*/ true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
318 DstPtrInfo: DstPtrInfo.getWithOffset(O: Offset), SrcPtrInfo: SrcPtrInfo.getWithOffset(O: Offset)));
319 return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Results);
320}
321
322SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
323 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
324 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
325 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
326 // If to a segment-relative address space, use the default lowering.
327 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
328 return SDValue();
329
330 // If the base registers conflict with our physical registers, use the default
331 // lowering.
332 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
333 X86::ECX, X86::ESI, X86::EDI};
334 if (isBaseRegConflictPossible(DAG, ClobberSet))
335 return SDValue();
336
337 const X86Subtarget &Subtarget =
338 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
339
340 // If enabled and available, use fast short rep mov.
341 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
342 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, AVT: MVT::i8);
343
344 /// Handle constant sizes
345 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Val&: Size))
346 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
347 Size: ConstantSize->getZExtValue(),
348 SizeVT: Size.getValueType(), Alignment, isVolatile,
349 AlwaysInline, DstPtrInfo, SrcPtrInfo);
350
351 return SDValue();
352}
353