X86Subtarget.cpp source code [llvm_projects/llvm/lib/Target/X86/X86Subtarget.cpp]

1	//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the X86 specific subclass of TargetSubtargetInfo.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "X86Subtarget.h"
14	#include "GISel/X86CallLowering.h"
15	#include "GISel/X86LegalizerInfo.h"
16	#include "GISel/X86RegisterBankInfo.h"
17	#include "MCTargetDesc/X86BaseInfo.h"
18	#include "X86.h"
19	#include "X86MacroFusion.h"
20	#include "X86TargetMachine.h"
21	#include "llvm/CodeGen/GlobalISel/CallLowering.h"
22	#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
23	#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24	#include "llvm/CodeGen/ScheduleDAGMutation.h"
25	#include "llvm/IR/Attributes.h"
26	#include "llvm/IR/ConstantRange.h"
27	#include "llvm/IR/Function.h"
28	#include "llvm/IR/GlobalValue.h"
29	#include "llvm/IR/Module.h"
30	#include "llvm/Support/Casting.h"
31	#include "llvm/Support/CodeGen.h"
32	#include "llvm/Support/CommandLine.h"
33	#include "llvm/Support/Debug.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/raw_ostream.h"
36	#include "llvm/Target/TargetMachine.h"
37	#include "llvm/TargetParser/Triple.h"
38
39	#if defined(_MSC_VER)
40	#include <intrin.h>
41	#endif
42
43	using namespace llvm;
44
45	#define DEBUG_TYPE "subtarget"
46
47	#define GET_SUBTARGETINFO_TARGET_DESC
48	#define GET_SUBTARGETINFO_CTOR
49	#include "X86GenSubtargetInfo.inc"
50
51	// Temporary option to control early if-conversion for x86 while adding machine
52	// models.
53	static cl::opt<bool>
54	X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
55	cl::desc ("Enable early if-conversion on X86"));
56
57
58	/// Classify a blockaddress reference for the current subtarget according to how
59	/// we should reference it in a non-pcrel context.
60	unsigned char X86Subtarget::classifyBlockAddressReference() const {
61	return classifyLocalReference(GV: nullptr);
62	}
63
64	/// Classify a global variable reference for the current subtarget according to
65	/// how we should reference it in a non-pcrel context.
66	unsigned char
67	X86Subtarget::classifyGlobalReference(const GlobalValue GV) const* {
68	return classifyGlobalReference(GV, M: *GV->getParent());
69	}
70
71	unsigned char
72	X86Subtarget::classifyLocalReference(const GlobalValue GV) const* {
73	CodeModel::Model CM = TM.getCodeModel();
74	// Tagged globals have non-zero upper bits, which makes direct references
75	// require a 64-bit immediate. With the small/medium code models this causes
76	// relocation errors, so we go through the GOT instead.
77	if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(Val: GV))
78	return X86II::MO_GOTPCREL_NORELAX;
79
80	// If we're not PIC, it's not very interesting.
81	if (!isPositionIndependent())
82	return X86II::MO_NO_FLAG;
83
84	if (is64Bit()) {
85	// 64-bit ELF PIC local references may use GOTOFF relocations.
86	if (isTargetELF()) {
87	assert(CM != CodeModel::Tiny &&
88	"Tiny codesize model not supported on X86");
89	// In the large code model, all text is far from any global data, so we
90	// use GOTOFF.
91	if (CM == CodeModel::Large)
92	return X86II::MO_GOTOFF;
93	// Large GlobalValues use GOTOFF, otherwise use RIP-rel access.
94	if (GV)
95	return TM.isLargeGlobalValue(GV) ? X86II::MO_GOTOFF : X86II::MO_NO_FLAG;
96	// GV == nullptr is for all other non-GlobalValue global data like the
97	// constant pool, jump tables, labels, etc. The small and medium code
98	// models treat these as accessible with a RIP-rel access.
99	return X86II::MO_NO_FLAG;
100	}
101
102	// Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
103	// both of which use MO_NO_FLAG.
104	return X86II::MO_NO_FLAG;
105	}
106
107	// The COFF dynamic linker just patches the executable sections.
108	if (isTargetCOFF())
109	return X86II::MO_NO_FLAG;
110
111	if (isTargetDarwin()) {
112	// 32 bit macho has no relocation for a-b if a is undefined, even if
113	// b is in the section that is being relocated.
114	// This means we have to use o load even for GVs that are known to be
115	// local to the dso.
116	if (GV && (GV->isDeclarationForLinker() \|\| GV->hasCommonLinkage()))
117	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
118
119	return X86II::MO_PIC_BASE_OFFSET;
120	}
121
122	return X86II::MO_GOTOFF;
123	}
124
125	unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
126	const Module &M) const {
127	// The static large model never uses stubs.
128	if (TM.getCodeModel() == CodeModel::Large && !isPositionIndependent())
129	return X86II::MO_NO_FLAG;
130
131	// Absolute symbols can be referenced directly.
132	if (GV) {
133	if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
134	// See if we can use the 8-bit immediate form. Note that some instructions
135	// will sign extend the immediate operand, so to be conservative we only
136	// accept the range [0,128).
137	if (CR ->getUnsignedMax().ult(RHS: `128`))
138	return X86II::MO_ABS8;
139	else
140	return X86II::MO_NO_FLAG;
141	}
142	}
143
144	if (TM.shouldAssumeDSOLocal(GV))
145	return classifyLocalReference(GV);
146
147	if (isTargetCOFF()) {
148	// ExternalSymbolSDNode like _tls_index.
149	if (!GV)
150	return X86II::MO_NO_FLAG;
151	if (GV->hasDLLImportStorageClass())
152	return X86II::MO_DLLIMPORT;
153	return X86II::MO_COFFSTUB;
154	}
155	// Some JIT users use -win32-elf triples; these shouldn't use GOT tables.*
156	if (isOSWindows())
157	return X86II::MO_NO_FLAG;
158
159	if (is64Bit()) {
160	// ELF supports a large, truly PIC code model with non-PC relative GOT
161	// references. Other object file formats do not. Use the no-flag, 64-bit
162	// reference for them.
163	if (TM.getCodeModel() == CodeModel::Large)
164	return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
165	// Tagged globals have non-zero upper bits, which makes direct references
166	// require a 64-bit immediate. So we can't let the linker relax the
167	// relocation to a 32-bit RIP-relative direct reference.
168	if (AllowTaggedGlobals && GV && !isa<Function>(Val: GV))
169	return X86II::MO_GOTPCREL_NORELAX;
170	return X86II::MO_GOTPCREL;
171	}
172
173	if (isTargetDarwin()) {
174	if (!isPositionIndependent())
175	return X86II::MO_DARWIN_NONLAZY;
176	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
177	}
178
179	// 32-bit ELF references GlobalAddress directly in static relocation model.
180	// We cannot use MO_GOT because EBX may not be set up.
181	if (TM.getRelocationModel() == Reloc::Static)
182	return X86II::MO_NO_FLAG;
183	return X86II::MO_GOT;
184	}
185
186	unsigned char
187	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue GV) const* {
188	return classifyGlobalFunctionReference(GV, M: *GV->getParent());
189	}
190
191	unsigned char
192	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
193	const Module &M) const {
194	if (TM.shouldAssumeDSOLocal(GV))
195	return X86II::MO_NO_FLAG;
196
197	// Functions on COFF can be non-DSO local for three reasons:
198	// - They are intrinsic functions (!GV)
199	// - They are marked dllimport
200	// - They are extern_weak, and a stub is needed
201	if (isTargetCOFF()) {
202	if (!GV)
203	return X86II::MO_NO_FLAG;
204	if (GV->hasDLLImportStorageClass())
205	return X86II::MO_DLLIMPORT;
206	return X86II::MO_COFFSTUB;
207	}
208
209	const Function *F = dyn_cast_or_null<Function>(Val: GV);
210
211	if (isTargetELF()) {
212	if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
213	// According to psABI, PLT stub clobbers XMM8-XMM15.
214	// In Regcall calling convention those registers are used for passing
215	// parameters. Thus we need to prevent lazy binding in Regcall.
216	return X86II::MO_GOTPCREL;
217	// If PLT must be avoided then the call should be via GOTPCREL.
218	if (((F && F->hasFnAttribute(Kind: Attribute::NonLazyBind)) \|\|
219	(!F && M.getRtLibUseGOT())) &&
220	is64Bit())
221	return X86II::MO_GOTPCREL;
222	// Reference ExternalSymbol directly in static relocation model.
223	if (!is64Bit() && !GV && TM.getRelocationModel() == Reloc::Static)
224	return X86II::MO_NO_FLAG;
225	return X86II::MO_PLT;
226	}
227
228	if (is64Bit()) {
229	if (F && F->hasFnAttribute(Kind: Attribute::NonLazyBind))
230	// If the function is marked as non-lazy, generate an indirect call
231	// which loads from the GOT directly. This avoids runtime overhead
232	// at the cost of eager binding (and one extra byte of encoding).
233	return X86II::MO_GOTPCREL;
234	return X86II::MO_NO_FLAG;
235	}
236
237	return X86II::MO_NO_FLAG;
238	}
239
240	/// Return true if the subtarget allows calls to immediate address.
241	bool X86Subtarget::isLegalToCallImmediateAddr() const {
242	// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
243	// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
244	// the following check for Win32 should be removed.
245	if (Is64Bit \|\| isTargetWin32())
246	return false;
247	return isTargetELF() \|\| TM.getRelocationModel() == Reloc::Static;
248	}
249
250	void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
251	StringRef FS) {
252	if (CPU.empty())
253	CPU = "generic";
254
255	if (TuneCPU.empty())
256	TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
257
258	std::string FullFS = X86_MC::ParseX86Triple(TT: TargetTriple);
259	assert(!FullFS.empty() && "Failed to parse X86 triple");
260
261	if (!FS.empty())
262	FullFS = (Twine (FullFS) + "," + FS).str();
263
264	// Attach EVEX512 feature when we have AVX512 features with a default CPU.
265	// "pentium4" is default CPU for 32-bit targets.
266	// "x86-64" is default CPU for 64-bit targets.
267	if (CPU == "generic" \|\| CPU == "pentium4" \|\| CPU == "x86-64") {
268	size_t posNoEVEX512 = FS.rfind(Str: "-evex512");
269	// Make sure we won't be cheated by "-avx512fp16".
270	size_t posNoAVX512F =
271	FS.ends_with(Suffix: "-avx512f") ? FS.size() - `8` : FS.rfind(Str: "-avx512f,");
272	size_t posEVEX512 = FS.rfind(Str: "+evex512");
273	// Any AVX512XXX will enable AVX512F.
274	size_t posAVX512F = FS.rfind(Str: "+avx512");
275
276	if (posAVX512F != StringRef::npos &&
277	(posNoAVX512F == StringRef::npos \|\| posNoAVX512F < posAVX512F))
278	if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
279	FullFS += ",+evex512";
280	}
281
282	// Disable 64-bit only features in non-64-bit mode.
283	SmallVector<StringRef, `9`> FeaturesIn64BitOnly = {
284	"egpr", "push2pop2", "ppx", "ndd", "ccmp", "nf", "cf", "zu", "uintr"};
285	if (FullFS.find(s: "-64bit-mode") != std::string::npos)
286	for (StringRef F : FeaturesIn64BitOnly)
287	FullFS += ",-" + F.str();
288
289	// Parse features string and set the CPU.
290	ParseSubtargetFeatures(CPU, TuneCPU, FS: FullFS);
291
292	// All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
293	// 16-bytes and under that are reasonably fast. These features were
294	// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
295	// micro-architectures respectively.
296	if (hasSSE42() \|\| hasSSE4A())
297	IsUnalignedMem16Slow = false;
298
299	LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
300	<< ", MMX " << HasMMX << ", 64bit " << HasX86_64 << "\n");
301	if (Is64Bit && !HasX86_64)
302	reportFatalUsageError(reason: "64-bit code requested on a subtarget that doesn't "
303	"support it!");
304
305	// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD, NaCl, and for all
306	// 64-bit targets. On Solaris (32-bit), stack alignment is 4 bytes
307	// following the i386 psABI, while on Illumos it is always 16 bytes.
308	if (StackAlignOverride)
309	stackAlignment = *StackAlignOverride;
310	else if (isTargetDarwin() \|\| isTargetLinux() \|\| isTargetKFreeBSD() \|\|
311	isTargetNaCl() \|\| Is64Bit)
312	stackAlignment = Align (`16`);
313
314	// Consume the vector width attribute or apply any target specific limit.
315	if (PreferVectorWidthOverride)
316	PreferVectorWidth = PreferVectorWidthOverride;
317	else if (Prefer128Bit)
318	PreferVectorWidth = `128`;
319	else if (Prefer256Bit)
320	PreferVectorWidth = `256`;
321	}
322
323	X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
324	StringRef TuneCPU,
325	StringRef FS) {
326	initSubtargetFeatures(CPU, TuneCPU, FS);
327	return *this;
328	}
329
330	X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
331	StringRef FS, const X86TargetMachine &TM,
332	MaybeAlign StackAlignOverride,
333	unsigned PreferVectorWidthOverride,
334	unsigned RequiredVectorWidth)
335	: X86GenSubtargetInfo (TT, CPU, TuneCPU, FS),
336	PICStyle(PICStyles::Style::None), TM(TM), TargetTriple (TT),
337	StackAlignOverride (StackAlignOverride),
338	PreferVectorWidthOverride(PreferVectorWidthOverride),
339	RequiredVectorWidth(RequiredVectorWidth),
340	InstrInfo (initializeSubtargetDependencies(CPU, TuneCPU, FS)),
341	TLInfo (TM, *this), FrameLowering (*this, getStackAlignment()) {
342	// Determine the PICStyle based on the target selected.
343	if (!isPositionIndependent() \|\| TM.getCodeModel() == CodeModel::Large)
344	// With the large code model, None forces all memory accesses to be indirect
345	// rather than RIP-relative.
346	setPICStyle(PICStyles::Style::None);
347	else if (is64Bit())
348	setPICStyle(PICStyles::Style::RIPRel);
349	else if (isTargetCOFF())
350	setPICStyle(PICStyles::Style::None);
351	else if (isTargetDarwin())
352	setPICStyle(PICStyles::Style::StubPIC);
353	else if (isTargetELF())
354	setPICStyle(PICStyles::Style::GOT);
355
356	CallLoweringInfo.reset(p: new X86CallLowering (*getTargetLowering()));
357	Legalizer.reset(p: new X86LegalizerInfo (*this, TM));
358
359	auto RBI = new* X86RegisterBankInfo (*getRegisterInfo());
360	RegBankInfo.reset(p: RBI);
361	InstSelector.reset(p: createX86InstructionSelector(TM, *this, *RBI));
362	}
363
364	// Define the virtual destructor out-of-line for build efficiency.
365	X86Subtarget::~X86Subtarget() = default;
366
367	const CallLowering X86Subtarget::getCallLowering() const* {
368	return CallLoweringInfo.get();
369	}
370
371	InstructionSelector X86Subtarget::getInstructionSelector() const* {
372	return InstSelector.get();
373	}
374
375	const LegalizerInfo X86Subtarget::getLegalizerInfo() const* {
376	return Legalizer.get();
377	}
378
379	const RegisterBankInfo X86Subtarget::getRegBankInfo() const* {
380	return RegBankInfo.get();
381	}
382
383	bool X86Subtarget::enableEarlyIfConversion() const {
384	return canUseCMOV() && X86EarlyIfConv;
385	}
386
387	void X86Subtarget::getPostRAMutations(
388	std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
389	Mutations.push_back(x: createX86MacroFusionDAGMutation());
390	}
391
392	bool X86Subtarget::isPositionIndependent() const {
393	return TM.isPositionIndependent();
394	}
395

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86Subtarget.cpp