X86Subtarget.cpp source code [llvm_projects/llvm/lib/Target/X86/X86Subtarget.cpp]

1	//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the X86 specific subclass of TargetSubtargetInfo.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "X86Subtarget.h"
14	#include "GISel/X86CallLowering.h"
15	#include "GISel/X86LegalizerInfo.h"
16	#include "GISel/X86RegisterBankInfo.h"
17	#include "MCTargetDesc/X86BaseInfo.h"
18	#include "X86.h"
19	#include "X86MacroFusion.h"
20	#include "X86TargetMachine.h"
21	#include "llvm/CodeGen/GlobalISel/CallLowering.h"
22	#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
23	#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24	#include "llvm/CodeGen/ScheduleDAGMutation.h"
25	#include "llvm/IR/Attributes.h"
26	#include "llvm/IR/ConstantRange.h"
27	#include "llvm/IR/Function.h"
28	#include "llvm/IR/GlobalValue.h"
29	#include "llvm/IR/Module.h"
30	#include "llvm/Support/Casting.h"
31	#include "llvm/Support/CodeGen.h"
32	#include "llvm/Support/CommandLine.h"
33	#include "llvm/Support/Debug.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/raw_ostream.h"
36	#include "llvm/Target/TargetMachine.h"
37	#include "llvm/TargetParser/Triple.h"
38
39	#if defined(_MSC_VER)
40	#include <intrin.h>
41	#endif
42
43	using namespace llvm;
44
45	#define DEBUG_TYPE "subtarget"
46
47	#define GET_SUBTARGETINFO_TARGET_DESC
48	#define GET_SUBTARGETINFO_CTOR
49	#include "X86GenSubtargetInfo.inc"
50
51	// Temporary option to control early if-conversion for x86 while adding machine
52	// models.
53	static cl::opt<bool>
54	X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
55	cl::desc ("Enable early if-conversion on X86"));
56
57
58	/// Classify a blockaddress reference for the current subtarget according to how
59	/// we should reference it in a non-pcrel context.
60	unsigned char X86Subtarget::classifyBlockAddressReference() const {
61	return classifyLocalReference(GV: nullptr);
62	}
63
64	/// Classify a global variable reference for the current subtarget according to
65	/// how we should reference it in a non-pcrel context.
66	unsigned char
67	X86Subtarget::classifyGlobalReference(const GlobalValue GV) const* {
68	return classifyGlobalReference(GV, M: *GV->getParent());
69	}
70
71	unsigned char
72	X86Subtarget::classifyLocalReference(const GlobalValue GV) const* {
73	CodeModel::Model CM = TM.getCodeModel();
74	// Tagged globals have non-zero upper bits, which makes direct references
75	// require a 64-bit immediate. With the small/medium code models this causes
76	// relocation errors, so we go through the GOT instead.
77	if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(Val: GV))
78	return X86II::MO_GOTPCREL_NORELAX;
79
80	// If we're not PIC, it's not very interesting.
81	if (!isPositionIndependent())
82	return X86II::MO_NO_FLAG;
83
84	if (is64Bit()) {
85	// 64-bit ELF PIC local references may use GOTOFF relocations.
86	if (isTargetELF()) {
87	assert(CM != CodeModel::Tiny &&
88	"Tiny codesize model not supported on X86");
89	// In the large code model, all text is far from any global data, so we
90	// use GOTOFF.
91	if (CM == CodeModel::Large)
92	return X86II::MO_GOTOFF;
93	// Large GlobalValues use GOTOFF, otherwise use RIP-rel access.
94	if (GV)
95	return TM.isLargeGlobalValue(GV) ? X86II::MO_GOTOFF : X86II::MO_NO_FLAG;
96	// GV == nullptr is for all other non-GlobalValue global data like the
97	// constant pool, jump tables, labels, etc. The small and medium code
98	// models treat these as accessible with a RIP-rel access.
99	return X86II::MO_NO_FLAG;
100	}
101
102	// Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
103	// both of which use MO_NO_FLAG.
104	return X86II::MO_NO_FLAG;
105	}
106
107	// The COFF dynamic linker just patches the executable sections.
108	if (isTargetCOFF())
109	return X86II::MO_NO_FLAG;
110
111	if (isTargetDarwin()) {
112	// 32 bit macho has no relocation for a-b if a is undefined, even if
113	// b is in the section that is being relocated.
114	// This means we have to use o load even for GVs that are known to be
115	// local to the dso.
116	if (GV && (GV->isDeclarationForLinker() \|\| GV->hasCommonLinkage()))
117	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
118
119	return X86II::MO_PIC_BASE_OFFSET;
120	}
121
122	return X86II::MO_GOTOFF;
123	}
124
125	unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
126	const Module &M) const {
127	// The static large model never uses stubs.
128	if (TM.getCodeModel() == CodeModel::Large && !isPositionIndependent())
129	return X86II::MO_NO_FLAG;
130
131	// Absolute symbols can be referenced directly.
132	if (GV) {
133	if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
134	// See if we can use the 8-bit immediate form. Note that some instructions
135	// will sign extend the immediate operand, so to be conservative we only
136	// accept the range [0,128).
137	if (CR ->getUnsignedMax().ult(RHS: `128`))
138	return X86II::MO_ABS8;
139	else
140	return X86II::MO_NO_FLAG;
141	}
142	}
143
144	if (TM.shouldAssumeDSOLocal(GV))
145	return classifyLocalReference(GV);
146
147	if (isTargetCOFF()) {
148	// ExternalSymbolSDNode like _tls_index.
149	if (!GV)
150	return X86II::MO_NO_FLAG;
151	if (GV->hasDLLImportStorageClass())
152	return X86II::MO_DLLIMPORT;
153	return X86II::MO_COFFSTUB;
154	}
155	// Some JIT users use -win32-elf triples; these shouldn't use GOT tables.*
156	if (isOSWindows())
157	return X86II::MO_NO_FLAG;
158
159	if (is64Bit()) {
160	// ELF supports a large, truly PIC code model with non-PC relative GOT
161	// references. Other object file formats do not. Use the no-flag, 64-bit
162	// reference for them.
163	if (TM.getCodeModel() == CodeModel::Large)
164	return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
165	// Tagged globals have non-zero upper bits, which makes direct references
166	// require a 64-bit immediate. So we can't let the linker relax the
167	// relocation to a 32-bit RIP-relative direct reference.
168	if (AllowTaggedGlobals && GV && !isa<Function>(Val: GV))
169	return X86II::MO_GOTPCREL_NORELAX;
170	return X86II::MO_GOTPCREL;
171	}
172
173	if (isTargetDarwin()) {
174	if (!isPositionIndependent())
175	return X86II::MO_DARWIN_NONLAZY;
176	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
177	}
178
179	// 32-bit ELF references GlobalAddress directly in static relocation model.
180	// We cannot use MO_GOT because EBX may not be set up.
181	if (TM.getRelocationModel() == Reloc::Static)
182	return X86II::MO_NO_FLAG;
183	return X86II::MO_GOT;
184	}
185
186	unsigned char
187	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue GV) const* {
188	return classifyGlobalFunctionReference(GV, M: *GV->getParent());
189	}
190
191	unsigned char
192	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
193	const Module &M) const {
194	if (TM.shouldAssumeDSOLocal(GV))
195	return X86II::MO_NO_FLAG;
196
197	// Functions on COFF can be non-DSO local for three reasons:
198	// - They are intrinsic functions (!GV)
199	// - They are marked dllimport
200	// - They are extern_weak, and a stub is needed
201	if (isTargetCOFF()) {
202	if (!GV)
203	return X86II::MO_NO_FLAG;
204	if (GV->hasDLLImportStorageClass())
205	return X86II::MO_DLLIMPORT;
206	return X86II::MO_COFFSTUB;
207	}
208
209	const Function *F = dyn_cast_or_null<Function>(Val: GV);
210
211	if (isTargetELF()) {
212	if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
213	// According to psABI, PLT stub clobbers XMM8-XMM15.
214	// In Regcall calling convention those registers are used for passing
215	// parameters. Thus we need to prevent lazy binding in Regcall.
216	return X86II::MO_GOTPCREL;
217	// If PLT must be avoided then the call should be via GOTPCREL.
218	if (((F && F->hasFnAttribute(Kind: Attribute::NonLazyBind)) \|\|
219	(!F && M.getRtLibUseGOT())) &&
220	is64Bit())
221	return X86II::MO_GOTPCREL;
222	// Reference ExternalSymbol directly in static relocation model.
223	if (!is64Bit() && !GV && TM.getRelocationModel() == Reloc::Static)
224	return X86II::MO_NO_FLAG;
225	return X86II::MO_PLT;
226	}
227
228	if (is64Bit()) {
229	if (F && F->hasFnAttribute(Kind: Attribute::NonLazyBind))
230	// If the function is marked as non-lazy, generate an indirect call
231	// which loads from the GOT directly. This avoids runtime overhead
232	// at the cost of eager binding (and one extra byte of encoding).
233	return X86II::MO_GOTPCREL;
234	return X86II::MO_NO_FLAG;
235	}
236
237	return X86II::MO_NO_FLAG;
238	}
239
240	/// Return true if the subtarget allows calls to immediate address.
241	bool X86Subtarget::isLegalToCallImmediateAddr() const {
242	// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
243	// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
244	// the following check for Win32 should be removed.
245	if (Is64Bit \|\| isTargetWin32())
246	return false;
247	return isTargetELF() \|\| TM.getRelocationModel() == Reloc::Static;
248	}
249
250	void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
251	StringRef FS) {
252	if (CPU.empty())
253	CPU = "generic";
254
255	if (TuneCPU.empty())
256	TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
257
258	std::string FullFS = X86_MC::ParseX86Triple(TT: TargetTriple);
259	assert(!FullFS.empty() && "Failed to parse X86 triple");
260
261	if (!FS.empty())
262	FullFS = (Twine (FullFS) + "," + FS).str();
263
264	// Attach EVEX512 feature when we have AVX512 features with a default CPU.
265	// "pentium4" is default CPU for 32-bit targets.
266	// "x86-64" is default CPU for 64-bit targets.
267	if (CPU == "generic" \|\| CPU == "pentium4" \|\| CPU == "x86-64") {
268	size_t posNoEVEX512 = FS.rfind(Str: "-evex512");
269	// Make sure we won't be cheated by "-avx512fp16".
270	size_t posNoAVX512F =
271	FS.ends_with(Suffix: "-avx512f") ? FS.size() - `8` : FS.rfind(Str: "-avx512f,");
272	size_t posEVEX512 = FS.rfind(Str: "+evex512");
273	// Any AVX512XXX will enable AVX512F.
274	size_t posAVX512F = FS.rfind(Str: "+avx512");
275
276	if (posAVX512F != StringRef::npos &&
277	(posNoAVX512F == StringRef::npos \|\| posNoAVX512F < posAVX512F))
278	if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
279	FullFS += ",+evex512";
280	}
281
282	// Parse features string and set the CPU.
283	ParseSubtargetFeatures(CPU, TuneCPU, FS: FullFS);
284
285	// All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
286	// 16-bytes and under that are reasonably fast. These features were
287	// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
288	// micro-architectures respectively.
289	if (hasSSE42() \|\| hasSSE4A())
290	IsUnalignedMem16Slow = false;
291
292	LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
293	<< ", MMX " << HasMMX << ", 64bit " << HasX86_64 << "\n");
294	if (Is64Bit && !HasX86_64)
295	report_fatal_error(reason: "64-bit code requested on a subtarget that doesn't "
296	"support it!");
297
298	// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD, NaCl, and for all
299	// 64-bit targets. On Solaris (32-bit), stack alignment is 4 bytes
300	// following the i386 psABI, while on Illumos it is always 16 bytes.
301	if (StackAlignOverride)
302	stackAlignment = *StackAlignOverride;
303	else if (isTargetDarwin() \|\| isTargetLinux() \|\| isTargetKFreeBSD() \|\|
304	isTargetNaCl() \|\| Is64Bit)
305	stackAlignment = Align (`16`);
306
307	// Consume the vector width attribute or apply any target specific limit.
308	if (PreferVectorWidthOverride)
309	PreferVectorWidth = PreferVectorWidthOverride;
310	else if (Prefer128Bit)
311	PreferVectorWidth = `128`;
312	else if (Prefer256Bit)
313	PreferVectorWidth = `256`;
314	}
315
316	X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
317	StringRef TuneCPU,
318	StringRef FS) {
319	initSubtargetFeatures(CPU, TuneCPU, FS);
320	return *this;
321	}
322
323	X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
324	StringRef FS, const X86TargetMachine &TM,
325	MaybeAlign StackAlignOverride,
326	unsigned PreferVectorWidthOverride,
327	unsigned RequiredVectorWidth)
328	: X86GenSubtargetInfo (TT, CPU, TuneCPU, FS),
329	PICStyle(PICStyles::Style::None), TM(TM), TargetTriple (TT),
330	StackAlignOverride (StackAlignOverride),
331	PreferVectorWidthOverride(PreferVectorWidthOverride),
332	RequiredVectorWidth(RequiredVectorWidth),
333	InstrInfo (initializeSubtargetDependencies(CPU, TuneCPU, FS)),
334	TLInfo (TM, *this), FrameLowering (*this, getStackAlignment()) {
335	// Determine the PICStyle based on the target selected.
336	if (!isPositionIndependent() \|\| TM.getCodeModel() == CodeModel::Large)
337	// With the large code model, None forces all memory accesses to be indirect
338	// rather than RIP-relative.
339	setPICStyle(PICStyles::Style::None);
340	else if (is64Bit())
341	setPICStyle(PICStyles::Style::RIPRel);
342	else if (isTargetCOFF())
343	setPICStyle(PICStyles::Style::None);
344	else if (isTargetDarwin())
345	setPICStyle(PICStyles::Style::StubPIC);
346	else if (isTargetELF())
347	setPICStyle(PICStyles::Style::GOT);
348
349	CallLoweringInfo.reset(p: new X86CallLowering (*getTargetLowering()));
350	Legalizer.reset(p: new X86LegalizerInfo (*this, TM));
351
352	auto RBI = new* X86RegisterBankInfo (*getRegisterInfo());
353	RegBankInfo.reset(p: RBI);
354	InstSelector.reset(p: createX86InstructionSelector(TM, *this, *RBI));
355	}
356
357	const CallLowering X86Subtarget::getCallLowering() const* {
358	return CallLoweringInfo.get();
359	}
360
361	InstructionSelector X86Subtarget::getInstructionSelector() const* {
362	return InstSelector.get();
363	}
364
365	const LegalizerInfo X86Subtarget::getLegalizerInfo() const* {
366	return Legalizer.get();
367	}
368
369	const RegisterBankInfo X86Subtarget::getRegBankInfo() const* {
370	return RegBankInfo.get();
371	}
372
373	bool X86Subtarget::enableEarlyIfConversion() const {
374	return canUseCMOV() && X86EarlyIfConv;
375	}
376
377	void X86Subtarget::getPostRAMutations(
378	std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
379	Mutations.push_back(x: createX86MacroFusionDAGMutation());
380	}
381
382	bool X86Subtarget::isPositionIndependent() const {
383	return TM.isPositionIndependent();
384	}
385

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86Subtarget.cpp