//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H

#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/TargetParser/Triple.h"
#include <climits>
#include <memory>

#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
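// With GET_SUBTARGETINFO_HEADER defined, the generated include above provides
// the declaration of X86GenSubtargetInfo, the tblgen-produced base class that
// X86Subtarget derives from below.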

namespace llvm {

class CallLowering;
class GlobalValue;
class InstructionSelector;
class LegalizerInfo;
class RegisterBankInfo;
class StringRef;
class TargetMachine;

/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {

enum class Style {
  StubPIC,   // Used on i386-darwin in pic mode.
  GOT,       // Used on 32-bit ELF when in pic mode.
  RIPRel,    // Used on X86-64 when in pic mode.
  None       // Set when not in pic mode.
};

} // end namespace PICStyles

class X86Subtarget final : public X86GenSubtargetInfo {
  enum X86SSEEnum {
    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  };

  /// Which PIC style to use.
  PICStyles::Style PICStyle;

  const TargetMachine &TM;

  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512, or none
  /// supported.
  X86SSEEnum X86SSELevel = NoSSE;

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "X86GenSubtargetInfo.inc"
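  // Illustrative sketch only (the feature names are hypothetical; the real
  // ones come from the X86.td feature records): for each subtarget feature,
  // tblgen expands the macro above into one default-initialized boolean
  // member, roughly
  //   bool HasSomeFeature = false;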
  /// The minimum alignment known to hold for the stack frame on
  /// entry to the function, which must be maintained by every function.
  Align stackAlignment = Align(4);

  Align TileConfigAlignment = Align(4);

  /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  ///
  // FIXME: this is a known good value for Yonah. How about others?
  unsigned MaxInlineSizeThreshold = 128;

  /// What processor and OS we're targeting.
  Triple TargetTriple;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  std::unique_ptr<InstructionSelector> InstSelector;

  /// Override the stack alignment.
  MaybeAlign StackAlignOverride;

  /// Preferred vector width from function attribute.
  unsigned PreferVectorWidthOverride;

  /// Resolved preferred vector width from function attribute and subtarget
  /// features.
  unsigned PreferVectorWidth = UINT32_MAX;

  /// Required vector width from function attribute.
  unsigned RequiredVectorWidth;

  X86SelectionDAGInfo TSInfo;
  // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  // X86TargetLowering needs.
  X86InstrInfo InstrInfo;
  X86TargetLowering TLInfo;
  X86FrameLowering FrameLowering;

public:
  /// This constructor initializes the data members to match those
  /// of the specified triple.
  ///
  X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
               const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
               unsigned PreferVectorWidthOverride,
               unsigned RequiredVectorWidth);
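  // Construction sketch (the triple, CPU, and feature-string values are
  // hypothetical and shown only to illustrate the parameter order; TM refers
  // to an enclosing X86TargetMachine): the target machine normally creates and
  // caches these per function, roughly like
  //   X86Subtarget ST(Triple("x86_64-unknown-linux-gnu"), "znver3", "znver3",
  //                   "+avx2", TM, MaybeAlign(),
  //                   /*PreferVectorWidthOverride=*/0,
  //                   /*RequiredVectorWidth=*/UINT32_MAX);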

  const X86TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  const X86FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  const X86RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }

  unsigned getTileConfigSize() const { return 64; }
  Align getTileConfigAlignment() const { return TileConfigAlignment; }

  /// Returns the minimum alignment known to hold for the stack frame on entry
  /// to the function, which must be maintained by every function for this
  /// subtarget.
  Align getStackAlignment() const { return stackAlignment; }

  /// Returns the maximum memset / memcpy size
  /// that still makes it profitable to inline the call.
  unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }

  /// ParseSubtargetFeatures - Parses the features string, setting the
  /// specified subtarget options. The definition of this function is
  /// auto-generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// Methods used by Global ISel
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

private:
  /// Initialize the full set of dependencies so we can use an initializer
  /// list for X86Subtarget.
  X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
                                                StringRef TuneCPU,
                                                StringRef FS);
  void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

public:

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "X86GenSubtargetInfo.inc"
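  // Illustrative sketch only (names are hypothetical): this second expansion
  // of the macro pairs each boolean member declared earlier with a trivial
  // accessor, roughly
  //   bool hasSomeFeature() const { return HasSomeFeature; }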

  /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  bool isTarget64BitILP32() const {
    return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  }

  /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  bool isTarget64BitLP64() const {
    return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  }
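  // Usage sketch (ST is a hypothetical X86Subtarget reference): pointer width
  // follows the programming model rather than the raw 64-bit flag, e.g.
  //   unsigned PtrBits =
  //       ST.isTarget64BitLP64() ? 64 : 32; // x32/NaCl keep 32-bit pointers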

  PICStyles::Style getPICStyle() const { return PICStyle; }
  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }

  bool canUseCMPXCHG8B() const { return hasCX8(); }
  bool canUseCMPXCHG16B() const {
    // CX16 is just the CPUID bit; the instruction also requires 64-bit mode.
    return hasCX16() && is64Bit();
  }
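  // Note (a general observation, not a description of a specific call site):
  // when this returns false, 128-bit atomic operations cannot be lowered to a
  // single cmpxchg16b and have to be expanded instead, e.g. to a library call.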
  // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  // All 64-bit processors support cmov.
  bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
  bool hasSSE1() const { return X86SSELevel >= SSE1; }
  bool hasSSE2() const { return X86SSELevel >= SSE2; }
  bool hasSSE3() const { return X86SSELevel >= SSE3; }
  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  bool hasSSE41() const { return X86SSELevel >= SSE41; }
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512; }
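  // The SSE level is cumulative, so each predicate above also implies every
  // lower one; e.g. hasAVX2() being true implies hasSSE42() is true as well.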
  bool hasInt256() const { return hasAVX2(); }
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasPrefetchW() const {
    // The PREFETCHW instruction was added with 3DNow, but later CPUs gave it
    // its own CPUID bit as part of deprecating 3DNow.
    return hasPRFCHW();
  }
  bool hasSSEPrefetch() const {
    // SSE1 provides the prefetch instructions themselves; we also implicitly
    // enable this when we have PRFCHW or PREFETCHI.
    return hasSSE1() || hasPRFCHW() || hasPREFETCHI();
  }
  bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
  // These are generic getters that OR together all of the thunk types
  // supported by the subtarget. Therefore useIndirectThunk*() will return true
  // if any respective thunk feature is enabled.
  bool useIndirectThunkCalls() const {
    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  }
  bool useIndirectThunkBranches() const {
    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  }

  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }

  // Helper functions to determine when we should allow widening to 512-bit
  // during codegen.
  // TODO: Currently we're always allowing widening on CPUs without VLX,
  // because for many cases we don't have a better option.
  bool canExtendTo512DQ() const {
    return hasAVX512() && hasEVEX512() &&
           (!hasVLX() || getPreferVectorWidth() >= 512);
  }
  bool canExtendTo512BW() const {
    return hasBWI() && canExtendTo512DQ();
  }
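  // Usage sketch (ST is a hypothetical X86Subtarget reference): lowering code
  // that considers widening a 256-bit operation to 512 bits would typically
  // gate the decision on something like
  //   if (ST.canExtendTo512BW()) { /* widen v32i8/v16i16-style ops */ }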

  bool hasNoDomainDelay() const { return NoDomainDelay; }
  bool hasNoDomainDelayMov() const {
    return hasNoDomainDelay() || NoDomainDelayMov;
  }
  bool hasNoDomainDelayBlend() const {
    return hasNoDomainDelay() || NoDomainDelayBlend;
  }
  bool hasNoDomainDelayShuffle() const {
    return hasNoDomainDelay() || NoDomainDelayShuffle;
  }

  // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  // disable them in the legalizer.
  bool useAVX512Regs() const {
    return hasAVX512() && hasEVEX512() &&
           (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }

  bool useLight256BitInstructions() const {
    return getPreferVectorWidth() >= 256 || AllowLight256Bit;
  }

  bool useBWIRegs() const {
    return hasBWI() && useAVX512Regs();
  }

  bool isXRaySupported() const override { return is64Bit(); }

  /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }

  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasMFence() const { return hasSSE2() || is64Bit(); }

  const Triple &getTargetTriple() const { return TargetTriple; }

  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  bool isTargetPS() const { return TargetTriple.isPS(); }

  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }

  bool isTargetWindowsMSVC() const {
    return TargetTriple.isWindowsMSVCEnvironment();
  }

  bool isTargetWindowsCoreCLR() const {
    return TargetTriple.isWindowsCoreCLREnvironment();
  }

  bool isTargetWindowsCygwin() const {
    return TargetTriple.isWindowsCygwinEnvironment();
  }

  bool isTargetWindowsGNU() const {
    return TargetTriple.isWindowsGNUEnvironment();
  }

  bool isTargetWindowsItanium() const {
    return TargetTriple.isWindowsItaniumEnvironment();
  }

  bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }

  bool isOSWindows() const { return TargetTriple.isOSWindows(); }

  bool isTargetWin64() const { return Is64Bit && isOSWindows(); }

  bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }

  bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }

  bool isPICStyleStubPIC() const {
    return PICStyle == PICStyles::Style::StubPIC;
  }

  bool isPositionIndependent() const;

  bool isCallingConvWin64(CallingConv::ID CC) const {
    switch (CC) {
    // On Win64, all these conventions just use the default convention.
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
    case CallingConv::X86_FastCall:
    case CallingConv::X86_StdCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::X86_VectorCall:
    case CallingConv::Intel_OCL_BI:
      return isTargetWin64();
    // This convention allows using the Win64 convention on other targets.
    case CallingConv::Win64:
      return true;
    // This convention allows using the SysV convention on Windows targets.
    case CallingConv::X86_64_SysV:
      return false;
    // Otherwise, who knows what this is.
    default:
      return false;
    }
  }
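  // Usage sketch (ST and F are hypothetical X86Subtarget / Function
  // references): argument-lowering code typically selects between the Win64
  // and SysV register assignments with something like
  //   bool IsWin64CC = ST.isCallingConvWin64(F.getCallingConv());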

  /// Classify a global variable reference for the current subtarget according
  /// to how we should reference it in a non-pcrel context.
  unsigned char classifyLocalReference(const GlobalValue *GV) const;

  unsigned char classifyGlobalReference(const GlobalValue *GV,
                                        const Module &M) const;
  unsigned char classifyGlobalReference(const GlobalValue *GV) const;

  /// Classify a global function reference for the current subtarget.
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
                                                const Module &M) const;
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override;

  /// Classify a blockaddress reference for the current subtarget according to
  /// how we should reference it in a non-pcrel context.
  unsigned char classifyBlockAddressReference() const;

  /// Return true if the subtarget allows calls to immediate address.
  bool isLegalToCallImmediateAddr() const;

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch (TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    case Triple::MacOSX:
    case Triple::Darwin:
      return Major < 12;
    }
  }

  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  /// lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useIndirectThunkBranches();
  }

  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }

  bool enableEarlyIfConversion() const override;

  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                              &Mutations) const override;

  AntiDepBreakMode getAntiDepBreakMode() const override {
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H