| 1 | //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file declares the X86 specific subclass of TargetSubtargetInfo. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
| 14 | #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
| 15 | |
| 16 | #include "X86FrameLowering.h" |
| 17 | #include "X86ISelLowering.h" |
| 18 | #include "X86InstrInfo.h" |
| 19 | #include "X86SelectionDAGInfo.h" |
| 20 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 21 | #include "llvm/IR/CallingConv.h" |
| 22 | #include "llvm/TargetParser/Triple.h" |
| 23 | #include <climits> |
| 24 | #include <memory> |
| 25 | |
| 26 | #define |
| 27 | #include "X86GenSubtargetInfo.inc" |
| 28 | |
| 29 | namespace llvm { |
| 30 | |
// Forward declarations for types this header only names through pointers,
// references, smart pointers, or function signatures.

// GlobalISel components.
class CallLowering;
class InstructionSelector;
class LegalizerInfo;
class RegisterBankInfo;

// Other LLVM types.
class GlobalValue;
class StringRef;
class TargetMachine;
| 38 | |
/// Styles of position-independent code (PIC) that the X86 backend can
/// generate. Exactly one style is in effect for a given subtarget.
namespace PICStyles {

enum class Style {
  /// Used on i386-darwin in PIC mode.
  StubPIC,
  /// Used on 32-bit ELF in PIC mode.
  GOT,
  /// Used on x86-64 in PIC mode.
  RIPRel,
  /// Set when not in PIC mode.
  None
};

} // end namespace PICStyles
| 51 | |
| 52 | class X86Subtarget final : public X86GenSubtargetInfo { |
| 53 | enum X86SSEEnum { |
| 54 | NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512 |
| 55 | }; |
| 56 | |
| 57 | /// Which PIC style to use |
| 58 | PICStyles::Style PICStyle; |
| 59 | |
| 60 | const TargetMachine &TM; |
| 61 | |
| 62 | /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. |
| 63 | X86SSEEnum X86SSELevel = NoSSE; |
| 64 | |
| 65 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
| 66 | bool ATTRIBUTE = DEFAULT; |
| 67 | #include "X86GenSubtargetInfo.inc" |
| 68 | /// The minimum alignment known to hold of the stack frame on |
| 69 | /// entry to the function and which must be maintained by every function. |
| 70 | Align stackAlignment = Align(4); |
| 71 | |
| 72 | Align TileConfigAlignment = Align(4); |
| 73 | |
| 74 | /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. |
| 75 | /// |
| 76 | // FIXME: this is a known good value for Yonah. How about others? |
| 77 | unsigned MaxInlineSizeThreshold = 128; |
| 78 | |
| 79 | /// What processor and OS we're targeting. |
| 80 | Triple TargetTriple; |
| 81 | |
| 82 | /// GlobalISel related APIs. |
| 83 | std::unique_ptr<CallLowering> CallLoweringInfo; |
| 84 | std::unique_ptr<LegalizerInfo> Legalizer; |
| 85 | std::unique_ptr<RegisterBankInfo> RegBankInfo; |
| 86 | std::unique_ptr<InstructionSelector> InstSelector; |
| 87 | |
| 88 | /// Override the stack alignment. |
| 89 | MaybeAlign StackAlignOverride; |
| 90 | |
| 91 | /// Preferred vector width from function attribute. |
| 92 | unsigned PreferVectorWidthOverride; |
| 93 | |
| 94 | /// Resolved preferred vector width from function attribute and subtarget |
| 95 | /// features. |
| 96 | unsigned PreferVectorWidth = UINT32_MAX; |
| 97 | |
| 98 | /// Required vector width from function attribute. |
| 99 | unsigned RequiredVectorWidth; |
| 100 | |
| 101 | X86SelectionDAGInfo TSInfo; |
| 102 | // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which |
| 103 | // X86TargetLowering needs. |
| 104 | X86InstrInfo InstrInfo; |
| 105 | X86TargetLowering TLInfo; |
| 106 | X86FrameLowering FrameLowering; |
| 107 | |
| 108 | public: |
| 109 | /// This constructor initializes the data members to match that |
| 110 | /// of the specified triple. |
| 111 | /// |
| 112 | X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, |
| 113 | const X86TargetMachine &TM, MaybeAlign StackAlignOverride, |
| 114 | unsigned PreferVectorWidthOverride, |
| 115 | unsigned RequiredVectorWidth); |
| 116 | ~X86Subtarget() override; |
| 117 | |
| 118 | const X86TargetLowering *getTargetLowering() const override { |
| 119 | return &TLInfo; |
| 120 | } |
| 121 | |
| 122 | const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } |
| 123 | |
| 124 | const X86FrameLowering *getFrameLowering() const override { |
| 125 | return &FrameLowering; |
| 126 | } |
| 127 | |
| 128 | const X86SelectionDAGInfo *getSelectionDAGInfo() const override { |
| 129 | return &TSInfo; |
| 130 | } |
| 131 | |
| 132 | const X86RegisterInfo *getRegisterInfo() const override { |
| 133 | return &getInstrInfo()->getRegisterInfo(); |
| 134 | } |
| 135 | |
| 136 | unsigned getTileConfigSize() const { return 64; } |
| 137 | Align getTileConfigAlignment() const { return TileConfigAlignment; } |
| 138 | |
| 139 | /// Returns the minimum alignment known to hold of the |
| 140 | /// stack frame on entry to the function and which must be maintained by every |
| 141 | /// function for this subtarget. |
| 142 | Align getStackAlignment() const { return stackAlignment; } |
| 143 | |
| 144 | /// Returns the maximum memset / memcpy size |
| 145 | /// that still makes it profitable to inline the call. |
| 146 | unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } |
| 147 | |
| 148 | /// ParseSubtargetFeatures - Parses features string setting specified |
| 149 | /// subtarget options. Definition of function is auto generated by tblgen. |
| 150 | void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); |
| 151 | |
| 152 | /// Methods used by Global ISel |
| 153 | const CallLowering *getCallLowering() const override; |
| 154 | InstructionSelector *getInstructionSelector() const override; |
| 155 | const LegalizerInfo *getLegalizerInfo() const override; |
| 156 | const RegisterBankInfo *getRegBankInfo() const override; |
| 157 | |
| 158 | private: |
| 159 | /// Initialize the full set of dependencies so we can use an initializer |
| 160 | /// list for X86Subtarget. |
| 161 | X86Subtarget &initializeSubtargetDependencies(StringRef CPU, |
| 162 | StringRef TuneCPU, |
| 163 | StringRef FS); |
| 164 | void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); |
| 165 | |
| 166 | public: |
| 167 | |
| 168 | #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ |
| 169 | bool GETTER() const { return ATTRIBUTE; } |
| 170 | #include "X86GenSubtargetInfo.inc" |
| 171 | |
| 172 | /// Is this x86_64 with the ILP32 programming model (x32 ABI)? |
| 173 | bool isTarget64BitILP32() const { |
| 174 | return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl()); |
| 175 | } |
| 176 | |
| 177 | /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? |
| 178 | bool isTarget64BitLP64() const { |
| 179 | return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl()); |
| 180 | } |
| 181 | |
| 182 | PICStyles::Style getPICStyle() const { return PICStyle; } |
| 183 | void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } |
| 184 | |
| 185 | bool canUseCMPXCHG8B() const { return hasCX8(); } |
| 186 | bool canUseCMPXCHG16B() const { |
| 187 | // CX16 is just the CPUID bit, instruction requires 64-bit mode too. |
| 188 | return hasCX16() && is64Bit(); |
| 189 | } |
| 190 | // SSE codegen depends on cmovs, and all SSE1+ processors support them. |
| 191 | // All 64-bit processors support cmov. |
| 192 | bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); } |
| 193 | bool hasSSE1() const { return X86SSELevel >= SSE1; } |
| 194 | bool hasSSE2() const { return X86SSELevel >= SSE2; } |
| 195 | bool hasSSE3() const { return X86SSELevel >= SSE3; } |
| 196 | bool hasSSSE3() const { return X86SSELevel >= SSSE3; } |
| 197 | bool hasSSE41() const { return X86SSELevel >= SSE41; } |
| 198 | bool hasSSE42() const { return X86SSELevel >= SSE42; } |
| 199 | bool hasAVX() const { return X86SSELevel >= AVX; } |
| 200 | bool hasAVX2() const { return X86SSELevel >= AVX2; } |
| 201 | bool hasAVX512() const { return X86SSELevel >= AVX512; } |
| 202 | bool hasInt256() const { return hasAVX2(); } |
| 203 | bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } |
| 204 | bool hasPrefetchW() const { |
| 205 | // The PREFETCHW instruction was added with 3DNow but later CPUs gave it |
| 206 | // its own CPUID bit as part of deprecating 3DNow. |
| 207 | return hasPRFCHW(); |
| 208 | } |
| 209 | bool hasSSEPrefetch() const { |
| 210 | // We also implicitly enable these when we have a write prefix supporting |
| 211 | // cache level OR if we have prfchw. |
| 212 | return hasSSE1() || hasPRFCHW() || hasPREFETCHI(); |
| 213 | } |
| 214 | bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } |
| 215 | // These are generic getters that OR together all of the thunk types |
| 216 | // supported by the subtarget. Therefore useIndirectThunk*() will return true |
| 217 | // if any respective thunk feature is enabled. |
| 218 | bool useIndirectThunkCalls() const { |
| 219 | return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity(); |
| 220 | } |
| 221 | bool useIndirectThunkBranches() const { |
| 222 | return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); |
| 223 | } |
| 224 | |
| 225 | unsigned getPreferVectorWidth() const { return PreferVectorWidth; } |
| 226 | unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } |
| 227 | |
| 228 | // Helper functions to determine when we should allow widening to 512-bit |
| 229 | // during codegen. |
| 230 | // TODO: Currently we're always allowing widening on CPUs without VLX, |
| 231 | // because for many cases we don't have a better option. |
| 232 | bool canExtendTo512DQ() const { |
| 233 | return hasAVX512() && hasEVEX512() && |
| 234 | (!hasVLX() || getPreferVectorWidth() >= 512); |
| 235 | } |
| 236 | bool canExtendTo512BW() const { |
| 237 | return hasBWI() && canExtendTo512DQ(); |
| 238 | } |
| 239 | |
| 240 | bool hasNoDomainDelay() const { return NoDomainDelay; } |
| 241 | bool hasNoDomainDelayMov() const { |
| 242 | return hasNoDomainDelay() || NoDomainDelayMov; |
| 243 | } |
| 244 | bool hasNoDomainDelayBlend() const { |
| 245 | return hasNoDomainDelay() || NoDomainDelayBlend; |
| 246 | } |
| 247 | bool hasNoDomainDelayShuffle() const { |
| 248 | return hasNoDomainDelay() || NoDomainDelayShuffle; |
| 249 | } |
| 250 | |
| 251 | // If there are no 512-bit vectors and we prefer not to use 512-bit registers, |
| 252 | // disable them in the legalizer. |
| 253 | bool useAVX512Regs() const { |
| 254 | return hasAVX512() && hasEVEX512() && |
| 255 | (canExtendTo512DQ() || RequiredVectorWidth > 256); |
| 256 | } |
| 257 | |
| 258 | bool useLight256BitInstructions() const { |
| 259 | return getPreferVectorWidth() >= 256 || AllowLight256Bit; |
| 260 | } |
| 261 | |
| 262 | bool useBWIRegs() const { |
| 263 | return hasBWI() && useAVX512Regs(); |
| 264 | } |
| 265 | |
| 266 | // Returns true if the destination register of a BSF/BSR instruction is |
| 267 | // not touched if the source register is zero. |
| 268 | // NOTE: i32->i64 implicit zext isn't guaranteed by BSR/BSF pass through. |
| 269 | bool hasBitScanPassThrough() const { return is64Bit(); } |
| 270 | |
| 271 | bool isXRaySupported() const override { return is64Bit(); } |
| 272 | |
| 273 | /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for |
| 274 | /// no-sse2). There isn't any reason to disable it if the target processor |
| 275 | /// supports it. |
| 276 | bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); } |
| 277 | |
| 278 | /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for |
| 279 | /// no-sse2). There isn't any reason to disable it if the target processor |
| 280 | /// supports it. |
| 281 | bool hasMFence() const { return hasSSE2() || is64Bit(); } |
| 282 | |
| 283 | /// Avoid use of `mfence` for`fence seq_cst`, and instead use `lock or`. |
| 284 | bool avoidMFence() const { return is64Bit(); } |
| 285 | |
| 286 | const Triple &getTargetTriple() const { return TargetTriple; } |
| 287 | |
| 288 | bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } |
| 289 | bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } |
| 290 | bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } |
| 291 | bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } |
| 292 | bool isTargetPS() const { return TargetTriple.isPS(); } |
| 293 | |
| 294 | bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } |
| 295 | bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } |
| 296 | bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } |
| 297 | |
| 298 | bool isTargetLinux() const { return TargetTriple.isOSLinux(); } |
| 299 | bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } |
| 300 | bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } |
| 301 | bool isTargetAndroid() const { return TargetTriple.isAndroid(); } |
| 302 | bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } |
| 303 | bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } |
| 304 | bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } |
| 305 | bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } |
| 306 | bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } |
| 307 | |
| 308 | bool isTargetWindowsMSVC() const { |
| 309 | return TargetTriple.isWindowsMSVCEnvironment(); |
| 310 | } |
| 311 | |
| 312 | bool isTargetWindowsCoreCLR() const { |
| 313 | return TargetTriple.isWindowsCoreCLREnvironment(); |
| 314 | } |
| 315 | |
| 316 | bool isTargetWindowsCygwin() const { |
| 317 | return TargetTriple.isWindowsCygwinEnvironment(); |
| 318 | } |
| 319 | |
| 320 | bool isTargetWindowsGNU() const { |
| 321 | return TargetTriple.isWindowsGNUEnvironment(); |
| 322 | } |
| 323 | |
| 324 | bool isTargetWindowsItanium() const { |
| 325 | return TargetTriple.isWindowsItaniumEnvironment(); |
| 326 | } |
| 327 | |
| 328 | bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } |
| 329 | |
| 330 | bool isUEFI() const { return TargetTriple.isUEFI(); } |
| 331 | |
| 332 | bool isOSWindows() const { return TargetTriple.isOSWindows(); } |
| 333 | |
| 334 | bool isTargetUEFI64() const { return Is64Bit && isUEFI(); } |
| 335 | |
| 336 | bool isTargetWin64() const { return Is64Bit && isOSWindows(); } |
| 337 | |
| 338 | bool isTargetWin32() const { return !Is64Bit && isOSWindows(); } |
| 339 | |
| 340 | bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; } |
| 341 | bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; } |
| 342 | |
| 343 | bool isPICStyleStubPIC() const { |
| 344 | return PICStyle == PICStyles::Style::StubPIC; |
| 345 | } |
| 346 | |
| 347 | bool isPositionIndependent() const; |
| 348 | |
| 349 | bool isCallingConvWin64(CallingConv::ID CC) const { |
| 350 | switch (CC) { |
| 351 | // On Win64, all these conventions just use the default convention. |
| 352 | case CallingConv::C: |
| 353 | case CallingConv::Fast: |
| 354 | case CallingConv::Tail: |
| 355 | return isTargetWin64() || isTargetUEFI64(); |
| 356 | case CallingConv::Swift: |
| 357 | case CallingConv::SwiftTail: |
| 358 | case CallingConv::X86_FastCall: |
| 359 | case CallingConv::X86_StdCall: |
| 360 | case CallingConv::X86_ThisCall: |
| 361 | case CallingConv::X86_VectorCall: |
| 362 | case CallingConv::Intel_OCL_BI: |
| 363 | return isTargetWin64(); |
| 364 | // This convention allows using the Win64 convention on other targets. |
| 365 | case CallingConv::Win64: |
| 366 | return true; |
| 367 | // This convention allows using the SysV convention on Windows targets. |
| 368 | case CallingConv::X86_64_SysV: |
| 369 | return false; |
| 370 | // Otherwise, who knows what this is. |
| 371 | default: |
| 372 | return false; |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | /// Classify a global variable reference for the current subtarget according |
| 377 | /// to how we should reference it in a non-pcrel context. |
| 378 | unsigned char classifyLocalReference(const GlobalValue *GV) const; |
| 379 | |
| 380 | unsigned char classifyGlobalReference(const GlobalValue *GV, |
| 381 | const Module &M) const; |
| 382 | unsigned char classifyGlobalReference(const GlobalValue *GV) const; |
| 383 | |
| 384 | /// Classify a global function reference for the current subtarget. |
| 385 | unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, |
| 386 | const Module &M) const; |
| 387 | unsigned char |
| 388 | classifyGlobalFunctionReference(const GlobalValue *GV) const override; |
| 389 | |
| 390 | /// Classify a blockaddress reference for the current subtarget according to |
| 391 | /// how we should reference it in a non-pcrel context. |
| 392 | unsigned char classifyBlockAddressReference() const; |
| 393 | |
| 394 | /// Return true if the subtarget allows calls to immediate address. |
| 395 | bool isLegalToCallImmediateAddr() const; |
| 396 | |
| 397 | /// Return whether FrameLowering should always set the "extended frame |
| 398 | /// present" bit in FP, or set it based on a symbol in the runtime. |
| 399 | bool swiftAsyncContextIsDynamicallySet() const { |
| 400 | // Older OS versions (particularly system unwinders) are confused by the |
| 401 | // Swift extended frame, so when building code that might be run on them we |
| 402 | // must dynamically query the concurrency library to determine whether |
| 403 | // extended frames should be flagged as present. |
| 404 | const Triple &TT = getTargetTriple(); |
| 405 | |
| 406 | unsigned Major = TT.getOSVersion().getMajor(); |
| 407 | switch(TT.getOS()) { |
| 408 | default: |
| 409 | return false; |
| 410 | case Triple::IOS: |
| 411 | case Triple::TvOS: |
| 412 | return Major < 15; |
| 413 | case Triple::WatchOS: |
| 414 | return Major < 8; |
| 415 | case Triple::MacOSX: |
| 416 | case Triple::Darwin: |
| 417 | return Major < 12; |
| 418 | } |
| 419 | } |
| 420 | |
| 421 | /// If we are using indirect thunks, we need to expand indirectbr to avoid it |
| 422 | /// lowering to an actual indirect jump. |
| 423 | bool enableIndirectBrExpand() const override { |
| 424 | return useIndirectThunkBranches(); |
| 425 | } |
| 426 | |
| 427 | /// Enable the MachineScheduler pass for all X86 subtargets. |
| 428 | bool enableMachineScheduler() const override { return true; } |
| 429 | |
| 430 | bool enableEarlyIfConversion() const override; |
| 431 | |
| 432 | void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>> |
| 433 | &Mutations) const override; |
| 434 | |
| 435 | AntiDepBreakMode getAntiDepBreakMode() const override { |
| 436 | return TargetSubtargetInfo::ANTIDEP_CRITICAL; |
| 437 | } |
| 438 | }; |
| 439 | |
| 440 | } // end namespace llvm |
| 441 | |
| 442 | #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H |
| 443 | |