| 1 | //===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the NVPTX specific subclass of TargetSubtarget. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "NVPTXSubtarget.h" |
| 14 | #include "NVPTXSelectionDAGInfo.h" |
| 15 | #include "NVPTXTargetMachine.h" |
| 16 | #include "llvm/Support/ErrorHandling.h" |
| 17 | #include "llvm/Support/FormatVariadic.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "nvptx-subtarget" |
| 22 | |
| 23 | #define GET_SUBTARGETINFO_ENUM |
| 24 | #define GET_SUBTARGETINFO_TARGET_DESC |
| 25 | #define GET_SUBTARGETINFO_CTOR |
| 26 | #include "NVPTXGenSubtargetInfo.inc" |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | NoF16Math("nvptx-no-f16-math" , cl::Hidden, |
| 30 | cl::desc("NVPTX Specific: Disable generation of f16 math ops." ), |
| 31 | cl::init(Val: false)); |
| 32 | |
| 33 | static cl::opt<bool> NoF32x2("nvptx-no-f32x2" , cl::Hidden, |
| 34 | cl::desc("NVPTX Specific: Disable generation of " |
| 35 | "f32x2 instructions and registers." ), |
| 36 | cl::init(Val: false)); |
| 37 | |
// Pin the vtable to this file: anchor() is intentionally empty and exists only
// so that the class has one out-of-line method definition in this file.
void NVPTXSubtarget::anchor() {}
| 40 | |
| 41 | NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, |
| 42 | StringRef FS) { |
| 43 | TargetName = std::string(CPU); |
| 44 | |
| 45 | ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS); |
| 46 | |
| 47 | // Re-map SM version numbers, SmVersion carries the regular SMs which do |
| 48 | // have relative order, while FullSmVersion allows distinguishing sm_90 from |
| 49 | // sm_90a, which would *not* be a subset of sm_91. |
| 50 | SmVersion = getSmVersion(); |
| 51 | |
| 52 | // Set default to PTX 6.0 (CUDA 9.0) |
| 53 | if (PTXVersion == 0) { |
| 54 | PTXVersion = 60; |
| 55 | } |
| 56 | |
| 57 | return *this; |
| 58 | } |
| 59 | |
/// Build the subtarget for triple \p TT, processor \p CPU and feature string
/// \p FS; \p TM is forwarded to the lowering info (TLInfo).
///
/// PTXVersion and FullSmVersion start at 0 and 200. While InstrInfo is being
/// initialized, initializeSubtargetDependencies() parses \p FS, recomputes
/// SmVersion and applies the default PTX version if none was set.
/// NOTE(review): this depends on the members being declared in the header in
/// the order they are listed here -- confirm against NVPTXSubtarget.h.
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                               const std::string &FS,
                               const NVPTXTargetMachine &TM)
    : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
      FullSmVersion(200), SmVersion(getSmVersion()),
      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
  // The SelectionDAG info object is created last and owned through TSInfo.
  TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}
| 68 | |
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably kept here so that TSInfo's pointee type is complete
// where it gets destroyed -- confirm against NVPTXSubtarget.h.
NVPTXSubtarget::~NVPTXSubtarget() = default;
| 70 | |
/// \returns a non-owning pointer to the SelectionDAG info object that the
/// constructor stores in TSInfo; TSInfo keeps ownership.
const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
  return TSInfo.get();
}
| 74 | |
| 75 | bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion, |
| 76 | ArrayRef<unsigned> SMVersions) const { |
| 77 | unsigned PTXVer = getPTXVersion(); |
| 78 | if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion) |
| 79 | return false; |
| 80 | |
| 81 | unsigned SMVer = getSmVersion(); |
| 82 | return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) { |
| 83 | // sm_101 is a different family, never group it with sm_10x. |
| 84 | if (SMVer == 101 || SM == 101) |
| 85 | return SMVer == SM && |
| 86 | // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not |
| 87 | // supported. |
| 88 | !(PTXVer >= 90 && SMVer == 101); |
| 89 | |
| 90 | return getSmFamilyVersion() == SM / 10 && SMVer >= SM; |
| 91 | }); |
| 92 | } |
| 93 | |
| 94 | bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion, |
| 95 | ArrayRef<unsigned> SMVersions) const { |
| 96 | unsigned PTXVer = getPTXVersion(); |
| 97 | if (!hasArchAccelFeatures() || PTXVer < PTXVersion) |
| 98 | return false; |
| 99 | |
| 100 | unsigned SMVer = getSmVersion(); |
| 101 | return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) { |
| 102 | return SMVer == SM && |
| 103 | // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not |
| 104 | // supported. |
| 105 | !(PTXVer >= 90 && SMVer == 101); |
| 106 | }); |
| 107 | } |
| 108 | |
| 109 | bool NVPTXSubtarget::allowFP16Math() const { |
| 110 | return hasFP16Math() && NoF16Math == false; |
| 111 | } |
| 112 | |
| 113 | bool NVPTXSubtarget::hasF32x2Instructions() const { |
| 114 | return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2; |
| 115 | } |
| 116 | |
| 117 | bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const { |
| 118 | if (!hasBF16Math()) |
| 119 | return false; |
| 120 | |
| 121 | switch (Opcode) { |
| 122 | // Several BF16 instructions are available on sm_90 only. |
| 123 | case ISD::FADD: |
| 124 | case ISD::FMUL: |
| 125 | case ISD::FSUB: |
| 126 | case ISD::SELECT: |
| 127 | case ISD::SELECT_CC: |
| 128 | case ISD::SETCC: |
| 129 | case ISD::FEXP2: |
| 130 | case ISD::FCEIL: |
| 131 | case ISD::FFLOOR: |
| 132 | case ISD::FNEARBYINT: |
| 133 | case ISD::FRINT: |
| 134 | case ISD::FROUNDEVEN: |
| 135 | case ISD::FTRUNC: |
| 136 | return getSmVersion() >= 90 && getPTXVersion() >= 78; |
| 137 | // Several BF16 instructions are available on sm_80 only. |
| 138 | case ISD::FMINNUM: |
| 139 | case ISD::FMAXNUM: |
| 140 | case ISD::FMAXNUM_IEEE: |
| 141 | case ISD::FMINNUM_IEEE: |
| 142 | case ISD::FMAXIMUM: |
| 143 | case ISD::FMINIMUM: |
| 144 | return getSmVersion() >= 80 && getPTXVersion() >= 70; |
| 145 | } |
| 146 | return true; |
| 147 | } |
| 148 | |
| 149 | void NVPTXSubtarget::failIfClustersUnsupported( |
| 150 | std::string const &FailureMessage) const { |
| 151 | if (hasClusters()) |
| 152 | return; |
| 153 | |
| 154 | report_fatal_error(reason: formatv( |
| 155 | Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. " |
| 156 | "Requires SM >= 90 and PTX >= 78." , |
| 157 | Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage)); |
| 158 | } |
| 159 | |