| 1 | //===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the NVPTX specific subclass of TargetSubtarget. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "NVPTXSubtarget.h" |
| 14 | #include "NVPTXSelectionDAGInfo.h" |
| 15 | #include "NVPTXTargetMachine.h" |
| 16 | #include "llvm/Support/ErrorHandling.h" |
| 17 | #include "llvm/Support/FormatVariadic.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "nvptx-subtarget" |
| 22 | |
| 23 | #define GET_SUBTARGETINFO_ENUM |
| 24 | #define GET_SUBTARGETINFO_TARGET_DESC |
| 25 | #define GET_SUBTARGETINFO_CTOR |
| 26 | #include "NVPTXGenSubtargetInfo.inc" |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | NoF16Math("nvptx-no-f16-math" , cl::Hidden, |
| 30 | cl::desc("NVPTX Specific: Disable generation of f16 math ops." ), |
| 31 | cl::init(Val: false)); |
| 32 | |
| 33 | static cl::opt<bool> NoF32x2("nvptx-no-f32x2" , cl::Hidden, |
| 34 | cl::desc("NVPTX Specific: Disable generation of " |
| 35 | "f32x2 instructions and registers." ), |
| 36 | cl::init(Val: false)); |
| 37 | |
// Helpers that build a FullSmVersion value from a plain SM number.
// The SM number is scaled by ten and the target suffix lives in the ones
// digit: 0 for the base target, 2 for the 'f' suffix and 3 for the 'a' suffix.
// All three are constexpr so they can be used as switch case labels.

// Base target, e.g. SM(90) == 900.
static constexpr unsigned SM(unsigned Version) { return 10 * Version; }
// 'f'-suffixed target, e.g. SMF(100) == 1002.
static constexpr unsigned SMF(unsigned Version) { return 10 * Version + 2; }
// 'a'-suffixed target, e.g. SMA(90) == 903.
static constexpr unsigned SMA(unsigned Version) { return 10 * Version + 3; }
| 43 | |
| 44 | // Pin the vtable to this file. |
| 45 | void NVPTXSubtarget::anchor() {} |
| 46 | |
| 47 | // Returns the minimum PTX version required for a given SM target. |
| 48 | // This must be kept in sync with the "Supported Targets" column of the |
| 49 | // "PTX Release History" table in the PTX ISA documentation: |
| 50 | // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history |
| 51 | // |
| 52 | // Note: LLVM's minimum supported PTX version is 3.2 (see FeaturePTX in |
| 53 | // NVPTX.td), so older SMs that supported earlier PTX versions instead use 3.2 |
| 54 | // as their effective minimum. |
| 55 | static unsigned getMinPTXVersionForSM(unsigned FullSmVersion) { |
| 56 | switch (FullSmVersion) { |
| 57 | case SM(Version: 20): |
| 58 | case SM(Version: 21): |
| 59 | case SM(Version: 30): |
| 60 | case SM(Version: 35): |
| 61 | return 32; |
| 62 | case SM(Version: 32): |
| 63 | case SM(Version: 50): |
| 64 | return 40; |
| 65 | case SM(Version: 37): |
| 66 | case SM(Version: 52): |
| 67 | return 41; |
| 68 | case SM(Version: 53): |
| 69 | return 42; |
| 70 | case SM(Version: 60): |
| 71 | case SM(Version: 61): |
| 72 | case SM(Version: 62): |
| 73 | return 50; |
| 74 | case SM(Version: 70): |
| 75 | return 60; |
| 76 | case SM(Version: 72): |
| 77 | return 61; |
| 78 | case SM(Version: 75): |
| 79 | return 63; |
| 80 | case SM(Version: 80): |
| 81 | return 70; |
| 82 | case SM(Version: 86): |
| 83 | return 71; |
| 84 | case SM(Version: 87): |
| 85 | return 74; |
| 86 | case SM(Version: 89): |
| 87 | case SM(Version: 90): |
| 88 | return 78; |
| 89 | case SMA(Version: 90): |
| 90 | return 80; |
| 91 | case SM(Version: 100): |
| 92 | case SMA(Version: 100): |
| 93 | case SM(Version: 101): |
| 94 | case SMA(Version: 101): |
| 95 | return 86; |
| 96 | case SM(Version: 120): |
| 97 | case SMA(Version: 120): |
| 98 | return 87; |
| 99 | case SMF(Version: 100): |
| 100 | case SMF(Version: 101): |
| 101 | case SM(Version: 103): |
| 102 | case SMF(Version: 103): |
| 103 | case SMA(Version: 103): |
| 104 | case SMF(Version: 120): |
| 105 | case SM(Version: 121): |
| 106 | case SMF(Version: 121): |
| 107 | case SMA(Version: 121): |
| 108 | return 88; |
| 109 | case SM(Version: 88): |
| 110 | case SM(Version: 110): |
| 111 | case SMF(Version: 110): |
| 112 | case SMA(Version: 110): |
| 113 | return 90; |
| 114 | default: |
| 115 | llvm_unreachable("Unknown SM version" ); |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, |
| 120 | StringRef FS) { |
| 121 | TargetName = std::string(CPU); |
| 122 | |
| 123 | ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS); |
| 124 | |
| 125 | // Re-map SM version numbers, SmVersion carries the regular SMs which do |
| 126 | // have relative order, while FullSmVersion allows distinguishing sm_90 from |
| 127 | // sm_90a, which would *not* be a subset of sm_91. |
| 128 | SmVersion = getSmVersion(); |
| 129 | |
| 130 | unsigned MinPTX = getMinPTXVersionForSM(FullSmVersion); |
| 131 | |
| 132 | if (PTXVersion == 0) { |
| 133 | // User didn't request a specific PTX version; use the minimum for this SM. |
| 134 | PTXVersion = MinPTX; |
| 135 | } else if (PTXVersion < MinPTX) { |
| 136 | // User explicitly requested an insufficient PTX version. |
| 137 | reportFatalUsageError( |
| 138 | reason: formatv(Fmt: "PTX version {0}.{1} does not support target '{2}'. " |
| 139 | "Minimum required PTX version is {3}.{4}. " |
| 140 | "Either remove the PTX version to use the default, " |
| 141 | "or increase it to at least {3}.{4}." , |
| 142 | Vals: PTXVersion / 10, Vals: PTXVersion % 10, Vals: getTargetName(), Vals: MinPTX / 10, |
| 143 | Vals: MinPTX % 10)); |
| 144 | } |
| 145 | |
| 146 | return *this; |
| 147 | } |
| 148 | |
| 149 | NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, |
| 150 | const std::string &FS, |
| 151 | const NVPTXTargetMachine &TM) |
| 152 | : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), |
| 153 | FullSmVersion(200), SmVersion(getSmVersion()), |
| 154 | InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { |
| 155 | TSInfo = std::make_unique<NVPTXSelectionDAGInfo>(); |
| 156 | } |
| 157 | |
| 158 | NVPTXSubtarget::~NVPTXSubtarget() = default; |
| 159 | |
| 160 | const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { |
| 161 | return TSInfo.get(); |
| 162 | } |
| 163 | |
| 164 | bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion, |
| 165 | ArrayRef<unsigned> SMVersions) const { |
| 166 | unsigned PTXVer = getPTXVersion(); |
| 167 | if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion) |
| 168 | return false; |
| 169 | |
| 170 | unsigned SMVer = getSmVersion(); |
| 171 | return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) { |
| 172 | // sm_101 is a different family, never group it with sm_10x. |
| 173 | if (SMVer == 101 || SM == 101) |
| 174 | return SMVer == SM && |
| 175 | // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not |
| 176 | // supported. |
| 177 | !(PTXVer >= 90 && SMVer == 101); |
| 178 | |
| 179 | return getSmFamilyVersion() == SM / 10 && SMVer >= SM; |
| 180 | }); |
| 181 | } |
| 182 | |
| 183 | bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion, |
| 184 | ArrayRef<unsigned> SMVersions) const { |
| 185 | unsigned PTXVer = getPTXVersion(); |
| 186 | if (!hasArchAccelFeatures() || PTXVer < PTXVersion) |
| 187 | return false; |
| 188 | |
| 189 | unsigned SMVer = getSmVersion(); |
| 190 | return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) { |
| 191 | return SMVer == SM && |
| 192 | // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not |
| 193 | // supported. |
| 194 | !(PTXVer >= 90 && SMVer == 101); |
| 195 | }); |
| 196 | } |
| 197 | |
| 198 | bool NVPTXSubtarget::allowFP16Math() const { |
| 199 | return hasFP16Math() && NoF16Math == false; |
| 200 | } |
| 201 | |
| 202 | bool NVPTXSubtarget::hasF32x2Instructions() const { |
| 203 | return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2; |
| 204 | } |
| 205 | |
| 206 | bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const { |
| 207 | if (!hasBF16Math()) |
| 208 | return false; |
| 209 | |
| 210 | switch (Opcode) { |
| 211 | // Several BF16 instructions are available on sm_90 only. |
| 212 | case ISD::FADD: |
| 213 | case ISD::FMUL: |
| 214 | case ISD::FSUB: |
| 215 | case ISD::SELECT: |
| 216 | case ISD::SELECT_CC: |
| 217 | case ISD::SETCC: |
| 218 | case ISD::FEXP2: |
| 219 | case ISD::FCEIL: |
| 220 | case ISD::FFLOOR: |
| 221 | case ISD::FNEARBYINT: |
| 222 | case ISD::FRINT: |
| 223 | case ISD::FROUNDEVEN: |
| 224 | case ISD::FTRUNC: |
| 225 | return getSmVersion() >= 90 && getPTXVersion() >= 78; |
| 226 | // Several BF16 instructions are available on sm_80 only. |
| 227 | case ISD::FMINNUM: |
| 228 | case ISD::FMAXNUM: |
| 229 | case ISD::FMAXNUM_IEEE: |
| 230 | case ISD::FMINNUM_IEEE: |
| 231 | case ISD::FMAXIMUM: |
| 232 | case ISD::FMINIMUM: |
| 233 | return getSmVersion() >= 80 && getPTXVersion() >= 70; |
| 234 | } |
| 235 | return true; |
| 236 | } |
| 237 | |
| 238 | void NVPTXSubtarget::failIfClustersUnsupported( |
| 239 | std::string const &FailureMessage) const { |
| 240 | if (hasClusters()) |
| 241 | return; |
| 242 | |
| 243 | report_fatal_error(reason: formatv( |
| 244 | Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. " |
| 245 | "Requires SM >= 90 and PTX >= 78." , |
| 246 | Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage)); |
| 247 | } |
| 248 | |