| 1 | //===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the NVPTX specific subclass of TargetSubtarget. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "NVPTXSubtarget.h" |
| 14 | #include "NVPTXSelectionDAGInfo.h" |
| 15 | #include "NVPTXTargetMachine.h" |
| 16 | #include "llvm/Support/ErrorHandling.h" |
| 17 | #include "llvm/Support/FormatVariadic.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "nvptx-subtarget" |
| 22 | |
| 23 | #define GET_SUBTARGETINFO_ENUM |
| 24 | #define GET_SUBTARGETINFO_TARGET_DESC |
| 25 | #define GET_SUBTARGETINFO_CTOR |
| 26 | #include "NVPTXGenSubtargetInfo.inc" |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | NoF16Math("nvptx-no-f16-math" , cl::Hidden, |
| 30 | cl::desc("NVPTX Specific: Disable generation of f16 math ops." ), |
| 31 | cl::init(Val: false)); |
| 32 | // Pin the vtable to this file. |
| 33 | void NVPTXSubtarget::anchor() {} |
| 34 | |
| 35 | NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, |
| 36 | StringRef FS) { |
| 37 | TargetName = std::string(CPU); |
| 38 | |
| 39 | ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS); |
| 40 | |
| 41 | // Re-map SM version numbers, SmVersion carries the regular SMs which do |
| 42 | // have relative order, while FullSmVersion allows distinguishing sm_90 from |
| 43 | // sm_90a, which would *not* be a subset of sm_91. |
| 44 | SmVersion = getSmVersion(); |
| 45 | |
| 46 | // Set default to PTX 6.0 (CUDA 9.0) |
| 47 | if (PTXVersion == 0) { |
| 48 | PTXVersion = 60; |
| 49 | } |
| 50 | |
| 51 | return *this; |
| 52 | } |
| 53 | |
| 54 | NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, |
| 55 | const std::string &FS, |
| 56 | const NVPTXTargetMachine &TM) |
| 57 | : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), |
| 58 | FullSmVersion(200), SmVersion(getSmVersion()), |
| 59 | TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) { |
| 60 | TSInfo = std::make_unique<NVPTXSelectionDAGInfo>(); |
| 61 | } |
| 62 | |
| 63 | NVPTXSubtarget::~NVPTXSubtarget() = default; |
| 64 | |
| 65 | const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { |
| 66 | return TSInfo.get(); |
| 67 | } |
| 68 | |
| 69 | bool NVPTXSubtarget::allowFP16Math() const { |
| 70 | return hasFP16Math() && NoF16Math == false; |
| 71 | } |
| 72 | |
| 73 | bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const { |
| 74 | if (!hasBF16Math()) |
| 75 | return false; |
| 76 | |
| 77 | switch (Opcode) { |
| 78 | // Several BF16 instructions are available on sm_90 only. |
| 79 | case ISD::FADD: |
| 80 | case ISD::FMUL: |
| 81 | case ISD::FSUB: |
| 82 | case ISD::SELECT: |
| 83 | case ISD::SELECT_CC: |
| 84 | case ISD::SETCC: |
| 85 | case ISD::FEXP2: |
| 86 | case ISD::FCEIL: |
| 87 | case ISD::FFLOOR: |
| 88 | case ISD::FNEARBYINT: |
| 89 | case ISD::FRINT: |
| 90 | case ISD::FROUNDEVEN: |
| 91 | case ISD::FTRUNC: |
| 92 | return getSmVersion() >= 90 && getPTXVersion() >= 78; |
| 93 | // Several BF16 instructions are available on sm_80 only. |
| 94 | case ISD::FMINNUM: |
| 95 | case ISD::FMAXNUM: |
| 96 | case ISD::FMAXNUM_IEEE: |
| 97 | case ISD::FMINNUM_IEEE: |
| 98 | case ISD::FMAXIMUM: |
| 99 | case ISD::FMINIMUM: |
| 100 | return getSmVersion() >= 80 && getPTXVersion() >= 70; |
| 101 | } |
| 102 | return true; |
| 103 | } |
| 104 | |
| 105 | void NVPTXSubtarget::failIfClustersUnsupported( |
| 106 | std::string const &FailureMessage) const { |
| 107 | if (hasClusters()) |
| 108 | return; |
| 109 | |
| 110 | report_fatal_error(reason: formatv( |
| 111 | Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. " |
| 112 | "Requires SM >= 90 and PTX >= 78." , |
| 113 | Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage)); |
| 114 | } |
| 115 | |