//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the NVPTX specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
12
13#include "NVPTXSubtarget.h"
14#include "NVPTXSelectionDAGInfo.h"
15#include "NVPTXTargetMachine.h"
16#include "llvm/Support/ErrorHandling.h"
17#include "llvm/Support/FormatVariadic.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "nvptx-subtarget"
22
23#define GET_SUBTARGETINFO_ENUM
24#define GET_SUBTARGETINFO_TARGET_DESC
25#define GET_SUBTARGETINFO_CTOR
26#include "NVPTXGenSubtargetInfo.inc"
27
28static cl::opt<bool>
29 NoF16Math("nvptx-no-f16-math", cl::Hidden,
30 cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
31 cl::init(Val: false));
32
33static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
34 cl::desc("NVPTX Specific: Disable generation of "
35 "f32x2 instructions and registers."),
36 cl::init(Val: false));
37
38// Pin the vtable to this file.
39void NVPTXSubtarget::anchor() {}
40
41NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
42 StringRef FS) {
43 TargetName = std::string(CPU);
44
45 ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS);
46
47 // Re-map SM version numbers, SmVersion carries the regular SMs which do
48 // have relative order, while FullSmVersion allows distinguishing sm_90 from
49 // sm_90a, which would *not* be a subset of sm_91.
50 SmVersion = getSmVersion();
51
52 // Set default to PTX 6.0 (CUDA 9.0)
53 if (PTXVersion == 0) {
54 PTXVersion = 60;
55 }
56
57 return *this;
58}
59
60NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
61 const std::string &FS,
62 const NVPTXTargetMachine &TM)
63 : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
64 FullSmVersion(200), SmVersion(getSmVersion()),
65 InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
66 TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
67}
68
69NVPTXSubtarget::~NVPTXSubtarget() = default;
70
71const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
72 return TSInfo.get();
73}
74
75bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
76 ArrayRef<unsigned> SMVersions) const {
77 unsigned PTXVer = getPTXVersion();
78 if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
79 return false;
80
81 unsigned SMVer = getSmVersion();
82 return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
83 // sm_101 is a different family, never group it with sm_10x.
84 if (SMVer == 101 || SM == 101)
85 return SMVer == SM &&
86 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
87 // supported.
88 !(PTXVer >= 90 && SMVer == 101);
89
90 return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
91 });
92}
93
94bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
95 ArrayRef<unsigned> SMVersions) const {
96 unsigned PTXVer = getPTXVersion();
97 if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
98 return false;
99
100 unsigned SMVer = getSmVersion();
101 return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
102 return SMVer == SM &&
103 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
104 // supported.
105 !(PTXVer >= 90 && SMVer == 101);
106 });
107}
108
109bool NVPTXSubtarget::allowFP16Math() const {
110 return hasFP16Math() && NoF16Math == false;
111}
112
113bool NVPTXSubtarget::hasF32x2Instructions() const {
114 return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2;
115}
116
117bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const {
118 if (!hasBF16Math())
119 return false;
120
121 switch (Opcode) {
122 // Several BF16 instructions are available on sm_90 only.
123 case ISD::FADD:
124 case ISD::FMUL:
125 case ISD::FSUB:
126 case ISD::SELECT:
127 case ISD::SELECT_CC:
128 case ISD::SETCC:
129 case ISD::FEXP2:
130 case ISD::FCEIL:
131 case ISD::FFLOOR:
132 case ISD::FNEARBYINT:
133 case ISD::FRINT:
134 case ISD::FROUNDEVEN:
135 case ISD::FTRUNC:
136 return getSmVersion() >= 90 && getPTXVersion() >= 78;
137 // Several BF16 instructions are available on sm_80 only.
138 case ISD::FMINNUM:
139 case ISD::FMAXNUM:
140 case ISD::FMAXNUM_IEEE:
141 case ISD::FMINNUM_IEEE:
142 case ISD::FMAXIMUM:
143 case ISD::FMINIMUM:
144 return getSmVersion() >= 80 && getPTXVersion() >= 70;
145 }
146 return true;
147}
148
149void NVPTXSubtarget::failIfClustersUnsupported(
150 std::string const &FailureMessage) const {
151 if (hasClusters())
152 return;
153
154 report_fatal_error(reason: formatv(
155 Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
156 "Requires SM >= 90 and PTX >= 78.",
157 Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage));
158}
159