1 | //===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the NVPTX specific subclass of TargetSubtarget. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "NVPTXSubtarget.h" |
14 | #include "NVPTXSelectionDAGInfo.h" |
15 | #include "NVPTXTargetMachine.h" |
16 | #include "llvm/Support/ErrorHandling.h" |
17 | #include "llvm/Support/FormatVariadic.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | #define DEBUG_TYPE "nvptx-subtarget" |
22 | |
23 | #define GET_SUBTARGETINFO_ENUM |
24 | #define GET_SUBTARGETINFO_TARGET_DESC |
25 | #define GET_SUBTARGETINFO_CTOR |
26 | #include "NVPTXGenSubtargetInfo.inc" |
27 | |
28 | static cl::opt<bool> |
29 | NoF16Math("nvptx-no-f16-math" , cl::Hidden, |
30 | cl::desc("NVPTX Specific: Disable generation of f16 math ops." ), |
31 | cl::init(Val: false)); |
// Pin the vtable to this file.
// The body is intentionally empty: the function exists only so that it has a
// single out-of-line definition in this translation unit.
void NVPTXSubtarget::anchor() {}
34 | |
35 | NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, |
36 | StringRef FS) { |
37 | TargetName = std::string(CPU); |
38 | |
39 | ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS); |
40 | |
41 | // Re-map SM version numbers, SmVersion carries the regular SMs which do |
42 | // have relative order, while FullSmVersion allows distinguishing sm_90 from |
43 | // sm_90a, which would *not* be a subset of sm_91. |
44 | SmVersion = getSmVersion(); |
45 | |
46 | // Set default to PTX 6.0 (CUDA 9.0) |
47 | if (PTXVersion == 0) { |
48 | PTXVersion = 60; |
49 | } |
50 | |
51 | return *this; |
52 | } |
53 | |
// Builds the subtarget for triple TT, CPU name CPU and feature string FS.
//
// CPU is forwarded to the generated base class as both the CPU and the tuning
// CPU. PTXVersion starts at 0 and FullSmVersion at 200. TLInfo is constructed
// from TM and the result of initializeSubtargetDependencies(CPU, FS), so that
// call runs while TLInfo is being initialized.
//
// NOTE(review): members are initialized in their declaration order (see
// NVPTXSubtarget.h), not in the order written below. SmVersion(getSmVersion())
// and the TLInfo initializer presumably rely on PTXVersion/FullSmVersion being
// declared first -- confirm against the header before reordering anything.
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                               const std::string &FS,
                               const NVPTXTargetMachine &TM)
    : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
      FullSmVersion(200), SmVersion(getSmVersion()),
      TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
  // The SelectionDAG info object is created last, in the constructor body.
  TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}
62 | |
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here rather than in the header so that the
// type held by TSInfo is complete at the point of destruction -- confirm
// against NVPTXSubtarget.h.
NVPTXSubtarget::~NVPTXSubtarget() = default;
64 | |
65 | const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { |
66 | return TSInfo.get(); |
67 | } |
68 | |
69 | bool NVPTXSubtarget::allowFP16Math() const { |
70 | return hasFP16Math() && NoF16Math == false; |
71 | } |
72 | |
73 | bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const { |
74 | if (!hasBF16Math()) |
75 | return false; |
76 | |
77 | switch (Opcode) { |
78 | // Several BF16 instructions are available on sm_90 only. |
79 | case ISD::FADD: |
80 | case ISD::FMUL: |
81 | case ISD::FSUB: |
82 | case ISD::SELECT: |
83 | case ISD::SELECT_CC: |
84 | case ISD::SETCC: |
85 | case ISD::FEXP2: |
86 | case ISD::FCEIL: |
87 | case ISD::FFLOOR: |
88 | case ISD::FNEARBYINT: |
89 | case ISD::FRINT: |
90 | case ISD::FROUNDEVEN: |
91 | case ISD::FTRUNC: |
92 | return getSmVersion() >= 90 && getPTXVersion() >= 78; |
93 | // Several BF16 instructions are available on sm_80 only. |
94 | case ISD::FMINNUM: |
95 | case ISD::FMAXNUM: |
96 | case ISD::FMAXNUM_IEEE: |
97 | case ISD::FMINNUM_IEEE: |
98 | case ISD::FMAXIMUM: |
99 | case ISD::FMINIMUM: |
100 | return getSmVersion() >= 80 && getPTXVersion() >= 70; |
101 | } |
102 | return true; |
103 | } |
104 | |
105 | void NVPTXSubtarget::failIfClustersUnsupported( |
106 | std::string const &FailureMessage) const { |
107 | if (hasClusters()) |
108 | return; |
109 | |
110 | report_fatal_error(reason: formatv( |
111 | Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. " |
112 | "Requires SM >= 90 and PTX >= 78." , |
113 | Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage)); |
114 | } |
115 | |