1//===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SIModeRegisterDefaults.h"
10#include "GCNSubtarget.h"
11
12using namespace llvm;
13
14SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
15 const GCNSubtarget &ST) {
16 *this = getDefaultForCallingConv(CC: F.getCallingConv());
17
18 if (ST.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode)) {
19 StringRef IEEEAttr = F.getFnAttribute(Kind: "amdgpu-ieee").getValueAsString();
20 if (!IEEEAttr.empty())
21 IEEE = IEEEAttr == "true";
22
23 StringRef DX10ClampAttr =
24 F.getFnAttribute(Kind: "amdgpu-dx10-clamp").getValueAsString();
25 if (!DX10ClampAttr.empty())
26 DX10Clamp = DX10ClampAttr == "true";
27 }
28
29 DenormalFPEnv FPEnv = F.getDenormalFPEnv();
30 FP64FP16Denormals = FPEnv.DefaultMode;
31 FP32Denormals = FPEnv.F32Mode;
32}
33
34using namespace AMDGPU;
35
36/// Combine f32 and f64 rounding modes into a combined rounding mode value.
37static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
38 uint32_t HWFP64Val) {
39 return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset;
40}
41
42static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
43 uint32_t HWF32Val,
44 uint32_t HWF64Val) {
45 uint32_t ModeVal = getModeRegisterRoundMode(HWFP32Val: HWF32Val, HWFP64Val: HWF64Val);
46 if (FltRoundsVal > TowardNegative)
47 FltRoundsVal -= ExtendedFltRoundOffset;
48
49 uint32_t BitIndex = ModeVal << 2;
50 return static_cast<uint64_t>(FltRoundsVal) << BitIndex;
51}
52
53// Encode FLT_ROUNDS value where the two rounding modes are the same and use a
54// standard value
55static constexpr uint64_t
56encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) {
57 return encodeFltRoundsTable(FltRoundsVal: FltRoundsMode, HWF32Val: HWVal, HWF64Val: HWVal);
58}
59
60// Convert mode register encoded rounding mode to AMDGPUFltRounds
61static constexpr AMDGPUFltRounds
62decodeIndexFltRoundConversionTable(uint32_t HWMode) {
63 uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf;
64 if (TableRead > TowardNegative)
65 TableRead += ExtendedFltRoundOffset;
66 return static_cast<AMDGPUFltRounds>(TableRead);
67}
68
69static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO;
70static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST;
71static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF;
72static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF;
73
74const uint64_t AMDGPU::FltRoundConversionTable =
75 encodeFltRoundsTableSame(FltRoundsMode: TowardZeroF32_TowardZeroF64, HWVal: HWTowardZero) |
76 encodeFltRoundsTableSame(FltRoundsMode: NearestTiesToEvenF32_NearestTiesToEvenF64,
77 HWVal: HWNearestTiesToEven) |
78 encodeFltRoundsTableSame(FltRoundsMode: TowardPositiveF32_TowardPositiveF64,
79 HWVal: HWTowardPositive) |
80 encodeFltRoundsTableSame(FltRoundsMode: TowardNegativeF32_TowardNegativeF64,
81 HWVal: HWTowardNegative) |
82
83 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_NearestTiesToEvenF64, HWF32Val: HWTowardZero,
84 HWF64Val: HWNearestTiesToEven) |
85 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardPositiveF64, HWF32Val: HWTowardZero,
86 HWF64Val: HWTowardPositive) |
87 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardNegativeF64, HWF32Val: HWTowardZero,
88 HWF64Val: HWTowardNegative) |
89
90 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardZeroF64,
91 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardZero) |
92 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardPositiveF64,
93 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardPositive) |
94 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardNegativeF64,
95 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardNegative) |
96
97 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardZeroF64, HWF32Val: HWTowardPositive,
98 HWF64Val: HWTowardZero) |
99 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_NearestTiesToEvenF64,
100 HWF32Val: HWTowardPositive, HWF64Val: HWNearestTiesToEven) |
101 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardNegativeF64, HWF32Val: HWTowardPositive,
102 HWF64Val: HWTowardNegative) |
103
104 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardZeroF64, HWF32Val: HWTowardNegative,
105 HWF64Val: HWTowardZero) |
106 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_NearestTiesToEvenF64,
107 HWF32Val: HWTowardNegative, HWF64Val: HWNearestTiesToEven) |
108 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardPositiveF64, HWF32Val: HWTowardNegative,
109 HWF64Val: HWTowardPositive);
110
111// Verify evaluation of FltRoundConversionTable
112
113// If both modes are the same, should return the standard values.
114static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
115 HWFP32Val: HWTowardZero, HWFP64Val: HWTowardZero)) == AMDGPUFltRounds::TowardZero);
116static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
117 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWNearestTiesToEven)) ==
118 AMDGPUFltRounds::NearestTiesToEven);
119static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
120 HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardPositive)) ==
121 AMDGPUFltRounds::TowardPositive);
122static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
123 HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardNegative)) ==
124 AMDGPUFltRounds::TowardNegative);
125
126static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
127 HWFP32Val: HWTowardZero, HWFP64Val: HWNearestTiesToEven)) ==
128 TowardZeroF32_NearestTiesToEvenF64);
129static_assert(decodeIndexFltRoundConversionTable(
130 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardPositive)) ==
131 TowardZeroF32_TowardPositiveF64);
132static_assert(decodeIndexFltRoundConversionTable(
133 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardNegative)) ==
134 TowardZeroF32_TowardNegativeF64);
135
136static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
137 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardZero)) ==
138 NearestTiesToEvenF32_TowardZeroF64);
139static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
140 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardPositive)) ==
141 NearestTiesToEvenF32_TowardPositiveF64);
142static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
143 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardNegative)) ==
144 NearestTiesToEvenF32_TowardNegativeF64);
145
146static_assert(decodeIndexFltRoundConversionTable(
147 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardZero)) ==
148 TowardPositiveF32_TowardZeroF64);
149static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
150 HWFP32Val: HWTowardPositive, HWFP64Val: HWNearestTiesToEven)) ==
151 TowardPositiveF32_NearestTiesToEvenF64);
152static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
153 HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardNegative)) ==
154 TowardPositiveF32_TowardNegativeF64);
155
156static_assert(decodeIndexFltRoundConversionTable(
157 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardZero)) ==
158 TowardNegativeF32_TowardZeroF64);
159static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
160 HWFP32Val: HWTowardNegative, HWFP64Val: HWNearestTiesToEven)) ==
161 TowardNegativeF32_NearestTiesToEvenF64);
162static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
163 HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardPositive)) ==
164 TowardNegativeF32_TowardPositiveF64);
165
166// Decode FLT_ROUNDS into the hardware value where the two rounding modes are
167// the same and use a standard value
168static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
169 uint32_t FltRoundsVal) {
170 if (FltRoundsVal > TowardNegative)
171 FltRoundsVal -= ExtendedFltRoundOffset;
172
173 return static_cast<uint64_t>(getModeRegisterRoundMode(HWFP32Val: HWVal, HWFP64Val: HWVal))
174 << (FltRoundsVal << 2);
175}
176
177/// Decode FLT_ROUNDS into the hardware value where the two rounding modes
178/// different and use an extended value.
179static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
180 uint32_t HWF64Val,
181 uint32_t FltRoundsVal) {
182 if (FltRoundsVal > TowardNegative)
183 FltRoundsVal -= ExtendedFltRoundOffset;
184 return static_cast<uint64_t>(getModeRegisterRoundMode(HWFP32Val: HWF32Val, HWFP64Val: HWF64Val))
185 << (FltRoundsVal << 2);
186}
187
188const uint64_t AMDGPU::FltRoundToHWConversionTable =
189 encodeFltRoundsToHWTableSame(HWVal: HWTowardZero, FltRoundsVal: TowardZeroF32_TowardZeroF64) |
190 encodeFltRoundsToHWTableSame(HWVal: HWNearestTiesToEven,
191 FltRoundsVal: NearestTiesToEvenF32_NearestTiesToEvenF64) |
192 encodeFltRoundsToHWTableSame(HWVal: HWTowardPositive,
193 FltRoundsVal: TowardPositiveF32_TowardPositiveF64) |
194 encodeFltRoundsToHWTableSame(HWVal: HWTowardNegative,
195 FltRoundsVal: TowardNegativeF32_TowardNegativeF64) |
196
197 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWNearestTiesToEven,
198 FltRoundsVal: TowardZeroF32_NearestTiesToEvenF64) |
199 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWTowardPositive,
200 FltRoundsVal: TowardZeroF32_TowardPositiveF64) |
201 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWTowardNegative,
202 FltRoundsVal: TowardZeroF32_TowardNegativeF64) |
203
204 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardZero,
205 FltRoundsVal: NearestTiesToEvenF32_TowardZeroF64) |
206 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardPositive,
207 FltRoundsVal: NearestTiesToEvenF32_TowardPositiveF64) |
208 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardNegative,
209 FltRoundsVal: NearestTiesToEvenF32_TowardNegativeF64) |
210
211 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWTowardZero,
212 FltRoundsVal: TowardPositiveF32_TowardZeroF64) |
213 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWNearestTiesToEven,
214 FltRoundsVal: TowardPositiveF32_NearestTiesToEvenF64) |
215 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWTowardNegative,
216 FltRoundsVal: TowardPositiveF32_TowardNegativeF64) |
217
218 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWTowardZero,
219 FltRoundsVal: TowardNegativeF32_TowardZeroF64) |
220 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWNearestTiesToEven,
221 FltRoundsVal: TowardNegativeF32_NearestTiesToEvenF64) |
222 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWTowardPositive,
223 FltRoundsVal: TowardNegativeF32_TowardPositiveF64);
224
225/// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
226static constexpr uint32_t
227decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,
228 uint32_t FltRounds) {
229 uint32_t IndexVal = FltRounds;
230 if (IndexVal > TowardNegative)
231 IndexVal -= ExtendedFltRoundOffset;
232 return (FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
233}
234
235uint32_t AMDGPU::decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
236 return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
237 FltRounds);
238}
239
240static constexpr uint32_t decodeFltRoundToHW(uint32_t FltRounds) {
241 return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
242 FltRounds);
243}
244
245// Verify evaluation of FltRoundToHWConversionTable
246
247static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardZero) ==
248 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardZero));
249static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::NearestTiesToEven) ==
250 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven,
251 HWFP64Val: HWNearestTiesToEven));
252static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardPositive) ==
253 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardPositive));
254static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardNegative) ==
255 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardNegative));
256
257static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardPositiveF64) ==
258 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardPositive));
259static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardNegativeF64) ==
260 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardNegative));
261static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardZeroF64) ==
262 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardZero));
263
264static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_NearestTiesToEvenF64) ==
265 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWNearestTiesToEven));
266static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_TowardNegativeF64) ==
267 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardNegative));
268static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_TowardZeroF64) ==
269 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardZero));
270
271static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_NearestTiesToEvenF64) ==
272 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWNearestTiesToEven));
273static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_TowardPositiveF64) ==
274 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardPositive));
275static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_TowardZeroF64) ==
276 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardZero));
277
278static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_NearestTiesToEvenF64) ==
279 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWNearestTiesToEven));
280static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_TowardPositiveF64) ==
281 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardPositive));
282static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_TowardNegativeF64) ==
283 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardNegative));
284