1//===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SIModeRegisterDefaults.h"
10#include "GCNSubtarget.h"
11
12using namespace llvm;
13
14SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
15 const GCNSubtarget &ST) {
16 *this = getDefaultForCallingConv(CC: F.getCallingConv());
17
18 if (ST.hasIEEEMode()) {
19 StringRef IEEEAttr = F.getFnAttribute(Kind: "amdgpu-ieee").getValueAsString();
20 if (!IEEEAttr.empty())
21 IEEE = IEEEAttr == "true";
22 }
23
24 if (ST.hasDX10ClampMode()) {
25 StringRef DX10ClampAttr =
26 F.getFnAttribute(Kind: "amdgpu-dx10-clamp").getValueAsString();
27 if (!DX10ClampAttr.empty())
28 DX10Clamp = DX10ClampAttr == "true";
29 }
30
31 DenormalFPEnv FPEnv = F.getDenormalFPEnv();
32 FP64FP16Denormals = FPEnv.DefaultMode;
33 FP32Denormals = FPEnv.F32Mode;
34}
35
36using namespace AMDGPU;
37
38/// Combine f32 and f64 rounding modes into a combined rounding mode value.
39static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
40 uint32_t HWFP64Val) {
41 return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset;
42}
43
44static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
45 uint32_t HWF32Val,
46 uint32_t HWF64Val) {
47 uint32_t ModeVal = getModeRegisterRoundMode(HWFP32Val: HWF32Val, HWFP64Val: HWF64Val);
48 if (FltRoundsVal > TowardNegative)
49 FltRoundsVal -= ExtendedFltRoundOffset;
50
51 uint32_t BitIndex = ModeVal << 2;
52 return static_cast<uint64_t>(FltRoundsVal) << BitIndex;
53}
54
55// Encode FLT_ROUNDS value where the two rounding modes are the same and use a
56// standard value
57static constexpr uint64_t
58encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) {
59 return encodeFltRoundsTable(FltRoundsVal: FltRoundsMode, HWF32Val: HWVal, HWF64Val: HWVal);
60}
61
62// Convert mode register encoded rounding mode to AMDGPUFltRounds
63static constexpr AMDGPUFltRounds
64decodeIndexFltRoundConversionTable(uint32_t HWMode) {
65 uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf;
66 if (TableRead > TowardNegative)
67 TableRead += ExtendedFltRoundOffset;
68 return static_cast<AMDGPUFltRounds>(TableRead);
69}
70
71static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO;
72static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST;
73static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF;
74static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF;
75
76const uint64_t AMDGPU::FltRoundConversionTable =
77 encodeFltRoundsTableSame(FltRoundsMode: TowardZeroF32_TowardZeroF64, HWVal: HWTowardZero) |
78 encodeFltRoundsTableSame(FltRoundsMode: NearestTiesToEvenF32_NearestTiesToEvenF64,
79 HWVal: HWNearestTiesToEven) |
80 encodeFltRoundsTableSame(FltRoundsMode: TowardPositiveF32_TowardPositiveF64,
81 HWVal: HWTowardPositive) |
82 encodeFltRoundsTableSame(FltRoundsMode: TowardNegativeF32_TowardNegativeF64,
83 HWVal: HWTowardNegative) |
84
85 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_NearestTiesToEvenF64, HWF32Val: HWTowardZero,
86 HWF64Val: HWNearestTiesToEven) |
87 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardPositiveF64, HWF32Val: HWTowardZero,
88 HWF64Val: HWTowardPositive) |
89 encodeFltRoundsTable(FltRoundsVal: TowardZeroF32_TowardNegativeF64, HWF32Val: HWTowardZero,
90 HWF64Val: HWTowardNegative) |
91
92 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardZeroF64,
93 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardZero) |
94 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardPositiveF64,
95 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardPositive) |
96 encodeFltRoundsTable(FltRoundsVal: NearestTiesToEvenF32_TowardNegativeF64,
97 HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardNegative) |
98
99 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardZeroF64, HWF32Val: HWTowardPositive,
100 HWF64Val: HWTowardZero) |
101 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_NearestTiesToEvenF64,
102 HWF32Val: HWTowardPositive, HWF64Val: HWNearestTiesToEven) |
103 encodeFltRoundsTable(FltRoundsVal: TowardPositiveF32_TowardNegativeF64, HWF32Val: HWTowardPositive,
104 HWF64Val: HWTowardNegative) |
105
106 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardZeroF64, HWF32Val: HWTowardNegative,
107 HWF64Val: HWTowardZero) |
108 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_NearestTiesToEvenF64,
109 HWF32Val: HWTowardNegative, HWF64Val: HWNearestTiesToEven) |
110 encodeFltRoundsTable(FltRoundsVal: TowardNegativeF32_TowardPositiveF64, HWF32Val: HWTowardNegative,
111 HWF64Val: HWTowardPositive);
112
113// Verify evaluation of FltRoundConversionTable
114
115// If both modes are the same, should return the standard values.
116static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
117 HWFP32Val: HWTowardZero, HWFP64Val: HWTowardZero)) == AMDGPUFltRounds::TowardZero);
118static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
119 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWNearestTiesToEven)) ==
120 AMDGPUFltRounds::NearestTiesToEven);
121static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
122 HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardPositive)) ==
123 AMDGPUFltRounds::TowardPositive);
124static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
125 HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardNegative)) ==
126 AMDGPUFltRounds::TowardNegative);
127
128static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
129 HWFP32Val: HWTowardZero, HWFP64Val: HWNearestTiesToEven)) ==
130 TowardZeroF32_NearestTiesToEvenF64);
131static_assert(decodeIndexFltRoundConversionTable(
132 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardPositive)) ==
133 TowardZeroF32_TowardPositiveF64);
134static_assert(decodeIndexFltRoundConversionTable(
135 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardNegative)) ==
136 TowardZeroF32_TowardNegativeF64);
137
138static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
139 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardZero)) ==
140 NearestTiesToEvenF32_TowardZeroF64);
141static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
142 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardPositive)) ==
143 NearestTiesToEvenF32_TowardPositiveF64);
144static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
145 HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardNegative)) ==
146 NearestTiesToEvenF32_TowardNegativeF64);
147
148static_assert(decodeIndexFltRoundConversionTable(
149 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardZero)) ==
150 TowardPositiveF32_TowardZeroF64);
151static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
152 HWFP32Val: HWTowardPositive, HWFP64Val: HWNearestTiesToEven)) ==
153 TowardPositiveF32_NearestTiesToEvenF64);
154static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
155 HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardNegative)) ==
156 TowardPositiveF32_TowardNegativeF64);
157
158static_assert(decodeIndexFltRoundConversionTable(
159 HWMode: getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardZero)) ==
160 TowardNegativeF32_TowardZeroF64);
161static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
162 HWFP32Val: HWTowardNegative, HWFP64Val: HWNearestTiesToEven)) ==
163 TowardNegativeF32_NearestTiesToEvenF64);
164static_assert(decodeIndexFltRoundConversionTable(HWMode: getModeRegisterRoundMode(
165 HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardPositive)) ==
166 TowardNegativeF32_TowardPositiveF64);
167
168// Decode FLT_ROUNDS into the hardware value where the two rounding modes are
169// the same and use a standard value
170static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
171 uint32_t FltRoundsVal) {
172 if (FltRoundsVal > TowardNegative)
173 FltRoundsVal -= ExtendedFltRoundOffset;
174
175 return static_cast<uint64_t>(getModeRegisterRoundMode(HWFP32Val: HWVal, HWFP64Val: HWVal))
176 << (FltRoundsVal << 2);
177}
178
179/// Decode FLT_ROUNDS into the hardware value where the two rounding modes
180/// different and use an extended value.
181static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
182 uint32_t HWF64Val,
183 uint32_t FltRoundsVal) {
184 if (FltRoundsVal > TowardNegative)
185 FltRoundsVal -= ExtendedFltRoundOffset;
186 return static_cast<uint64_t>(getModeRegisterRoundMode(HWFP32Val: HWF32Val, HWFP64Val: HWF64Val))
187 << (FltRoundsVal << 2);
188}
189
190const uint64_t AMDGPU::FltRoundToHWConversionTable =
191 encodeFltRoundsToHWTableSame(HWVal: HWTowardZero, FltRoundsVal: TowardZeroF32_TowardZeroF64) |
192 encodeFltRoundsToHWTableSame(HWVal: HWNearestTiesToEven,
193 FltRoundsVal: NearestTiesToEvenF32_NearestTiesToEvenF64) |
194 encodeFltRoundsToHWTableSame(HWVal: HWTowardPositive,
195 FltRoundsVal: TowardPositiveF32_TowardPositiveF64) |
196 encodeFltRoundsToHWTableSame(HWVal: HWTowardNegative,
197 FltRoundsVal: TowardNegativeF32_TowardNegativeF64) |
198
199 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWNearestTiesToEven,
200 FltRoundsVal: TowardZeroF32_NearestTiesToEvenF64) |
201 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWTowardPositive,
202 FltRoundsVal: TowardZeroF32_TowardPositiveF64) |
203 encodeFltRoundsToHWTable(HWF32Val: HWTowardZero, HWF64Val: HWTowardNegative,
204 FltRoundsVal: TowardZeroF32_TowardNegativeF64) |
205
206 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardZero,
207 FltRoundsVal: NearestTiesToEvenF32_TowardZeroF64) |
208 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardPositive,
209 FltRoundsVal: NearestTiesToEvenF32_TowardPositiveF64) |
210 encodeFltRoundsToHWTable(HWF32Val: HWNearestTiesToEven, HWF64Val: HWTowardNegative,
211 FltRoundsVal: NearestTiesToEvenF32_TowardNegativeF64) |
212
213 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWTowardZero,
214 FltRoundsVal: TowardPositiveF32_TowardZeroF64) |
215 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWNearestTiesToEven,
216 FltRoundsVal: TowardPositiveF32_NearestTiesToEvenF64) |
217 encodeFltRoundsToHWTable(HWF32Val: HWTowardPositive, HWF64Val: HWTowardNegative,
218 FltRoundsVal: TowardPositiveF32_TowardNegativeF64) |
219
220 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWTowardZero,
221 FltRoundsVal: TowardNegativeF32_TowardZeroF64) |
222 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWNearestTiesToEven,
223 FltRoundsVal: TowardNegativeF32_NearestTiesToEvenF64) |
224 encodeFltRoundsToHWTable(HWF32Val: HWTowardNegative, HWF64Val: HWTowardPositive,
225 FltRoundsVal: TowardNegativeF32_TowardPositiveF64);
226
227/// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
228static constexpr uint32_t
229decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,
230 uint32_t FltRounds) {
231 uint32_t IndexVal = FltRounds;
232 if (IndexVal > TowardNegative)
233 IndexVal -= ExtendedFltRoundOffset;
234 return (FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
235}
236
237uint32_t AMDGPU::decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
238 return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
239 FltRounds);
240}
241
242static constexpr uint32_t decodeFltRoundToHW(uint32_t FltRounds) {
243 return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
244 FltRounds);
245}
246
247// Verify evaluation of FltRoundToHWConversionTable
248
249static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardZero) ==
250 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardZero));
251static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::NearestTiesToEven) ==
252 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven,
253 HWFP64Val: HWNearestTiesToEven));
254static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardPositive) ==
255 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardPositive));
256static_assert(decodeFltRoundToHW(FltRounds: AMDGPUFltRounds::TowardNegative) ==
257 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardNegative));
258
259static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardPositiveF64) ==
260 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardPositive));
261static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardNegativeF64) ==
262 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardNegative));
263static_assert(decodeFltRoundToHW(FltRounds: NearestTiesToEvenF32_TowardZeroF64) ==
264 getModeRegisterRoundMode(HWFP32Val: HWNearestTiesToEven, HWFP64Val: HWTowardZero));
265
266static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_NearestTiesToEvenF64) ==
267 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWNearestTiesToEven));
268static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_TowardNegativeF64) ==
269 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardNegative));
270static_assert(decodeFltRoundToHW(FltRounds: TowardPositiveF32_TowardZeroF64) ==
271 getModeRegisterRoundMode(HWFP32Val: HWTowardPositive, HWFP64Val: HWTowardZero));
272
273static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_NearestTiesToEvenF64) ==
274 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWNearestTiesToEven));
275static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_TowardPositiveF64) ==
276 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardPositive));
277static_assert(decodeFltRoundToHW(FltRounds: TowardNegativeF32_TowardZeroF64) ==
278 getModeRegisterRoundMode(HWFP32Val: HWTowardNegative, HWFP64Val: HWTowardZero));
279
280static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_NearestTiesToEvenF64) ==
281 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWNearestTiesToEven));
282static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_TowardPositiveF64) ==
283 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardPositive));
284static_assert(decodeFltRoundToHW(FltRounds: TowardZeroF32_TowardNegativeF64) ==
285 getModeRegisterRoundMode(HWFP32Val: HWTowardZero, HWFP64Val: HWTowardNegative));
286