SIModeRegister.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/SIModeRegister.cpp]

1	//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This pass inserts changes to the Mode register settings as required.
10	/// Note that currently it only deals with the Double Precision Floating Point
11	/// rounding mode setting, but is intended to be generic enough to be easily
12	/// expanded.
13	///
14	//===----------------------------------------------------------------------===//
15	//
16	#include "AMDGPU.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "llvm/ADT/Statistic.h"
20	#include "llvm/CodeGen/MachineFunctionPass.h"
21	#include <queue>
22
23	#define DEBUG_TYPE "si-mode-register"
24
25	STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26
27	using namespace llvm;
28
/// A partial description of the Mode register: a set of bits whose value is
/// known (Mask) together with the value of those bits (Mode).
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value.
  unsigned Mask = 0;
  // Value of the known bits. Bits outside Mask are always kept at zero by the
  // two-argument constructor.
  unsigned Mode = 0;

  Status() = default;

  /// Builds a Status from a mask and a mode; mode bits that are not covered
  /// by \p NewMask are discarded.
  Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
    Mode &= Mask;
  }

  /// Merge two status values such that only values that don't conflict are
  /// preserved. Where both sides know a bit, the value from \p S wins.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  /// Merge an unknown value by using the unknown value's mask to remove bits
  /// from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  /// Intersect two Status values to produce a mode and mask that is a subset
  /// of both values: only bits known on both sides AND holding the same value
  /// on both sides survive.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' exactly where the two modes agree.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  /// Produce the delta required to change the Mode to the required Mode:
  /// bits \p S needs that either differ from ours or are unknown to us.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  /// True when every bit required by \p S is known here and already holds
  /// the value \p S requires.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  /// True when \p S can be folded into this Status: either the two touch
  /// disjoint bits, or this Status already satisfies \p S.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
78
79	class BlockData {
80	public:
81	// The Status that represents the mode register settings required by the
82	// FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
83	Status Require;
84
85	// The Status that represents the net changes to the Mode register made by
86	// this block, Calculated in Phase 1.
87	Status Change;
88
89	// The Status that represents the mode register settings on exit from this
90	// block. Calculated in Phase 2.
91	Status Exit;
92
93	// The Status that represents the intersection of exit Mode register settings
94	// from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
95	Status Pred;
96
97	// In Phase 1 we record the first instruction that has a mode requirement,
98	// which is used in Phase 3 if we need to insert a mode change.
99	MachineInstr FirstInsertionPoint = nullptr*;
100
101	// A flag to indicate whether an Exit value has been set (we can't tell by
102	// examining the Exit value itself as all values may be valid results).
103	bool ExitSet = false;
104
105	BlockData() = default;
106	};
107
108	namespace {
109
110	class SIModeRegister : public MachineFunctionPass {
111	public:
112	static char ID;
113
114	std::vector<std::unique_ptr<BlockData>> BlockInfo;
115	std::queue<MachineBasicBlock *> Phase2List;
116
117	// The default mode register setting currently only caters for the floating
118	// point double precision rounding mode.
119	// We currently assume the default rounding mode is Round to Nearest
120	// NOTE: this should come from a per function rounding mode setting once such
121	// a setting exists.
122	unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
123	Status DefaultStatus =
124	Status (FP_ROUND_MODE_DP(`0x3`), FP_ROUND_MODE_DP(DefaultMode));
125
126	bool Changed = false;
127
128	public:
129	SIModeRegister() : MachineFunctionPass (ID) {}
130
131	bool runOnMachineFunction(MachineFunction &MF) override;
132
133	void getAnalysisUsage(AnalysisUsage &AU) const override {
134	AU.setPreservesCFG();
135	MachineFunctionPass::getAnalysisUsage(AU);
136	}
137
138	void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
139
140	void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
141
142	void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
143
144	Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
145
146	void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
147	const SIInstrInfo *TII, Status InstrMode);
148	};
149	} // End anonymous namespace.
150
151	INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
152	"Insert required mode register values", false, false)
153
154	char SIModeRegister::ID = `0`;
155
156	char &llvm::SIModeRegisterID = SIModeRegister::ID;
157
158	FunctionPass llvm::createSIModeRegisterPass() { return* new SIModeRegister (); }
159
160	// Determine the Mode register setting required for this instruction.
161	// Instructions which don't use the Mode register return a null Status.
162	// Note this currently only deals with instructions that use the floating point
163	// double precision setting.
164	Status SIModeRegister::getInstructionMode(MachineInstr &MI,
165	const SIInstrInfo *TII) {
166	if (TII->usesFPDPRounding(MI) \|\|
167	MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO \|\|
168	MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
169	switch (MI.getOpcode()) {
170	case AMDGPU::V_INTERP_P1LL_F16:
171	case AMDGPU::V_INTERP_P1LV_F16:
172	case AMDGPU::V_INTERP_P2_F16:
173	// f16 interpolation instructions need double precision round to zero
174	return Status (FP_ROUND_MODE_DP(`3`),
175	FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
176	case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
177	// Replacing the pseudo by a real instruction in place
178	if (TII->getSubtarget().hasTrue16BitInsts()) {
179	MachineBasicBlock &MBB = *MI.getParent();
180	MachineInstrBuilder B(*MBB.getParent(), MI);
181	MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_t16_e64));
182	MachineOperand Src0 = MI.getOperand(i: `1`);
183	MI.removeOperand(OpNo: `1`);
184	B.addImm(Val: `0`); // src0_modifiers
185	B.add(MO: Src0); // re-add src0 operand
186	B.addImm(Val: `0`); // clamp
187	B.addImm(Val: `0`); // omod
188	} else
189	MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_e32));
190	return Status (FP_ROUND_MODE_DP(`3`),
191	FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
192	}
193	case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
194	// Replacing the pseudo by a real instruction in place
195	if (TII->getSubtarget().hasTrue16BitInsts()) {
196	MachineBasicBlock &MBB = *MI.getParent();
197	MachineInstrBuilder B(*MBB.getParent(), MI);
198	MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_t16_e64));
199	MachineOperand Src0 = MI.getOperand(i: `1`);
200	MI.removeOperand(OpNo: `1`);
201	B.addImm(Val: `0`); // src0_modifiers
202	B.add(MO: Src0); // re-add src0 operand
203	B.addImm(Val: `0`); // clamp
204	B.addImm(Val: `0`); // omod
205	} else
206	MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_e32));
207	return Status (FP_ROUND_MODE_DP(`3`),
208	FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
209	}
210	default:
211	return DefaultStatus;
212	}
213	}
214	return Status ();
215	}
216
217	// Insert a setreg instruction to update the Mode register.
218	// It is possible (though unlikely) for an instruction to require a change to
219	// the value of disjoint parts of the Mode register when we don't know the
220	// value of the intervening bits. In that case we need to use more than one
221	// setreg instruction.
222	void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
223	const SIInstrInfo *TII, Status InstrMode) {
224	while (InstrMode.Mask) {
225	unsigned Offset = llvm::countr_zero<unsigned>(Val: InstrMode.Mask);
226	unsigned Width = llvm::countr_one<unsigned>(Value: InstrMode.Mask >> Offset);
227	unsigned Value = (InstrMode.Mode >> Offset) & ((`1` << Width) - `1`);
228	using namespace AMDGPU::Hwreg;
229	BuildMI(BB&: MBB, I: MI, MIMD: nullptr, MCID: TII->get(Opcode: AMDGPU::S_SETREG_IMM32_B32))
230	.addImm(Val: Value)
231	.addImm(Val: HwregEncoding::encode(Values: ID_MODE, Values: Offset, Values: Width));
232	++NumSetregInserted;
233	Changed = true;
234	InstrMode.Mask &= ~(((`1` << Width) - `1`) << Offset);
235	}
236	}
237
238	// In Phase 1 we iterate through the instructions of the block and for each
239	// instruction we get its mode usage. If the instruction uses the Mode register
240	// we:
241	// - update the Change status, which tracks the changes to the Mode register
242	// made by this block
243	// - if this instruction's requirements are compatible with the current setting
244	// of the Mode register we merge the modes
245	// - if it isn't compatible and an InsertionPoint isn't set, then we set the
246	// InsertionPoint to the current instruction, and we remember the current
247	// mode
248	// - if it isn't compatible and InsertionPoint is set we insert a seteg before
249	// that instruction (unless this instruction forms part of the block's
250	// entry requirements in which case the insertion is deferred until Phase 3
251	// when predecessor exit values are known), and move the insertion point to
252	// this instruction
253	// - if this is a setreg instruction we treat it as an incompatible instruction.
254	// This is sub-optimal but avoids some nasty corner cases, and is expected to
255	// occur very rarely.
256	// - on exit we have set the Require, Change, and initial Exit modes.
257	void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
258	const SIInstrInfo *TII) {
259	auto NewInfo = std::make_unique<BlockData>();
260	MachineInstr InsertionPoint = nullptr*;
261	// RequirePending is used to indicate whether we are collecting the initial
262	// requirements for the block, and need to defer the first InsertionPoint to
263	// Phase 3. It is set to false once we have set FirstInsertionPoint, or when
264	// we discover an explicit setreg that means this block doesn't have any
265	// initial requirements.
266	bool RequirePending = true;
267	Status IPChange;
268	for (MachineInstr &MI : MBB) {
269	Status InstrMode = getInstructionMode(MI, TII);
270	if (MI.getOpcode() == AMDGPU::S_SETREG_B32 \|\|
271	MI.getOpcode() == AMDGPU::S_SETREG_B32_mode \|\|
272	MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 \|\|
273	MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
274	// We preserve any explicit mode register setreg instruction we encounter,
275	// as we assume it has been inserted by a higher authority (this is
276	// likely to be a very rare occurrence).
277	unsigned Dst = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::simm16)->getImm();
278	using namespace AMDGPU::Hwreg;
279	auto [Id, Offset, Width] = HwregEncoding::decode(Encoded: Dst);
280	if (Id != ID_MODE)
281	continue;
282
283	unsigned Mask = maskTrailingOnes<unsigned>(N: Width) << Offset;
284
285	// If an InsertionPoint is set we will insert a setreg there.
286	if (InsertionPoint) {
287	insertSetreg(MBB, MI: InsertionPoint, TII, InstrMode: IPChange.delta(S: NewInfo ->Change));
288	InsertionPoint = nullptr;
289	}
290	// If this is an immediate then we know the value being set, but if it is
291	// not an immediate then we treat the modified bits of the mode register
292	// as unknown.
293	if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 \|\|
294	MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
295	unsigned Val = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::imm)->getImm();
296	unsigned Mode = (Val << Offset) & Mask;
297	Status Setreg = Status (Mask, Mode);
298	// If we haven't already set the initial requirements for the block we
299	// don't need to as the requirements start from this explicit setreg.
300	RequirePending = false;
301	NewInfo ->Change = NewInfo ->Change.merge(S: Setreg);
302	} else {
303	NewInfo ->Change = NewInfo ->Change.mergeUnknown(newMask: Mask);
304	}
305	} else if (!NewInfo ->Change.isCompatible(S&: InstrMode)) {
306	// This instruction uses the Mode register and its requirements aren't
307	// compatible with the current mode.
308	if (InsertionPoint) {
309	// If the required mode change cannot be included in the current
310	// InsertionPoint changes, we need a setreg and start a new
311	// InsertionPoint.
312	if (!IPChange.delta(S: NewInfo ->Change).isCombinable(S&: InstrMode)) {
313	if (RequirePending) {
314	// This is the first insertionPoint in the block so we will defer
315	// the insertion of the setreg to Phase 3 where we know whether or
316	// not it is actually needed.
317	NewInfo ->FirstInsertionPoint = InsertionPoint;
318	NewInfo ->Require = NewInfo ->Change;
319	RequirePending = false;
320	} else {
321	insertSetreg(MBB, MI: InsertionPoint, TII,
322	InstrMode: IPChange.delta(S: NewInfo ->Change));
323	IPChange = NewInfo ->Change;
324	}
325	// Set the new InsertionPoint
326	InsertionPoint = &MI;
327	}
328	NewInfo ->Change = NewInfo ->Change.merge(S: InstrMode);
329	} else {
330	// No InsertionPoint is currently set - this is either the first in
331	// the block or we have previously seen an explicit setreg.
332	InsertionPoint = &MI;
333	IPChange = NewInfo ->Change;
334	NewInfo ->Change = NewInfo ->Change.merge(S: InstrMode);
335	}
336	}
337	}
338	if (RequirePending) {
339	// If we haven't yet set the initial requirements for the block we set them
340	// now.
341	NewInfo ->FirstInsertionPoint = InsertionPoint;
342	NewInfo ->Require = NewInfo ->Change;
343	} else if (InsertionPoint) {
344	// We need to insert a setreg at the InsertionPoint
345	insertSetreg(MBB, MI: InsertionPoint, TII, InstrMode: IPChange.delta(S: NewInfo ->Change));
346	}
347	NewInfo ->Exit = NewInfo ->Change;
348	BlockInfo [MBB.getNumber()] = std::move(NewInfo);
349	}
350
351	// In Phase 2 we revisit each block and calculate the common Mode register
352	// value provided by all predecessor blocks. If the Exit value for the block
353	// is changed, then we add the successor blocks to the worklist so that the
354	// exit value is propagated.
355	void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
356	const SIInstrInfo *TII) {
357	bool RevisitRequired = false;
358	bool ExitSet = false;
359	unsigned ThisBlock = MBB.getNumber();
360	if (MBB.pred_empty()) {
361	// There are no predecessors, so use the default starting status.
362	BlockInfo [ThisBlock]->Pred = DefaultStatus;
363	ExitSet = true;
364	} else {
365	// Build a status that is common to all the predecessors by intersecting
366	// all the predecessor exit status values.
367	// Mask bits (which represent the Mode bits with a known value) can only be
368	// added by explicit SETREG instructions or the initial default value -
369	// the intersection process may remove Mask bits.
370	// If we find a predecessor that has not yet had an exit value determined
371	// (this can happen for example if a block is its own predecessor) we defer
372	// use of that value as the Mask will be all zero, and we will revisit this
373	// block again later (unless the only predecessor without an exit value is
374	// this block).
375	MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
376	MachineBasicBlock &PB = (P);
377	unsigned PredBlock = PB.getNumber();
378	if ((ThisBlock == PredBlock) && (std::next(x: P) == E)) {
379	BlockInfo [ThisBlock]->Pred = DefaultStatus;
380	ExitSet = true;
381	} else if (BlockInfo [PredBlock]->ExitSet) {
382	BlockInfo [ThisBlock]->Pred = BlockInfo [PredBlock]->Exit;
383	ExitSet = true;
384	} else if (PredBlock != ThisBlock)
385	RevisitRequired = true;
386
387	for (P = std::next(x: P); P != E; P = std::next(x: P)) {
388	MachineBasicBlock Pred = P;
389	unsigned PredBlock = Pred->getNumber();
390	if (BlockInfo [PredBlock]->ExitSet) {
391	if (BlockInfo [ThisBlock]->ExitSet) {
392	BlockInfo [ThisBlock]->Pred =
393	BlockInfo [ThisBlock]->Pred.intersect(S: BlockInfo [PredBlock]->Exit);
394	} else {
395	BlockInfo [ThisBlock]->Pred = BlockInfo [PredBlock]->Exit;
396	}
397	ExitSet = true;
398	} else if (PredBlock != ThisBlock)
399	RevisitRequired = true;
400	}
401	}
402	Status TmpStatus =
403	BlockInfo [ThisBlock]->Pred.merge(S: BlockInfo [ThisBlock]->Change);
404	if (BlockInfo [ThisBlock]->Exit != TmpStatus) {
405	BlockInfo [ThisBlock]->Exit = TmpStatus;
406	// Add the successors to the work list so we can propagate the changed exit
407	// status.
408	for (MachineBasicBlock *Succ : MBB.successors())
409	Phase2List.push(x: Succ);
410	}
411	BlockInfo [ThisBlock]->ExitSet = ExitSet;
412	if (RevisitRequired)
413	Phase2List.push(x: &MBB);
414	}
415
416	// In Phase 3 we revisit each block and if it has an insertion point defined we
417	// check whether the predecessor mode meets the block's entry requirements. If
418	// not we insert an appropriate setreg instruction to modify the Mode register.
419	void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
420	const SIInstrInfo *TII) {
421	unsigned ThisBlock = MBB.getNumber();
422	if (!BlockInfo [ThisBlock]->Pred.isCompatible(S&: BlockInfo [ThisBlock]->Require)) {
423	Status Delta =
424	BlockInfo [ThisBlock]->Pred.delta(S: BlockInfo [ThisBlock]->Require);
425	if (BlockInfo [ThisBlock]->FirstInsertionPoint)
426	insertSetreg(MBB, MI: BlockInfo [ThisBlock]->FirstInsertionPoint, TII, InstrMode: Delta);
427	else
428	insertSetreg(MBB, MI: &MBB.instr_front(), TII, InstrMode: Delta);
429	}
430	}
431
432	bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
433	// Constrained FP intrinsics are used to support non-default rounding modes.
434	// strictfp attribute is required to mark functions with strict FP semantics
435	// having constrained FP intrinsics. This pass fixes up operations that uses
436	// a non-default rounding mode for non-strictfp functions. But it should not
437	// assume or modify any default rounding modes in case of strictfp functions.
438	const Function &F = MF.getFunction();
439	if (F.hasFnAttribute(Kind: llvm::Attribute::StrictFP))
440	return Changed;
441	BlockInfo.resize(new_size: MF.getNumBlockIDs());
442	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
443	const SIInstrInfo *TII = ST.getInstrInfo();
444
445	// Processing is performed in a number of phases
446
447	// Phase 1 - determine the initial mode required by each block, and add setreg
448	// instructions for intra block requirements.
449	for (MachineBasicBlock &BB : MF)
450	processBlockPhase1(MBB&: BB, TII);
451
452	// Phase 2 - determine the exit mode from each block. We add all blocks to the
453	// list here, but will also add any that need to be revisited during Phase 2
454	// processing.
455	for (MachineBasicBlock &BB : MF)
456	Phase2List.push(x: &BB);
457	while (!Phase2List.empty()) {
458	processBlockPhase2(MBB&: *Phase2List.front(), TII);
459	Phase2List.pop();
460	}
461
462	// Phase 3 - add an initial setreg to each block where the required entry mode
463	// is not satisfied by the exit mode of all its predecessors.
464	for (MachineBasicBlock &BB : MF)
465	processBlockPhase3(MBB&: BB, TII);
466
467	BlockInfo.clear();
468
469	return Changed;
470	}
471

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/SIModeRegister.cpp