//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a DAG scheduling mutation to cluster shader
10/// exports.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUExportClustering.h"
15#include "SIInstrInfo.h"
16#include "llvm/CodeGen/ScheduleDAGInstrs.h"
17
18using namespace llvm;
19
20namespace {
21
22class ExportClustering : public ScheduleDAGMutation {
23public:
24 ExportClustering() = default;
25 void apply(ScheduleDAGInstrs *DAG) override;
26};
27
28static bool isExport(const SUnit &SU) {
29 return SIInstrInfo::isEXP(MI: *SU.getInstr());
30}
31
32static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
33 const MachineInstr *MI = SU->getInstr();
34 unsigned Imm = TII->getNamedOperand(MI: *MI, OperandName: AMDGPU::OpName::tgt)->getImm();
35 return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
36}
37
38static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
39 unsigned PosCount) {
40 if (!PosCount || PosCount == Chain.size())
41 return;
42
43 // Position exports should occur as soon as possible in the shader
44 // for optimal performance. This moves position exports before
45 // other exports while preserving the order within different export
46 // types (pos or other).
47 SmallVector<SUnit *, 8> Copy(Chain);
48 unsigned PosIdx = 0;
49 unsigned OtherIdx = PosCount;
50 for (SUnit *SU : Copy) {
51 if (isPositionExport(TII, SU))
52 Chain[PosIdx++] = SU;
53 else
54 Chain[OtherIdx++] = SU;
55 }
56}
57
58static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
59 SUnit *ChainHead = Exports.front();
60
61 // Now construct cluster from chain by adding new edges.
62 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
63 SUnit *SUa = Exports[Idx];
64 SUnit *SUb = Exports[Idx + 1];
65
66 // Copy all dependencies to the head of the chain to avoid any
67 // computation being inserted into the chain.
68 for (const SDep &Pred : SUb->Preds) {
69 SUnit *PredSU = Pred.getSUnit();
70 if (!isExport(SU: *PredSU) && !Pred.isWeak())
71 DAG->addEdge(SuccSU: ChainHead, PredDep: SDep(PredSU, SDep::Artificial));
72 }
73
74 // New barrier edge ordering exports
75 DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Barrier));
76 // Also add cluster edge
77 DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Cluster));
78 }
79}
80
81static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
82 SmallVector<SDep, 2> ToAdd, ToRemove;
83
84 for (const SDep &Pred : SU.Preds) {
85 SUnit *PredSU = Pred.getSUnit();
86 if (Pred.isBarrier() && isExport(SU: *PredSU)) {
87 ToRemove.push_back(Elt: Pred);
88 if (isExport(SU))
89 continue;
90
91 // If we remove a barrier we need to copy dependencies
92 // from the predecessor to maintain order.
93 for (const SDep &ExportPred : PredSU->Preds) {
94 SUnit *ExportPredSU = ExportPred.getSUnit();
95 if (ExportPred.isBarrier() && !isExport(SU: *ExportPredSU))
96 ToAdd.push_back(Elt: SDep(ExportPredSU, SDep::Barrier));
97 }
98 }
99 }
100
101 for (SDep Pred : ToRemove)
102 SU.removePred(D: Pred);
103 for (SDep Pred : ToAdd)
104 DAG->addEdge(SuccSU: &SU, PredDep: Pred);
105}
106
107void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
108 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
109
110 SmallVector<SUnit *, 8> Chain;
111
112 // Pass through DAG gathering a list of exports and removing barrier edges
113 // creating dependencies on exports. Freeing exports of successor edges
114 // allows more scheduling freedom, and nothing should be order dependent
115 // on exports. Edges will be added later to order the exports.
116 unsigned PosCount = 0;
117 for (SUnit &SU : DAG->SUnits) {
118 if (!isExport(SU))
119 continue;
120
121 Chain.push_back(Elt: &SU);
122 if (isPositionExport(TII, SU: &SU))
123 PosCount++;
124
125 removeExportDependencies(DAG, SU);
126
127 SmallVector<SDep, 4> Succs(SU.Succs);
128 for (SDep Succ : Succs)
129 removeExportDependencies(DAG, SU&: *Succ.getSUnit());
130 }
131
132 // Apply clustering if there are multiple exports
133 if (Chain.size() > 1) {
134 sortChain(TII, Chain, PosCount);
135 buildCluster(Exports: Chain, DAG);
136 }
137}
138
139} // end namespace
140
141std::unique_ptr<ScheduleDAGMutation>
142llvm::createAMDGPUExportClusteringDAGMutation() {
143 return std::make_unique<ExportClustering>();
144}
145