1 | //===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file This file contains a DAG scheduling mutation to cluster shader |
10 | /// exports. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AMDGPUExportClustering.h" |
15 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
16 | #include "SIInstrInfo.h" |
17 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | namespace { |
22 | |
23 | class ExportClustering : public ScheduleDAGMutation { |
24 | public: |
25 | ExportClustering() = default; |
26 | void apply(ScheduleDAGInstrs *DAG) override; |
27 | }; |
28 | |
29 | static bool isExport(const SUnit &SU) { |
30 | return SIInstrInfo::isEXP(MI: *SU.getInstr()); |
31 | } |
32 | |
33 | static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { |
34 | const MachineInstr *MI = SU->getInstr(); |
35 | unsigned Imm = TII->getNamedOperand(MI: *MI, OpName: AMDGPU::OpName::tgt)->getImm(); |
36 | return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; |
37 | } |
38 | |
39 | static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, |
40 | unsigned PosCount) { |
41 | if (!PosCount || PosCount == Chain.size()) |
42 | return; |
43 | |
44 | // Position exports should occur as soon as possible in the shader |
45 | // for optimal performance. This moves position exports before |
46 | // other exports while preserving the order within different export |
47 | // types (pos or other). |
48 | SmallVector<SUnit *, 8> Copy(Chain); |
49 | unsigned PosIdx = 0; |
50 | unsigned OtherIdx = PosCount; |
51 | for (SUnit *SU : Copy) { |
52 | if (isPositionExport(TII, SU)) |
53 | Chain[PosIdx++] = SU; |
54 | else |
55 | Chain[OtherIdx++] = SU; |
56 | } |
57 | } |
58 | |
59 | static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { |
60 | SUnit *ChainHead = Exports.front(); |
61 | |
62 | // Now construct cluster from chain by adding new edges. |
63 | for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { |
64 | SUnit *SUa = Exports[Idx]; |
65 | SUnit *SUb = Exports[Idx + 1]; |
66 | |
67 | // Copy all dependencies to the head of the chain to avoid any |
68 | // computation being inserted into the chain. |
69 | for (const SDep &Pred : SUb->Preds) { |
70 | SUnit *PredSU = Pred.getSUnit(); |
71 | if (!isExport(SU: *PredSU) && !Pred.isWeak()) |
72 | DAG->addEdge(SuccSU: ChainHead, PredDep: SDep(PredSU, SDep::Artificial)); |
73 | } |
74 | |
75 | // New barrier edge ordering exports |
76 | DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Barrier)); |
77 | // Also add cluster edge |
78 | DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Cluster)); |
79 | } |
80 | } |
81 | |
82 | static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { |
83 | SmallVector<SDep, 2> ToAdd, ToRemove; |
84 | |
85 | for (const SDep &Pred : SU.Preds) { |
86 | SUnit *PredSU = Pred.getSUnit(); |
87 | if (Pred.isBarrier() && isExport(SU: *PredSU)) { |
88 | ToRemove.push_back(Elt: Pred); |
89 | if (isExport(SU)) |
90 | continue; |
91 | |
92 | // If we remove a barrier we need to copy dependencies |
93 | // from the predecessor to maintain order. |
94 | for (const SDep &ExportPred : PredSU->Preds) { |
95 | SUnit *ExportPredSU = ExportPred.getSUnit(); |
96 | if (ExportPred.isBarrier() && !isExport(SU: *ExportPredSU)) |
97 | ToAdd.push_back(Elt: SDep(ExportPredSU, SDep::Barrier)); |
98 | } |
99 | } |
100 | } |
101 | |
102 | for (SDep Pred : ToRemove) |
103 | SU.removePred(D: Pred); |
104 | for (SDep Pred : ToAdd) |
105 | DAG->addEdge(SuccSU: &SU, PredDep: Pred); |
106 | } |
107 | |
108 | void ExportClustering::apply(ScheduleDAGInstrs *DAG) { |
109 | const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); |
110 | |
111 | SmallVector<SUnit *, 8> Chain; |
112 | |
113 | // Pass through DAG gathering a list of exports and removing barrier edges |
114 | // creating dependencies on exports. Freeing exports of successor edges |
115 | // allows more scheduling freedom, and nothing should be order dependent |
116 | // on exports. Edges will be added later to order the exports. |
117 | unsigned PosCount = 0; |
118 | for (SUnit &SU : DAG->SUnits) { |
119 | if (!isExport(SU)) |
120 | continue; |
121 | |
122 | Chain.push_back(Elt: &SU); |
123 | if (isPositionExport(TII, SU: &SU)) |
124 | PosCount++; |
125 | |
126 | removeExportDependencies(DAG, SU); |
127 | |
128 | SmallVector<SDep, 4> Succs(SU.Succs); |
129 | for (SDep Succ : Succs) |
130 | removeExportDependencies(DAG, SU&: *Succ.getSUnit()); |
131 | } |
132 | |
133 | // Apply clustering if there are multiple exports |
134 | if (Chain.size() > 1) { |
135 | sortChain(TII, Chain, PosCount); |
136 | buildCluster(Exports: Chain, DAG); |
137 | } |
138 | } |
139 | |
140 | } // end namespace |
141 | |
142 | namespace llvm { |
143 | |
144 | std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() { |
145 | return std::make_unique<ExportClustering>(); |
146 | } |
147 | |
148 | } // end namespace llvm |
149 | |