| 1 | //===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file This file contains a DAG scheduling mutation to cluster shader |
| 10 | /// exports. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "AMDGPUExportClustering.h" |
| 15 | #include "SIInstrInfo.h" |
| 16 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
| 17 | |
| 18 | using namespace llvm; |
| 19 | |
| 20 | namespace { |
| 21 | |
| 22 | class ExportClustering : public ScheduleDAGMutation { |
| 23 | public: |
| 24 | ExportClustering() = default; |
| 25 | void apply(ScheduleDAGInstrs *DAG) override; |
| 26 | }; |
| 27 | |
| 28 | static bool isExport(const SUnit &SU) { |
| 29 | return SIInstrInfo::isEXP(MI: *SU.getInstr()); |
| 30 | } |
| 31 | |
| 32 | static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) { |
| 33 | const MachineInstr *MI = SU->getInstr(); |
| 34 | unsigned Imm = TII->getNamedOperand(MI: *MI, OperandName: AMDGPU::OpName::tgt)->getImm(); |
| 35 | return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST; |
| 36 | } |
| 37 | |
| 38 | static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain, |
| 39 | unsigned PosCount) { |
| 40 | if (!PosCount || PosCount == Chain.size()) |
| 41 | return; |
| 42 | |
| 43 | // Position exports should occur as soon as possible in the shader |
| 44 | // for optimal performance. This moves position exports before |
| 45 | // other exports while preserving the order within different export |
| 46 | // types (pos or other). |
| 47 | SmallVector<SUnit *, 8> Copy(Chain); |
| 48 | unsigned PosIdx = 0; |
| 49 | unsigned OtherIdx = PosCount; |
| 50 | for (SUnit *SU : Copy) { |
| 51 | if (isPositionExport(TII, SU)) |
| 52 | Chain[PosIdx++] = SU; |
| 53 | else |
| 54 | Chain[OtherIdx++] = SU; |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) { |
| 59 | SUnit *ChainHead = Exports.front(); |
| 60 | |
| 61 | // Now construct cluster from chain by adding new edges. |
| 62 | for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) { |
| 63 | SUnit *SUa = Exports[Idx]; |
| 64 | SUnit *SUb = Exports[Idx + 1]; |
| 65 | |
| 66 | // Copy all dependencies to the head of the chain to avoid any |
| 67 | // computation being inserted into the chain. |
| 68 | for (const SDep &Pred : SUb->Preds) { |
| 69 | SUnit *PredSU = Pred.getSUnit(); |
| 70 | if (!isExport(SU: *PredSU) && !Pred.isWeak()) |
| 71 | DAG->addEdge(SuccSU: ChainHead, PredDep: SDep(PredSU, SDep::Artificial)); |
| 72 | } |
| 73 | |
| 74 | // New barrier edge ordering exports |
| 75 | DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Barrier)); |
| 76 | // Also add cluster edge |
| 77 | DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Cluster)); |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) { |
| 82 | SmallVector<SDep, 2> ToAdd, ToRemove; |
| 83 | |
| 84 | for (const SDep &Pred : SU.Preds) { |
| 85 | SUnit *PredSU = Pred.getSUnit(); |
| 86 | if (Pred.isBarrier() && isExport(SU: *PredSU)) { |
| 87 | ToRemove.push_back(Elt: Pred); |
| 88 | if (isExport(SU)) |
| 89 | continue; |
| 90 | |
| 91 | // If we remove a barrier we need to copy dependencies |
| 92 | // from the predecessor to maintain order. |
| 93 | for (const SDep &ExportPred : PredSU->Preds) { |
| 94 | SUnit *ExportPredSU = ExportPred.getSUnit(); |
| 95 | if (ExportPred.isBarrier() && !isExport(SU: *ExportPredSU)) |
| 96 | ToAdd.push_back(Elt: SDep(ExportPredSU, SDep::Barrier)); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | for (SDep Pred : ToRemove) |
| 102 | SU.removePred(D: Pred); |
| 103 | for (SDep Pred : ToAdd) |
| 104 | DAG->addEdge(SuccSU: &SU, PredDep: Pred); |
| 105 | } |
| 106 | |
| 107 | void ExportClustering::apply(ScheduleDAGInstrs *DAG) { |
| 108 | const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII); |
| 109 | |
| 110 | SmallVector<SUnit *, 8> Chain; |
| 111 | |
| 112 | // Pass through DAG gathering a list of exports and removing barrier edges |
| 113 | // creating dependencies on exports. Freeing exports of successor edges |
| 114 | // allows more scheduling freedom, and nothing should be order dependent |
| 115 | // on exports. Edges will be added later to order the exports. |
| 116 | unsigned PosCount = 0; |
| 117 | for (SUnit &SU : DAG->SUnits) { |
| 118 | if (!isExport(SU)) |
| 119 | continue; |
| 120 | |
| 121 | Chain.push_back(Elt: &SU); |
| 122 | if (isPositionExport(TII, SU: &SU)) |
| 123 | PosCount++; |
| 124 | |
| 125 | removeExportDependencies(DAG, SU); |
| 126 | |
| 127 | SmallVector<SDep, 4> Succs(SU.Succs); |
| 128 | for (SDep Succ : Succs) |
| 129 | removeExportDependencies(DAG, SU&: *Succ.getSUnit()); |
| 130 | } |
| 131 | |
| 132 | // Apply clustering if there are multiple exports |
| 133 | if (Chain.size() > 1) { |
| 134 | sortChain(TII, Chain, PosCount); |
| 135 | buildCluster(Exports: Chain, DAG); |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | } // end namespace |
| 140 | |
| 141 | std::unique_ptr<ScheduleDAGMutation> |
| 142 | llvm::createAMDGPUExportClusteringDAGMutation() { |
| 143 | return std::make_unique<ExportClustering>(); |
| 144 | } |
| 145 | |