| 1 | //===- MacroFusion.cpp - Macro Fusion -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file This file contains the implementation of the DAG scheduling mutation |
| 10 | /// to pair instructions back to back. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/CodeGen/MacroFusion.h" |
| 15 | #include "llvm/ADT/Statistic.h" |
| 16 | #include "llvm/CodeGen/MachineInstr.h" |
| 17 | #include "llvm/CodeGen/ScheduleDAG.h" |
| 18 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
| 19 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
| 20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 21 | #include "llvm/Support/CommandLine.h" |
| 22 | #include "llvm/Support/Debug.h" |
| 23 | #include "llvm/Support/raw_ostream.h" |
| 24 | |
| 25 | #define DEBUG_TYPE "machine-scheduler" |
| 26 | |
| 27 | STATISTIC(NumFused, "Number of instr pairs fused" ); |
| 28 | STATISTIC(NumFusionConflicts, |
| 29 | "Number of conflicts between a fusion pair and an already existing " |
| 30 | "cluster (either fusion or non-fusion)" ); |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | static cl::opt<bool> EnableMacroFusion("misched-fusion" , cl::Hidden, |
| 35 | cl::desc("Enable scheduling for macro fusion." ), cl::init(Val: true)); |
| 36 | |
| 37 | static bool isHazard(const SDep &Dep) { |
| 38 | return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; |
| 39 | } |
| 40 | |
| 41 | static SUnit *getPredClusterSU(const SUnit &SU) { |
| 42 | for (const SDep &SI : SU.Preds) |
| 43 | if (SI.isCluster()) |
| 44 | return SI.getSUnit(); |
| 45 | |
| 46 | return nullptr; |
| 47 | } |
| 48 | |
| 49 | bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { |
| 50 | unsigned Num = 1; |
| 51 | const SUnit *CurrentSU = &SU; |
| 52 | while ((CurrentSU = getPredClusterSU(SU: *CurrentSU)) && Num < FuseLimit) Num ++; |
| 53 | return Num < FuseLimit; |
| 54 | } |
| 55 | |
| 56 | bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, |
| 57 | SUnit &SecondSU) { |
| 58 | // Check that neither instr is already associated with a cluster (either |
| 59 | // fusion or non-fusion) |
| 60 | if (FirstSU.isClustered() || SecondSU.isClustered()) { |
| 61 | ++NumFusionConflicts; |
| 62 | LLVM_DEBUG({ |
| 63 | dbgs() << "Fusion conflict: cannot fuse SU(" << FirstSU.NodeNum |
| 64 | << ") and SU(" << SecondSU.NodeNum << ")\n" ; |
| 65 | if (FirstSU.isClustered()) |
| 66 | dbgs() << " SU(" << FirstSU.NodeNum << ") already clustered\n" ; |
| 67 | if (SecondSU.isClustered()) |
| 68 | dbgs() << " SU(" << SecondSU.NodeNum << ") already clustered\n" ; |
| 69 | }); |
| 70 | return false; |
| 71 | } |
| 72 | |
| 73 | // Create a single weak edge between the adjacent instrs. The only effect is |
| 74 | // to cause bottom-up scheduling to heavily prioritize the clustered instrs. |
| 75 | if (!DAG.addEdge(SuccSU: &SecondSU, PredDep: SDep(&FirstSU, SDep::Cluster))) |
| 76 | return false; |
| 77 | |
| 78 | auto &Clusters = DAG.getClusters(); |
| 79 | |
| 80 | unsigned ClusterIdx = Clusters.size(); |
| 81 | FirstSU.ParentClusterIdx = ClusterIdx; |
| 82 | SecondSU.ParentClusterIdx = ClusterIdx; |
| 83 | |
| 84 | SmallPtrSet<SUnit *, 8> Cluster{{&FirstSU, &SecondSU}}; |
| 85 | Clusters.push_back(Elt: Cluster); |
| 86 | |
| 87 | // TODO - If we want to chain more than two instructions, we need to create |
| 88 | // artifical edges to make dependencies from the FirstSU also dependent |
| 89 | // on other chained instructions, and other chained instructions also |
| 90 | // dependent on the dependencies of the SecondSU, to prevent them from being |
| 91 | // scheduled into these chained instructions. |
| 92 | assert(hasLessThanNumFused(FirstSU, 2) && |
| 93 | "Currently we only support chaining together two instructions" ); |
| 94 | |
| 95 | // Adjust the latency between both instrs. |
| 96 | for (SDep &SI : FirstSU.Succs) |
| 97 | if (SI.getSUnit() == &SecondSU) |
| 98 | SI.setLatency(0); |
| 99 | |
| 100 | for (SDep &SI : SecondSU.Preds) |
| 101 | if (SI.getSUnit() == &FirstSU) |
| 102 | SI.setLatency(0); |
| 103 | |
| 104 | LLVM_DEBUG( |
| 105 | dbgs() << "Macro fuse: " ; DAG.dumpNodeName(FirstSU); dbgs() << " - " ; |
| 106 | DAG.dumpNodeName(SecondSU); dbgs() << " / " ; |
| 107 | dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " |
| 108 | << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); |
| 109 | |
| 110 | // Make data dependencies from the FirstSU also dependent on the SecondSU to |
| 111 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
| 112 | if (&SecondSU != &DAG.ExitSU) |
| 113 | for (const SDep &SI : FirstSU.Succs) { |
| 114 | SUnit *SU = SI.getSUnit(); |
| 115 | if (SI.isWeak() || isHazard(Dep: SI) || |
| 116 | SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(N: &SecondSU)) |
| 117 | continue; |
| 118 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(SecondSU); |
| 119 | dbgs() << " - " ; DAG.dumpNodeName(*SU); dbgs() << '\n';); |
| 120 | DAG.addEdge(SuccSU: SU, PredDep: SDep(&SecondSU, SDep::Artificial)); |
| 121 | } |
| 122 | |
| 123 | // Make the FirstSU also dependent on the dependencies of the SecondSU to |
| 124 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
| 125 | if (&FirstSU != &DAG.EntrySU) { |
| 126 | for (const SDep &SI : SecondSU.Preds) { |
| 127 | SUnit *SU = SI.getSUnit(); |
| 128 | if (SI.isWeak() || isHazard(Dep: SI) || &FirstSU == SU || FirstSU.isSucc(N: SU)) |
| 129 | continue; |
| 130 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(*SU); dbgs() << " - " ; |
| 131 | DAG.dumpNodeName(FirstSU); dbgs() << '\n';); |
| 132 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(SU, SDep::Artificial)); |
| 133 | } |
| 134 | // ExitSU comes last by design, which acts like an implicit dependency |
| 135 | // between ExitSU and any bottom root in the graph. We should transfer |
| 136 | // this to FirstSU as well. |
| 137 | if (&SecondSU == &DAG.ExitSU) { |
| 138 | for (SUnit &SU : DAG.SUnits) { |
| 139 | if (SU.Succs.empty()) |
| 140 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(&SU, SDep::Artificial)); |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | ++NumFused; |
| 146 | return true; |
| 147 | } |
| 148 | |
| 149 | namespace { |
| 150 | |
| 151 | /// Post-process the DAG to create cluster edges between instrs that may |
| 152 | /// be fused by the processor into a single operation. |
| 153 | class MacroFusion : public ScheduleDAGMutation { |
| 154 | std::vector<MacroFusionPredTy> Predicates; |
| 155 | bool FuseBlock; |
| 156 | bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); |
| 157 | |
| 158 | public: |
| 159 | MacroFusion(ArrayRef<MacroFusionPredTy> Predicates, bool FuseBlock) |
| 160 | : Predicates(Predicates.begin(), Predicates.end()), FuseBlock(FuseBlock) { |
| 161 | } |
| 162 | |
| 163 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
| 164 | |
| 165 | bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 166 | const TargetSubtargetInfo &STI, |
| 167 | const MachineInstr *FirstMI, |
| 168 | const MachineInstr &SecondMI); |
| 169 | }; |
| 170 | |
| 171 | } // end anonymous namespace |
| 172 | |
| 173 | bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 174 | const TargetSubtargetInfo &STI, |
| 175 | const MachineInstr *FirstMI, |
| 176 | const MachineInstr &SecondMI) { |
| 177 | return llvm::any_of(Range&: Predicates, P: [&](MacroFusionPredTy Predicate) { |
| 178 | return Predicate(TII, STI, FirstMI, SecondMI); |
| 179 | }); |
| 180 | } |
| 181 | |
| 182 | void MacroFusion::apply(ScheduleDAGInstrs *DAG) { |
| 183 | if (FuseBlock) |
| 184 | // For each of the SUnits in the scheduling block, try to fuse the instr in |
| 185 | // it with one in its predecessors. |
| 186 | for (SUnit &ISU : DAG->SUnits) |
| 187 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: ISU); |
| 188 | |
| 189 | if (DAG->ExitSU.getInstr()) |
| 190 | // Try to fuse the instr in the ExitSU with one in its predecessors. |
| 191 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: DAG->ExitSU); |
| 192 | } |
| 193 | |
| 194 | /// Implement the fusion of instr pairs in the scheduling DAG, |
| 195 | /// anchored at the instr in AnchorSU.. |
| 196 | bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) { |
| 197 | const MachineInstr &AnchorMI = *AnchorSU.getInstr(); |
| 198 | const TargetInstrInfo &TII = *DAG.TII; |
| 199 | const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); |
| 200 | |
| 201 | // Check if the anchor instr may be fused. |
| 202 | if (!shouldScheduleAdjacent(TII, STI: ST, FirstMI: nullptr, SecondMI: AnchorMI)) |
| 203 | return false; |
| 204 | |
| 205 | // Explorer for fusion candidates among the dependencies of the anchor instr. |
| 206 | for (SDep &Dep : AnchorSU.Preds) { |
| 207 | // Ignore dependencies other than data |
| 208 | if (Dep.getKind() != SDep::Data) |
| 209 | continue; |
| 210 | |
| 211 | SUnit &DepSU = *Dep.getSUnit(); |
| 212 | if (DepSU.isBoundaryNode()) |
| 213 | continue; |
| 214 | |
| 215 | // Only chain two instructions together at most. |
| 216 | const MachineInstr *DepMI = DepSU.getInstr(); |
| 217 | if (!hasLessThanNumFused(SU: DepSU, FuseLimit: 2) || |
| 218 | !shouldScheduleAdjacent(TII, STI: ST, FirstMI: DepMI, SecondMI: AnchorMI)) |
| 219 | continue; |
| 220 | |
| 221 | if (fuseInstructionPair(DAG, FirstSU&: DepSU, SecondSU&: AnchorSU)) |
| 222 | return true; |
| 223 | } |
| 224 | |
| 225 | return false; |
| 226 | } |
| 227 | |
| 228 | std::unique_ptr<ScheduleDAGMutation> |
| 229 | llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates, |
| 230 | bool BranchOnly) { |
| 231 | if (EnableMacroFusion) |
| 232 | return std::make_unique<MacroFusion>(args&: Predicates, args: !BranchOnly); |
| 233 | return nullptr; |
| 234 | } |
| 235 | |