| 1 | //===- MacroFusion.cpp - Macro Fusion -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file This file contains the implementation of the DAG scheduling mutation |
| 10 | /// to pair instructions back to back. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/CodeGen/MacroFusion.h" |
| 15 | #include "llvm/ADT/Statistic.h" |
| 16 | #include "llvm/CodeGen/MachineInstr.h" |
| 17 | #include "llvm/CodeGen/ScheduleDAG.h" |
| 18 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
| 19 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
| 20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 21 | #include "llvm/Support/CommandLine.h" |
| 22 | #include "llvm/Support/Debug.h" |
| 23 | #include "llvm/Support/raw_ostream.h" |
| 24 | |
| 25 | #define DEBUG_TYPE "machine-scheduler" |
| 26 | |
| 27 | STATISTIC(NumFused, "Number of instr pairs fused" ); |
| 28 | |
| 29 | using namespace llvm; |
| 30 | |
| 31 | static cl::opt<bool> EnableMacroFusion("misched-fusion" , cl::Hidden, |
| 32 | cl::desc("Enable scheduling for macro fusion." ), cl::init(Val: true)); |
| 33 | |
| 34 | static bool isHazard(const SDep &Dep) { |
| 35 | return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; |
| 36 | } |
| 37 | |
| 38 | static SUnit *getPredClusterSU(const SUnit &SU) { |
| 39 | for (const SDep &SI : SU.Preds) |
| 40 | if (SI.isCluster()) |
| 41 | return SI.getSUnit(); |
| 42 | |
| 43 | return nullptr; |
| 44 | } |
| 45 | |
| 46 | bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { |
| 47 | unsigned Num = 1; |
| 48 | const SUnit *CurrentSU = &SU; |
| 49 | while ((CurrentSU = getPredClusterSU(SU: *CurrentSU)) && Num < FuseLimit) Num ++; |
| 50 | return Num < FuseLimit; |
| 51 | } |
| 52 | |
| 53 | bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, |
| 54 | SUnit &SecondSU) { |
| 55 | // Check that neither instr is already paired with another along the edge |
| 56 | // between them. |
| 57 | for (SDep &SI : FirstSU.Succs) |
| 58 | if (SI.isCluster()) |
| 59 | return false; |
| 60 | |
| 61 | for (SDep &SI : SecondSU.Preds) |
| 62 | if (SI.isCluster()) |
| 63 | return false; |
| 64 | |
| 65 | assert(FirstSU.ParentClusterIdx == InvalidClusterId && |
| 66 | SecondSU.ParentClusterIdx == InvalidClusterId); |
| 67 | |
| 68 | // Though the reachability checks above could be made more generic, |
| 69 | // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid, |
| 70 | // the extra computation cost makes it less interesting in general cases. |
| 71 | |
| 72 | // Create a single weak edge between the adjacent instrs. The only effect is |
| 73 | // to cause bottom-up scheduling to heavily prioritize the clustered instrs. |
| 74 | if (!DAG.addEdge(SuccSU: &SecondSU, PredDep: SDep(&FirstSU, SDep::Cluster))) |
| 75 | return false; |
| 76 | |
| 77 | auto &Clusters = DAG.getClusters(); |
| 78 | |
| 79 | FirstSU.ParentClusterIdx = Clusters.size(); |
| 80 | SecondSU.ParentClusterIdx = Clusters.size(); |
| 81 | |
| 82 | SmallSet<SUnit *, 8> Cluster{{&FirstSU, &SecondSU}}; |
| 83 | Clusters.push_back(Elt: Cluster); |
| 84 | |
| 85 | // TODO - If we want to chain more than two instructions, we need to create |
| 86 | // artifical edges to make dependencies from the FirstSU also dependent |
| 87 | // on other chained instructions, and other chained instructions also |
| 88 | // dependent on the dependencies of the SecondSU, to prevent them from being |
| 89 | // scheduled into these chained instructions. |
| 90 | assert(hasLessThanNumFused(FirstSU, 2) && |
| 91 | "Currently we only support chaining together two instructions" ); |
| 92 | |
| 93 | // Adjust the latency between both instrs. |
| 94 | for (SDep &SI : FirstSU.Succs) |
| 95 | if (SI.getSUnit() == &SecondSU) |
| 96 | SI.setLatency(0); |
| 97 | |
| 98 | for (SDep &SI : SecondSU.Preds) |
| 99 | if (SI.getSUnit() == &FirstSU) |
| 100 | SI.setLatency(0); |
| 101 | |
| 102 | LLVM_DEBUG( |
| 103 | dbgs() << "Macro fuse: " ; DAG.dumpNodeName(FirstSU); dbgs() << " - " ; |
| 104 | DAG.dumpNodeName(SecondSU); dbgs() << " / " ; |
| 105 | dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " |
| 106 | << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); |
| 107 | |
| 108 | // Make data dependencies from the FirstSU also dependent on the SecondSU to |
| 109 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
| 110 | if (&SecondSU != &DAG.ExitSU) |
| 111 | for (const SDep &SI : FirstSU.Succs) { |
| 112 | SUnit *SU = SI.getSUnit(); |
| 113 | if (SI.isWeak() || isHazard(Dep: SI) || |
| 114 | SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(N: &SecondSU)) |
| 115 | continue; |
| 116 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(SecondSU); |
| 117 | dbgs() << " - " ; DAG.dumpNodeName(*SU); dbgs() << '\n';); |
| 118 | DAG.addEdge(SuccSU: SU, PredDep: SDep(&SecondSU, SDep::Artificial)); |
| 119 | } |
| 120 | |
| 121 | // Make the FirstSU also dependent on the dependencies of the SecondSU to |
| 122 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
| 123 | if (&FirstSU != &DAG.EntrySU) { |
| 124 | for (const SDep &SI : SecondSU.Preds) { |
| 125 | SUnit *SU = SI.getSUnit(); |
| 126 | if (SI.isWeak() || isHazard(Dep: SI) || &FirstSU == SU || FirstSU.isSucc(N: SU)) |
| 127 | continue; |
| 128 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(*SU); dbgs() << " - " ; |
| 129 | DAG.dumpNodeName(FirstSU); dbgs() << '\n';); |
| 130 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(SU, SDep::Artificial)); |
| 131 | } |
| 132 | // ExitSU comes last by design, which acts like an implicit dependency |
| 133 | // between ExitSU and any bottom root in the graph. We should transfer |
| 134 | // this to FirstSU as well. |
| 135 | if (&SecondSU == &DAG.ExitSU) { |
| 136 | for (SUnit &SU : DAG.SUnits) { |
| 137 | if (SU.Succs.empty()) |
| 138 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(&SU, SDep::Artificial)); |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | ++NumFused; |
| 144 | return true; |
| 145 | } |
| 146 | |
| 147 | namespace { |
| 148 | |
| 149 | /// Post-process the DAG to create cluster edges between instrs that may |
| 150 | /// be fused by the processor into a single operation. |
| 151 | class MacroFusion : public ScheduleDAGMutation { |
| 152 | std::vector<MacroFusionPredTy> Predicates; |
| 153 | bool FuseBlock; |
| 154 | bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); |
| 155 | |
| 156 | public: |
| 157 | MacroFusion(ArrayRef<MacroFusionPredTy> Predicates, bool FuseBlock) |
| 158 | : Predicates(Predicates.begin(), Predicates.end()), FuseBlock(FuseBlock) { |
| 159 | } |
| 160 | |
| 161 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
| 162 | |
| 163 | bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 164 | const TargetSubtargetInfo &STI, |
| 165 | const MachineInstr *FirstMI, |
| 166 | const MachineInstr &SecondMI); |
| 167 | }; |
| 168 | |
| 169 | } // end anonymous namespace |
| 170 | |
| 171 | bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 172 | const TargetSubtargetInfo &STI, |
| 173 | const MachineInstr *FirstMI, |
| 174 | const MachineInstr &SecondMI) { |
| 175 | return llvm::any_of(Range&: Predicates, P: [&](MacroFusionPredTy Predicate) { |
| 176 | return Predicate(TII, STI, FirstMI, SecondMI); |
| 177 | }); |
| 178 | } |
| 179 | |
| 180 | void MacroFusion::apply(ScheduleDAGInstrs *DAG) { |
| 181 | if (FuseBlock) |
| 182 | // For each of the SUnits in the scheduling block, try to fuse the instr in |
| 183 | // it with one in its predecessors. |
| 184 | for (SUnit &ISU : DAG->SUnits) |
| 185 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: ISU); |
| 186 | |
| 187 | if (DAG->ExitSU.getInstr()) |
| 188 | // Try to fuse the instr in the ExitSU with one in its predecessors. |
| 189 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: DAG->ExitSU); |
| 190 | } |
| 191 | |
| 192 | /// Implement the fusion of instr pairs in the scheduling DAG, |
| 193 | /// anchored at the instr in AnchorSU.. |
| 194 | bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) { |
| 195 | const MachineInstr &AnchorMI = *AnchorSU.getInstr(); |
| 196 | const TargetInstrInfo &TII = *DAG.TII; |
| 197 | const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); |
| 198 | |
| 199 | // Check if the anchor instr may be fused. |
| 200 | if (!shouldScheduleAdjacent(TII, STI: ST, FirstMI: nullptr, SecondMI: AnchorMI)) |
| 201 | return false; |
| 202 | |
| 203 | // Explorer for fusion candidates among the dependencies of the anchor instr. |
| 204 | for (SDep &Dep : AnchorSU.Preds) { |
| 205 | // Ignore dependencies other than data or strong ordering. |
| 206 | if (Dep.isWeak() || isHazard(Dep)) |
| 207 | continue; |
| 208 | |
| 209 | SUnit &DepSU = *Dep.getSUnit(); |
| 210 | if (DepSU.isBoundaryNode()) |
| 211 | continue; |
| 212 | |
| 213 | // Only chain two instructions together at most. |
| 214 | const MachineInstr *DepMI = DepSU.getInstr(); |
| 215 | if (!hasLessThanNumFused(SU: DepSU, FuseLimit: 2) || |
| 216 | !shouldScheduleAdjacent(TII, STI: ST, FirstMI: DepMI, SecondMI: AnchorMI)) |
| 217 | continue; |
| 218 | |
| 219 | if (fuseInstructionPair(DAG, FirstSU&: DepSU, SecondSU&: AnchorSU)) |
| 220 | return true; |
| 221 | } |
| 222 | |
| 223 | return false; |
| 224 | } |
| 225 | |
| 226 | std::unique_ptr<ScheduleDAGMutation> |
| 227 | llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates, |
| 228 | bool BranchOnly) { |
| 229 | if (EnableMacroFusion) |
| 230 | return std::make_unique<MacroFusion>(args&: Predicates, args: !BranchOnly); |
| 231 | return nullptr; |
| 232 | } |
| 233 | |