1 | //===- MacroFusion.cpp - Macro Fusion -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file This file contains the implementation of the DAG scheduling mutation |
10 | /// to pair instructions back to back. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/MacroFusion.h" |
15 | #include "llvm/ADT/Statistic.h" |
16 | #include "llvm/CodeGen/MachineInstr.h" |
17 | #include "llvm/CodeGen/ScheduleDAG.h" |
18 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
19 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/Support/CommandLine.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include "llvm/Support/raw_ostream.h" |
24 | |
25 | #define DEBUG_TYPE "machine-scheduler" |
26 | |
27 | STATISTIC(NumFused, "Number of instr pairs fused" ); |
28 | |
29 | using namespace llvm; |
30 | |
31 | static cl::opt<bool> EnableMacroFusion("misched-fusion" , cl::Hidden, |
32 | cl::desc("Enable scheduling for macro fusion." ), cl::init(Val: true)); |
33 | |
34 | static bool isHazard(const SDep &Dep) { |
35 | return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; |
36 | } |
37 | |
38 | static SUnit *getPredClusterSU(const SUnit &SU) { |
39 | for (const SDep &SI : SU.Preds) |
40 | if (SI.isCluster()) |
41 | return SI.getSUnit(); |
42 | |
43 | return nullptr; |
44 | } |
45 | |
46 | bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { |
47 | unsigned Num = 1; |
48 | const SUnit *CurrentSU = &SU; |
49 | while ((CurrentSU = getPredClusterSU(SU: *CurrentSU)) && Num < FuseLimit) Num ++; |
50 | return Num < FuseLimit; |
51 | } |
52 | |
53 | bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, |
54 | SUnit &SecondSU) { |
55 | // Check that neither instr is already paired with another along the edge |
56 | // between them. |
57 | for (SDep &SI : FirstSU.Succs) |
58 | if (SI.isCluster()) |
59 | return false; |
60 | |
61 | for (SDep &SI : SecondSU.Preds) |
62 | if (SI.isCluster()) |
63 | return false; |
64 | |
65 | assert(FirstSU.ParentClusterIdx == InvalidClusterId && |
66 | SecondSU.ParentClusterIdx == InvalidClusterId); |
67 | |
68 | // Though the reachability checks above could be made more generic, |
69 | // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid, |
70 | // the extra computation cost makes it less interesting in general cases. |
71 | |
72 | // Create a single weak edge between the adjacent instrs. The only effect is |
73 | // to cause bottom-up scheduling to heavily prioritize the clustered instrs. |
74 | if (!DAG.addEdge(SuccSU: &SecondSU, PredDep: SDep(&FirstSU, SDep::Cluster))) |
75 | return false; |
76 | |
77 | auto &Clusters = DAG.getClusters(); |
78 | |
79 | FirstSU.ParentClusterIdx = Clusters.size(); |
80 | SecondSU.ParentClusterIdx = Clusters.size(); |
81 | |
82 | SmallSet<SUnit *, 8> Cluster{{&FirstSU, &SecondSU}}; |
83 | Clusters.push_back(Elt: Cluster); |
84 | |
85 | // TODO - If we want to chain more than two instructions, we need to create |
86 | // artifical edges to make dependencies from the FirstSU also dependent |
87 | // on other chained instructions, and other chained instructions also |
88 | // dependent on the dependencies of the SecondSU, to prevent them from being |
89 | // scheduled into these chained instructions. |
90 | assert(hasLessThanNumFused(FirstSU, 2) && |
91 | "Currently we only support chaining together two instructions" ); |
92 | |
93 | // Adjust the latency between both instrs. |
94 | for (SDep &SI : FirstSU.Succs) |
95 | if (SI.getSUnit() == &SecondSU) |
96 | SI.setLatency(0); |
97 | |
98 | for (SDep &SI : SecondSU.Preds) |
99 | if (SI.getSUnit() == &FirstSU) |
100 | SI.setLatency(0); |
101 | |
102 | LLVM_DEBUG( |
103 | dbgs() << "Macro fuse: " ; DAG.dumpNodeName(FirstSU); dbgs() << " - " ; |
104 | DAG.dumpNodeName(SecondSU); dbgs() << " / " ; |
105 | dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " |
106 | << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); |
107 | |
108 | // Make data dependencies from the FirstSU also dependent on the SecondSU to |
109 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
110 | if (&SecondSU != &DAG.ExitSU) |
111 | for (const SDep &SI : FirstSU.Succs) { |
112 | SUnit *SU = SI.getSUnit(); |
113 | if (SI.isWeak() || isHazard(Dep: SI) || |
114 | SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(N: &SecondSU)) |
115 | continue; |
116 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(SecondSU); |
117 | dbgs() << " - " ; DAG.dumpNodeName(*SU); dbgs() << '\n';); |
118 | DAG.addEdge(SuccSU: SU, PredDep: SDep(&SecondSU, SDep::Artificial)); |
119 | } |
120 | |
121 | // Make the FirstSU also dependent on the dependencies of the SecondSU to |
122 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
123 | if (&FirstSU != &DAG.EntrySU) { |
124 | for (const SDep &SI : SecondSU.Preds) { |
125 | SUnit *SU = SI.getSUnit(); |
126 | if (SI.isWeak() || isHazard(Dep: SI) || &FirstSU == SU || FirstSU.isSucc(N: SU)) |
127 | continue; |
128 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(*SU); dbgs() << " - " ; |
129 | DAG.dumpNodeName(FirstSU); dbgs() << '\n';); |
130 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(SU, SDep::Artificial)); |
131 | } |
132 | // ExitSU comes last by design, which acts like an implicit dependency |
133 | // between ExitSU and any bottom root in the graph. We should transfer |
134 | // this to FirstSU as well. |
135 | if (&SecondSU == &DAG.ExitSU) { |
136 | for (SUnit &SU : DAG.SUnits) { |
137 | if (SU.Succs.empty()) |
138 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(&SU, SDep::Artificial)); |
139 | } |
140 | } |
141 | } |
142 | |
143 | ++NumFused; |
144 | return true; |
145 | } |
146 | |
147 | namespace { |
148 | |
149 | /// Post-process the DAG to create cluster edges between instrs that may |
150 | /// be fused by the processor into a single operation. |
151 | class MacroFusion : public ScheduleDAGMutation { |
152 | std::vector<MacroFusionPredTy> Predicates; |
153 | bool FuseBlock; |
154 | bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); |
155 | |
156 | public: |
157 | MacroFusion(ArrayRef<MacroFusionPredTy> Predicates, bool FuseBlock) |
158 | : Predicates(Predicates.begin(), Predicates.end()), FuseBlock(FuseBlock) { |
159 | } |
160 | |
161 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
162 | |
163 | bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
164 | const TargetSubtargetInfo &STI, |
165 | const MachineInstr *FirstMI, |
166 | const MachineInstr &SecondMI); |
167 | }; |
168 | |
169 | } // end anonymous namespace |
170 | |
171 | bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII, |
172 | const TargetSubtargetInfo &STI, |
173 | const MachineInstr *FirstMI, |
174 | const MachineInstr &SecondMI) { |
175 | return llvm::any_of(Range&: Predicates, P: [&](MacroFusionPredTy Predicate) { |
176 | return Predicate(TII, STI, FirstMI, SecondMI); |
177 | }); |
178 | } |
179 | |
180 | void MacroFusion::apply(ScheduleDAGInstrs *DAG) { |
181 | if (FuseBlock) |
182 | // For each of the SUnits in the scheduling block, try to fuse the instr in |
183 | // it with one in its predecessors. |
184 | for (SUnit &ISU : DAG->SUnits) |
185 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: ISU); |
186 | |
187 | if (DAG->ExitSU.getInstr()) |
188 | // Try to fuse the instr in the ExitSU with one in its predecessors. |
189 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: DAG->ExitSU); |
190 | } |
191 | |
192 | /// Implement the fusion of instr pairs in the scheduling DAG, |
193 | /// anchored at the instr in AnchorSU.. |
194 | bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) { |
195 | const MachineInstr &AnchorMI = *AnchorSU.getInstr(); |
196 | const TargetInstrInfo &TII = *DAG.TII; |
197 | const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); |
198 | |
199 | // Check if the anchor instr may be fused. |
200 | if (!shouldScheduleAdjacent(TII, STI: ST, FirstMI: nullptr, SecondMI: AnchorMI)) |
201 | return false; |
202 | |
203 | // Explorer for fusion candidates among the dependencies of the anchor instr. |
204 | for (SDep &Dep : AnchorSU.Preds) { |
205 | // Ignore dependencies other than data or strong ordering. |
206 | if (Dep.isWeak() || isHazard(Dep)) |
207 | continue; |
208 | |
209 | SUnit &DepSU = *Dep.getSUnit(); |
210 | if (DepSU.isBoundaryNode()) |
211 | continue; |
212 | |
213 | // Only chain two instructions together at most. |
214 | const MachineInstr *DepMI = DepSU.getInstr(); |
215 | if (!hasLessThanNumFused(SU: DepSU, FuseLimit: 2) || |
216 | !shouldScheduleAdjacent(TII, STI: ST, FirstMI: DepMI, SecondMI: AnchorMI)) |
217 | continue; |
218 | |
219 | if (fuseInstructionPair(DAG, FirstSU&: DepSU, SecondSU&: AnchorSU)) |
220 | return true; |
221 | } |
222 | |
223 | return false; |
224 | } |
225 | |
226 | std::unique_ptr<ScheduleDAGMutation> |
227 | llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates, |
228 | bool BranchOnly) { |
229 | if (EnableMacroFusion) |
230 | return std::make_unique<MacroFusion>(args&: Predicates, args: !BranchOnly); |
231 | return nullptr; |
232 | } |
233 | |