1 | //===- MacroFusion.cpp - Macro Fusion -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file This file contains the implementation of the DAG scheduling mutation |
10 | /// to pair instructions back to back. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/MacroFusion.h" |
15 | #include "llvm/ADT/Statistic.h" |
16 | #include "llvm/CodeGen/MachineInstr.h" |
17 | #include "llvm/CodeGen/ScheduleDAG.h" |
18 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
19 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/Support/CommandLine.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include "llvm/Support/raw_ostream.h" |
24 | |
25 | #define DEBUG_TYPE "machine-scheduler" |
26 | |
27 | STATISTIC(NumFused, "Number of instr pairs fused" ); |
28 | |
29 | using namespace llvm; |
30 | |
31 | static cl::opt<bool> EnableMacroFusion("misched-fusion" , cl::Hidden, |
32 | cl::desc("Enable scheduling for macro fusion." ), cl::init(Val: true)); |
33 | |
34 | static bool isHazard(const SDep &Dep) { |
35 | return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; |
36 | } |
37 | |
38 | static SUnit *getPredClusterSU(const SUnit &SU) { |
39 | for (const SDep &SI : SU.Preds) |
40 | if (SI.isCluster()) |
41 | return SI.getSUnit(); |
42 | |
43 | return nullptr; |
44 | } |
45 | |
46 | bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { |
47 | unsigned Num = 1; |
48 | const SUnit *CurrentSU = &SU; |
49 | while ((CurrentSU = getPredClusterSU(SU: *CurrentSU)) && Num < FuseLimit) Num ++; |
50 | return Num < FuseLimit; |
51 | } |
52 | |
53 | bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, |
54 | SUnit &SecondSU) { |
55 | // Check that neither instr is already paired with another along the edge |
56 | // between them. |
57 | for (SDep &SI : FirstSU.Succs) |
58 | if (SI.isCluster()) |
59 | return false; |
60 | |
61 | for (SDep &SI : SecondSU.Preds) |
62 | if (SI.isCluster()) |
63 | return false; |
64 | // Though the reachability checks above could be made more generic, |
65 | // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid, |
66 | // the extra computation cost makes it less interesting in general cases. |
67 | |
68 | // Create a single weak edge between the adjacent instrs. The only effect is |
69 | // to cause bottom-up scheduling to heavily prioritize the clustered instrs. |
70 | if (!DAG.addEdge(SuccSU: &SecondSU, PredDep: SDep(&FirstSU, SDep::Cluster))) |
71 | return false; |
72 | |
73 | // TODO - If we want to chain more than two instructions, we need to create |
74 | // artifical edges to make dependencies from the FirstSU also dependent |
75 | // on other chained instructions, and other chained instructions also |
76 | // dependent on the dependencies of the SecondSU, to prevent them from being |
77 | // scheduled into these chained instructions. |
78 | assert(hasLessThanNumFused(FirstSU, 2) && |
79 | "Currently we only support chaining together two instructions" ); |
80 | |
81 | // Adjust the latency between both instrs. |
82 | for (SDep &SI : FirstSU.Succs) |
83 | if (SI.getSUnit() == &SecondSU) |
84 | SI.setLatency(0); |
85 | |
86 | for (SDep &SI : SecondSU.Preds) |
87 | if (SI.getSUnit() == &FirstSU) |
88 | SI.setLatency(0); |
89 | |
90 | LLVM_DEBUG( |
91 | dbgs() << "Macro fuse: " ; DAG.dumpNodeName(FirstSU); dbgs() << " - " ; |
92 | DAG.dumpNodeName(SecondSU); dbgs() << " / " ; |
93 | dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " |
94 | << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';); |
95 | |
96 | // Make data dependencies from the FirstSU also dependent on the SecondSU to |
97 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
98 | if (&SecondSU != &DAG.ExitSU) |
99 | for (const SDep &SI : FirstSU.Succs) { |
100 | SUnit *SU = SI.getSUnit(); |
101 | if (SI.isWeak() || isHazard(Dep: SI) || |
102 | SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(N: &SecondSU)) |
103 | continue; |
104 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(SecondSU); |
105 | dbgs() << " - " ; DAG.dumpNodeName(*SU); dbgs() << '\n';); |
106 | DAG.addEdge(SuccSU: SU, PredDep: SDep(&SecondSU, SDep::Artificial)); |
107 | } |
108 | |
109 | // Make the FirstSU also dependent on the dependencies of the SecondSU to |
110 | // prevent them from being scheduled between the FirstSU and the SecondSU. |
111 | if (&FirstSU != &DAG.EntrySU) { |
112 | for (const SDep &SI : SecondSU.Preds) { |
113 | SUnit *SU = SI.getSUnit(); |
114 | if (SI.isWeak() || isHazard(Dep: SI) || &FirstSU == SU || FirstSU.isSucc(N: SU)) |
115 | continue; |
116 | LLVM_DEBUG(dbgs() << " Bind " ; DAG.dumpNodeName(*SU); dbgs() << " - " ; |
117 | DAG.dumpNodeName(FirstSU); dbgs() << '\n';); |
118 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(SU, SDep::Artificial)); |
119 | } |
120 | // ExitSU comes last by design, which acts like an implicit dependency |
121 | // between ExitSU and any bottom root in the graph. We should transfer |
122 | // this to FirstSU as well. |
123 | if (&SecondSU == &DAG.ExitSU) { |
124 | for (SUnit &SU : DAG.SUnits) { |
125 | if (SU.Succs.empty()) |
126 | DAG.addEdge(SuccSU: &FirstSU, PredDep: SDep(&SU, SDep::Artificial)); |
127 | } |
128 | } |
129 | } |
130 | |
131 | ++NumFused; |
132 | return true; |
133 | } |
134 | |
135 | namespace { |
136 | |
137 | /// Post-process the DAG to create cluster edges between instrs that may |
138 | /// be fused by the processor into a single operation. |
139 | class MacroFusion : public ScheduleDAGMutation { |
140 | std::vector<MacroFusionPredTy> Predicates; |
141 | bool FuseBlock; |
142 | bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU); |
143 | |
144 | public: |
145 | MacroFusion(ArrayRef<MacroFusionPredTy> Predicates, bool FuseBlock) |
146 | : Predicates(Predicates.begin(), Predicates.end()), FuseBlock(FuseBlock) { |
147 | } |
148 | |
149 | void apply(ScheduleDAGInstrs *DAGInstrs) override; |
150 | |
151 | bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
152 | const TargetSubtargetInfo &STI, |
153 | const MachineInstr *FirstMI, |
154 | const MachineInstr &SecondMI); |
155 | }; |
156 | |
157 | } // end anonymous namespace |
158 | |
159 | bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII, |
160 | const TargetSubtargetInfo &STI, |
161 | const MachineInstr *FirstMI, |
162 | const MachineInstr &SecondMI) { |
163 | return llvm::any_of(Range&: Predicates, P: [&](MacroFusionPredTy Predicate) { |
164 | return Predicate(TII, STI, FirstMI, SecondMI); |
165 | }); |
166 | } |
167 | |
168 | void MacroFusion::apply(ScheduleDAGInstrs *DAG) { |
169 | if (FuseBlock) |
170 | // For each of the SUnits in the scheduling block, try to fuse the instr in |
171 | // it with one in its predecessors. |
172 | for (SUnit &ISU : DAG->SUnits) |
173 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: ISU); |
174 | |
175 | if (DAG->ExitSU.getInstr()) |
176 | // Try to fuse the instr in the ExitSU with one in its predecessors. |
177 | scheduleAdjacentImpl(DAG&: *DAG, AnchorSU&: DAG->ExitSU); |
178 | } |
179 | |
180 | /// Implement the fusion of instr pairs in the scheduling DAG, |
181 | /// anchored at the instr in AnchorSU.. |
182 | bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) { |
183 | const MachineInstr &AnchorMI = *AnchorSU.getInstr(); |
184 | const TargetInstrInfo &TII = *DAG.TII; |
185 | const TargetSubtargetInfo &ST = DAG.MF.getSubtarget(); |
186 | |
187 | // Check if the anchor instr may be fused. |
188 | if (!shouldScheduleAdjacent(TII, STI: ST, FirstMI: nullptr, SecondMI: AnchorMI)) |
189 | return false; |
190 | |
191 | // Explorer for fusion candidates among the dependencies of the anchor instr. |
192 | for (SDep &Dep : AnchorSU.Preds) { |
193 | // Ignore dependencies other than data or strong ordering. |
194 | if (Dep.isWeak() || isHazard(Dep)) |
195 | continue; |
196 | |
197 | SUnit &DepSU = *Dep.getSUnit(); |
198 | if (DepSU.isBoundaryNode()) |
199 | continue; |
200 | |
201 | // Only chain two instructions together at most. |
202 | const MachineInstr *DepMI = DepSU.getInstr(); |
203 | if (!hasLessThanNumFused(SU: DepSU, FuseLimit: 2) || |
204 | !shouldScheduleAdjacent(TII, STI: ST, FirstMI: DepMI, SecondMI: AnchorMI)) |
205 | continue; |
206 | |
207 | if (fuseInstructionPair(DAG, FirstSU&: DepSU, SecondSU&: AnchorSU)) |
208 | return true; |
209 | } |
210 | |
211 | return false; |
212 | } |
213 | |
214 | std::unique_ptr<ScheduleDAGMutation> |
215 | llvm::createMacroFusionDAGMutation(ArrayRef<MacroFusionPredTy> Predicates, |
216 | bool BranchOnly) { |
217 | if (EnableMacroFusion) |
218 | return std::make_unique<MacroFusion>(args&: Predicates, args: !BranchOnly); |
219 | return nullptr; |
220 | } |
221 | |