1 | //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //! \file |
10 | //! This pass performs merges of loads and stores on both sides of a |
11 | // diamond (hammock). It hoists the loads and sinks the stores. |
12 | // |
13 | // The algorithm iteratively hoists two loads to the same address out of a |
14 | // diamond (hammock) and merges them into a single load in the header. Similar |
15 | // it sinks and merges two stores to the tail block (footer). The algorithm |
16 | // iterates over the instructions of one side of the diamond and attempts to |
17 | // find a matching load/store on the other side. New tail/footer block may be |
18 | // insterted if the tail/footer block has more predecessors (not only the two |
19 | // predecessors that are forming the diamond). It hoists / sinks when it thinks |
20 | // it safe to do so. This optimization helps with eg. hiding load latencies, |
21 | // triggering if-conversion, and reducing static code size. |
22 | // |
23 | // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. |
24 | // |
25 | //===----------------------------------------------------------------------===// |
26 | // |
27 | // |
28 | // Example: |
29 | // Diamond shaped code before merge: |
30 | // |
31 | // header: |
32 | // br %cond, label %if.then, label %if.else |
33 | // + + |
34 | // + + |
35 | // + + |
36 | // if.then: if.else: |
37 | // %lt = load %addr_l %le = load %addr_l |
38 | // <use %lt> <use %le> |
39 | // <...> <...> |
40 | // store %st, %addr_s store %se, %addr_s |
41 | // br label %if.end br label %if.end |
42 | // + + |
43 | // + + |
44 | // + + |
45 | // if.end ("footer"): |
46 | // <...> |
47 | // |
48 | // Diamond shaped code after merge: |
49 | // |
50 | // header: |
51 | // %l = load %addr_l |
52 | // br %cond, label %if.then, label %if.else |
53 | // + + |
54 | // + + |
55 | // + + |
56 | // if.then: if.else: |
57 | // <use %l> <use %l> |
58 | // <...> <...> |
59 | // br label %if.end br label %if.end |
60 | // + + |
61 | // + + |
62 | // + + |
63 | // if.end ("footer"): |
64 | // %s.sink = phi [%st, if.then], [%se, if.else] |
65 | // <...> |
66 | // store %s.sink, %addr_s |
67 | // <...> |
68 | // |
69 | // |
70 | //===----------------------- TODO -----------------------------------------===// |
71 | // |
72 | // 1) Generalize to regions other than diamonds |
73 | // 2) Be more aggressive merging memory operations |
74 | // Note that both changes require register pressure control |
75 | // |
76 | //===----------------------------------------------------------------------===// |
77 | |
78 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
79 | #include "llvm/Analysis/AliasAnalysis.h" |
80 | #include "llvm/Analysis/GlobalsModRef.h" |
81 | #include "llvm/IR/IRBuilder.h" |
82 | #include "llvm/IR/Instructions.h" |
83 | #include "llvm/Support/Debug.h" |
84 | #include "llvm/Support/raw_ostream.h" |
85 | #include "llvm/Transforms/Scalar.h" |
86 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
87 | #include "llvm/Transforms/Utils/Local.h" |
88 | |
89 | using namespace llvm; |
90 | |
91 | #define DEBUG_TYPE "mldst-motion" |
92 | |
93 | namespace { |
94 | //===----------------------------------------------------------------------===// |
95 | // MergedLoadStoreMotion Pass |
96 | //===----------------------------------------------------------------------===// |
97 | class MergedLoadStoreMotion { |
98 | AliasAnalysis *AA = nullptr; |
99 | |
100 | // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, |
101 | // where Size0 and Size1 are the #instructions on the two sides of |
102 | // the diamond. The constant chosen here is arbitrary. Compiler Time |
103 | // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. |
104 | const int MagicCompileTimeControl = 250; |
105 | |
106 | const bool ; |
107 | public: |
108 | MergedLoadStoreMotion(bool ) : SplitFooterBB(SplitFooterBB) {} |
109 | bool run(Function &F, AliasAnalysis &AA); |
110 | |
111 | private: |
112 | BasicBlock *getDiamondTail(BasicBlock *BB); |
113 | bool isDiamondHead(BasicBlock *BB); |
114 | // Routines for sinking stores |
115 | StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); |
116 | PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); |
117 | bool isStoreSinkBarrierInRange(const Instruction &Start, |
118 | const Instruction &End, MemoryLocation Loc); |
119 | bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; |
120 | void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, |
121 | StoreInst *ElseInst); |
122 | bool mergeStores(BasicBlock *BB); |
123 | }; |
124 | } // end anonymous namespace |
125 | |
126 | /// |
127 | /// Return tail block of a diamond. |
128 | /// |
129 | BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) { |
130 | assert(isDiamondHead(BB) && "Basic block is not head of a diamond" ); |
131 | return BB->getTerminator()->getSuccessor(Idx: 0)->getSingleSuccessor(); |
132 | } |
133 | |
134 | /// |
135 | /// True when BB is the head of a diamond (hammock) |
136 | /// |
137 | bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { |
138 | if (!BB) |
139 | return false; |
140 | auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
141 | if (!BI || !BI->isConditional()) |
142 | return false; |
143 | |
144 | BasicBlock *Succ0 = BI->getSuccessor(i: 0); |
145 | BasicBlock *Succ1 = BI->getSuccessor(i: 1); |
146 | |
147 | if (!Succ0->getSinglePredecessor()) |
148 | return false; |
149 | if (!Succ1->getSinglePredecessor()) |
150 | return false; |
151 | |
152 | BasicBlock *Succ0Succ = Succ0->getSingleSuccessor(); |
153 | BasicBlock *Succ1Succ = Succ1->getSingleSuccessor(); |
154 | // Ignore triangles. |
155 | if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ) |
156 | return false; |
157 | return true; |
158 | } |
159 | |
160 | |
161 | /// |
162 | /// True when instruction is a sink barrier for a store |
163 | /// located in Loc |
164 | /// |
165 | /// Whenever an instruction could possibly read or modify the |
166 | /// value being stored or protect against the store from |
167 | /// happening it is considered a sink barrier. |
168 | /// |
169 | bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start, |
170 | const Instruction &End, |
171 | MemoryLocation Loc) { |
172 | for (const Instruction &Inst : |
173 | make_range(x: Start.getIterator(), y: End.getIterator())) |
174 | if (Inst.mayThrow()) |
175 | return true; |
176 | return AA->canInstructionRangeModRef(I1: Start, I2: End, Loc, Mode: ModRefInfo::ModRef); |
177 | } |
178 | |
179 | /// |
180 | /// Check if \p BB contains a store to the same address as \p SI |
181 | /// |
182 | /// \return The store in \p when it is safe to sink. Otherwise return Null. |
183 | /// |
184 | StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, |
185 | StoreInst *Store0) { |
186 | LLVM_DEBUG(dbgs() << "can Sink? : " ; Store0->dump(); dbgs() << "\n" ); |
187 | BasicBlock *BB0 = Store0->getParent(); |
188 | for (Instruction &Inst : reverse(C&: *BB1)) { |
189 | auto *Store1 = dyn_cast<StoreInst>(Val: &Inst); |
190 | if (!Store1) |
191 | continue; |
192 | |
193 | MemoryLocation Loc0 = MemoryLocation::get(SI: Store0); |
194 | MemoryLocation Loc1 = MemoryLocation::get(SI: Store1); |
195 | |
196 | if (AA->isMustAlias(LocA: Loc0, LocB: Loc1) && |
197 | !isStoreSinkBarrierInRange(Start: *Store1->getNextNode(), End: BB1->back(), Loc: Loc1) && |
198 | !isStoreSinkBarrierInRange(Start: *Store0->getNextNode(), End: BB0->back(), Loc: Loc0) && |
199 | Store0->hasSameSpecialState(I2: Store1) && |
200 | CastInst::isBitOrNoopPointerCastable( |
201 | SrcTy: Store0->getValueOperand()->getType(), |
202 | DestTy: Store1->getValueOperand()->getType(), |
203 | DL: Store0->getDataLayout())) |
204 | return Store1; |
205 | } |
206 | return nullptr; |
207 | } |
208 | |
209 | /// |
210 | /// Create a PHI node in BB for the operands of S0 and S1 |
211 | /// |
212 | PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, |
213 | StoreInst *S1) { |
214 | // Create a phi if the values mismatch. |
215 | Value *Opd1 = S0->getValueOperand(); |
216 | Value *Opd2 = S1->getValueOperand(); |
217 | if (Opd1 == Opd2) |
218 | return nullptr; |
219 | |
220 | auto *NewPN = PHINode::Create(Ty: Opd1->getType(), NumReservedValues: 2, NameStr: Opd2->getName() + ".sink" ); |
221 | NewPN->insertBefore(InsertPos: BB->begin()); |
222 | NewPN->applyMergedLocation(LocA: S0->getDebugLoc(), LocB: S1->getDebugLoc()); |
223 | NewPN->addIncoming(V: Opd1, BB: S0->getParent()); |
224 | NewPN->addIncoming(V: Opd2, BB: S1->getParent()); |
225 | return NewPN; |
226 | } |
227 | |
228 | /// |
229 | /// Check if 2 stores can be sunk, optionally together with corresponding GEPs. |
230 | /// |
231 | bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0, |
232 | StoreInst *S1) const { |
233 | if (S0->getPointerOperand() == S1->getPointerOperand()) |
234 | return true; |
235 | auto *GEP0 = dyn_cast<GetElementPtrInst>(Val: S0->getPointerOperand()); |
236 | auto *GEP1 = dyn_cast<GetElementPtrInst>(Val: S1->getPointerOperand()); |
237 | return GEP0 && GEP1 && GEP0->isIdenticalTo(I: GEP1) && GEP0->hasOneUse() && |
238 | (GEP0->getParent() == S0->getParent()) && GEP1->hasOneUse() && |
239 | (GEP1->getParent() == S1->getParent()); |
240 | } |
241 | |
242 | /// |
243 | /// Merge two stores to same address and sink into \p BB |
244 | /// |
245 | /// Optionally also sinks GEP instruction computing the store address |
246 | /// |
247 | void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0, |
248 | StoreInst *S1) { |
249 | Value *Ptr0 = S0->getPointerOperand(); |
250 | Value *Ptr1 = S1->getPointerOperand(); |
251 | // Only one definition? |
252 | LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n" ; BB->dump(); |
253 | dbgs() << "Instruction Left\n" ; S0->dump(); dbgs() << "\n" ; |
254 | dbgs() << "Instruction Right\n" ; S1->dump(); dbgs() << "\n" ); |
255 | // Hoist the instruction. |
256 | BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); |
257 | // Intersect optional metadata. |
258 | S0->andIRFlags(V: S1); |
259 | |
260 | combineMetadataForCSE(K: S0, J: S1, DoesKMove: true); |
261 | S0->applyMergedLocation(LocA: S0->getDebugLoc(), LocB: S1->getDebugLoc()); |
262 | S0->mergeDIAssignID(SourceInstructions: S1); |
263 | |
264 | // Insert bitcast for conflicting typed stores (or just use original value if |
265 | // same type). |
266 | IRBuilder<> Builder(S0); |
267 | auto Cast = Builder.CreateBitOrPointerCast(V: S0->getValueOperand(), |
268 | DestTy: S1->getValueOperand()->getType()); |
269 | S0->setOperand(i_nocapture: 0, Val_nocapture: Cast); |
270 | |
271 | // Create the new store to be inserted at the join point. |
272 | StoreInst *SNew = cast<StoreInst>(Val: S0->clone()); |
273 | SNew->insertBefore(InsertPos: InsertPt); |
274 | // New PHI operand? Use it. |
275 | if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) |
276 | SNew->setOperand(i_nocapture: 0, Val_nocapture: NewPN); |
277 | S0->eraseFromParent(); |
278 | S1->eraseFromParent(); |
279 | |
280 | if (Ptr0 != Ptr1) { |
281 | auto *GEP0 = cast<GetElementPtrInst>(Val: Ptr0); |
282 | auto *GEP1 = cast<GetElementPtrInst>(Val: Ptr1); |
283 | Instruction *GEPNew = GEP0->clone(); |
284 | GEPNew->insertBefore(InsertPos: SNew->getIterator()); |
285 | GEPNew->applyMergedLocation(LocA: GEP0->getDebugLoc(), LocB: GEP1->getDebugLoc()); |
286 | SNew->setOperand(i_nocapture: 1, Val_nocapture: GEPNew); |
287 | GEP0->replaceAllUsesWith(V: GEPNew); |
288 | GEP0->eraseFromParent(); |
289 | GEP1->replaceAllUsesWith(V: GEPNew); |
290 | GEP1->eraseFromParent(); |
291 | } |
292 | } |
293 | |
294 | /// |
295 | /// True when two stores are equivalent and can sink into the footer |
296 | /// |
297 | /// Starting from a diamond head block, iterate over the instructions in one |
298 | /// successor block and try to match a store in the second successor. |
299 | /// |
300 | bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { |
301 | |
302 | bool MergedStores = false; |
303 | BasicBlock *TailBB = getDiamondTail(BB: HeadBB); |
304 | BasicBlock *SinkBB = TailBB; |
305 | assert(SinkBB && "Footer of a diamond cannot be empty" ); |
306 | |
307 | succ_iterator SI = succ_begin(BB: HeadBB); |
308 | assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors" ); |
309 | BasicBlock *Pred0 = *SI; |
310 | ++SI; |
311 | assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor" ); |
312 | BasicBlock *Pred1 = *SI; |
313 | // tail block of a diamond/hammock? |
314 | if (Pred0 == Pred1) |
315 | return false; // No. |
316 | // bail out early if we can not merge into the footer BB |
317 | if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(N: 3)) |
318 | return false; |
319 | // #Instructions in Pred1 for Compile Time Control |
320 | auto InstsNoDbg = Pred1->instructionsWithoutDebug(); |
321 | int Size1 = std::distance(first: InstsNoDbg.begin(), last: InstsNoDbg.end()); |
322 | int NStores = 0; |
323 | |
324 | for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); |
325 | RBI != RBE;) { |
326 | |
327 | Instruction *I = &*RBI; |
328 | ++RBI; |
329 | |
330 | // Don't sink non-simple (atomic, volatile) stores. |
331 | auto *S0 = dyn_cast<StoreInst>(Val: I); |
332 | if (!S0 || !S0->isSimple()) |
333 | continue; |
334 | |
335 | ++NStores; |
336 | if (NStores * Size1 >= MagicCompileTimeControl) |
337 | break; |
338 | if (StoreInst *S1 = canSinkFromBlock(BB1: Pred1, Store0: S0)) { |
339 | if (!canSinkStoresAndGEPs(S0, S1)) |
340 | // Don't attempt to sink below stores that had to stick around |
341 | // But after removal of a store and some of its feeding |
342 | // instruction search again from the beginning since the iterator |
343 | // is likely stale at this point. |
344 | break; |
345 | |
346 | if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(N: 3)) { |
347 | // We have more than 2 predecessors. Insert a new block |
348 | // postdominating 2 predecessors we're going to sink from. |
349 | SinkBB = SplitBlockPredecessors(BB: TailBB, Preds: {Pred0, Pred1}, Suffix: ".sink.split" ); |
350 | if (!SinkBB) |
351 | break; |
352 | } |
353 | |
354 | MergedStores = true; |
355 | sinkStoresAndGEPs(BB: SinkBB, S0, S1); |
356 | RBI = Pred0->rbegin(); |
357 | RBE = Pred0->rend(); |
358 | LLVM_DEBUG(dbgs() << "Search again\n" ; Instruction *I = &*RBI; I->dump()); |
359 | } |
360 | } |
361 | return MergedStores; |
362 | } |
363 | |
364 | bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { |
365 | this->AA = &AA; |
366 | |
367 | bool Changed = false; |
368 | LLVM_DEBUG(dbgs() << "Instruction Merger\n" ); |
369 | |
370 | // Merge unconditional branches, allowing PRE to catch more |
371 | // optimization opportunities. |
372 | // This loop doesn't care about newly inserted/split blocks |
373 | // since they never will be diamond heads. |
374 | for (BasicBlock &BB : make_early_inc_range(Range&: F)) |
375 | // Hoist equivalent loads and sink stores |
376 | // outside diamonds when possible |
377 | if (isDiamondHead(BB: &BB)) |
378 | Changed |= mergeStores(HeadBB: &BB); |
379 | return Changed; |
380 | } |
381 | |
382 | PreservedAnalyses |
383 | MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { |
384 | MergedLoadStoreMotion Impl(Options.SplitFooterBB); |
385 | auto &AA = AM.getResult<AAManager>(IR&: F); |
386 | if (!Impl.run(F, AA)) |
387 | return PreservedAnalyses::all(); |
388 | |
389 | PreservedAnalyses PA; |
390 | if (!Options.SplitFooterBB) |
391 | PA.preserveSet<CFGAnalyses>(); |
392 | return PA; |
393 | } |
394 | |
395 | void MergedLoadStoreMotionPass::printPipeline( |
396 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
397 | static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline( |
398 | OS, MapClassName2PassName); |
399 | OS << '<'; |
400 | OS << (Options.SplitFooterBB ? "" : "no-" ) << "split-footer-bb" ; |
401 | OS << '>'; |
402 | } |
403 | |