1 | //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //! \file |
10 | //! This pass performs merges of loads and stores on both sides of a |
11 | // diamond (hammock). It hoists the loads and sinks the stores. |
12 | // |
13 | // The algorithm iteratively hoists two loads to the same address out of a |
14 | // diamond (hammock) and merges them into a single load in the header. Similar |
15 | // it sinks and merges two stores to the tail block (footer). The algorithm |
16 | // iterates over the instructions of one side of the diamond and attempts to |
17 | // find a matching load/store on the other side. New tail/footer block may be |
18 | // insterted if the tail/footer block has more predecessors (not only the two |
19 | // predecessors that are forming the diamond). It hoists / sinks when it thinks |
20 | // it safe to do so. This optimization helps with eg. hiding load latencies, |
21 | // triggering if-conversion, and reducing static code size. |
22 | // |
23 | // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. |
24 | // |
25 | //===----------------------------------------------------------------------===// |
26 | // |
27 | // |
28 | // Example: |
29 | // Diamond shaped code before merge: |
30 | // |
31 | // header: |
32 | // br %cond, label %if.then, label %if.else |
33 | // + + |
34 | // + + |
35 | // + + |
36 | // if.then: if.else: |
37 | // %lt = load %addr_l %le = load %addr_l |
38 | // <use %lt> <use %le> |
39 | // <...> <...> |
40 | // store %st, %addr_s store %se, %addr_s |
41 | // br label %if.end br label %if.end |
42 | // + + |
43 | // + + |
44 | // + + |
45 | // if.end ("footer"): |
46 | // <...> |
47 | // |
48 | // Diamond shaped code after merge: |
49 | // |
50 | // header: |
51 | // %l = load %addr_l |
52 | // br %cond, label %if.then, label %if.else |
53 | // + + |
54 | // + + |
55 | // + + |
56 | // if.then: if.else: |
57 | // <use %l> <use %l> |
58 | // <...> <...> |
59 | // br label %if.end br label %if.end |
60 | // + + |
61 | // + + |
62 | // + + |
63 | // if.end ("footer"): |
64 | // %s.sink = phi [%st, if.then], [%se, if.else] |
65 | // <...> |
66 | // store %s.sink, %addr_s |
67 | // <...> |
68 | // |
69 | // |
70 | //===----------------------- TODO -----------------------------------------===// |
71 | // |
72 | // 1) Generalize to regions other than diamonds |
73 | // 2) Be more aggressive merging memory operations |
74 | // Note that both changes require register pressure control |
75 | // |
76 | //===----------------------------------------------------------------------===// |
77 | |
78 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
79 | #include "llvm/Analysis/AliasAnalysis.h" |
80 | #include "llvm/Analysis/GlobalsModRef.h" |
81 | #include "llvm/IR/IRBuilder.h" |
82 | #include "llvm/IR/Instructions.h" |
83 | #include "llvm/Support/Debug.h" |
84 | #include "llvm/Support/raw_ostream.h" |
85 | #include "llvm/Transforms/Scalar.h" |
86 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
87 | |
88 | using namespace llvm; |
89 | |
90 | #define DEBUG_TYPE "mldst-motion" |
91 | |
92 | namespace { |
93 | //===----------------------------------------------------------------------===// |
94 | // MergedLoadStoreMotion Pass |
95 | //===----------------------------------------------------------------------===// |
96 | class MergedLoadStoreMotion { |
97 | AliasAnalysis *AA = nullptr; |
98 | |
99 | // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, |
100 | // where Size0 and Size1 are the #instructions on the two sides of |
101 | // the diamond. The constant chosen here is arbitrary. Compiler Time |
102 | // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. |
103 | const int MagicCompileTimeControl = 250; |
104 | |
105 | const bool ; |
106 | public: |
107 | MergedLoadStoreMotion(bool ) : SplitFooterBB(SplitFooterBB) {} |
108 | bool run(Function &F, AliasAnalysis &AA); |
109 | |
110 | private: |
111 | BasicBlock *getDiamondTail(BasicBlock *BB); |
112 | bool isDiamondHead(BasicBlock *BB); |
113 | // Routines for sinking stores |
114 | StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); |
115 | PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); |
116 | bool isStoreSinkBarrierInRange(const Instruction &Start, |
117 | const Instruction &End, MemoryLocation Loc); |
118 | bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; |
119 | void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, |
120 | StoreInst *ElseInst); |
121 | bool mergeStores(BasicBlock *BB); |
122 | }; |
123 | } // end anonymous namespace |
124 | |
125 | /// |
126 | /// Return tail block of a diamond. |
127 | /// |
128 | BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) { |
129 | assert(isDiamondHead(BB) && "Basic block is not head of a diamond" ); |
130 | return BB->getTerminator()->getSuccessor(Idx: 0)->getSingleSuccessor(); |
131 | } |
132 | |
133 | /// |
134 | /// True when BB is the head of a diamond (hammock) |
135 | /// |
136 | bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { |
137 | if (!BB) |
138 | return false; |
139 | auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
140 | if (!BI || !BI->isConditional()) |
141 | return false; |
142 | |
143 | BasicBlock *Succ0 = BI->getSuccessor(i: 0); |
144 | BasicBlock *Succ1 = BI->getSuccessor(i: 1); |
145 | |
146 | if (!Succ0->getSinglePredecessor()) |
147 | return false; |
148 | if (!Succ1->getSinglePredecessor()) |
149 | return false; |
150 | |
151 | BasicBlock *Succ0Succ = Succ0->getSingleSuccessor(); |
152 | BasicBlock *Succ1Succ = Succ1->getSingleSuccessor(); |
153 | // Ignore triangles. |
154 | if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ) |
155 | return false; |
156 | return true; |
157 | } |
158 | |
159 | |
160 | /// |
161 | /// True when instruction is a sink barrier for a store |
162 | /// located in Loc |
163 | /// |
164 | /// Whenever an instruction could possibly read or modify the |
165 | /// value being stored or protect against the store from |
166 | /// happening it is considered a sink barrier. |
167 | /// |
168 | bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start, |
169 | const Instruction &End, |
170 | MemoryLocation Loc) { |
171 | for (const Instruction &Inst : |
172 | make_range(x: Start.getIterator(), y: End.getIterator())) |
173 | if (Inst.mayThrow()) |
174 | return true; |
175 | return AA->canInstructionRangeModRef(I1: Start, I2: End, Loc, Mode: ModRefInfo::ModRef); |
176 | } |
177 | |
178 | /// |
179 | /// Check if \p BB contains a store to the same address as \p SI |
180 | /// |
181 | /// \return The store in \p when it is safe to sink. Otherwise return Null. |
182 | /// |
183 | StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, |
184 | StoreInst *Store0) { |
185 | LLVM_DEBUG(dbgs() << "can Sink? : " ; Store0->dump(); dbgs() << "\n" ); |
186 | BasicBlock *BB0 = Store0->getParent(); |
187 | for (Instruction &Inst : reverse(C&: *BB1)) { |
188 | auto *Store1 = dyn_cast<StoreInst>(Val: &Inst); |
189 | if (!Store1) |
190 | continue; |
191 | |
192 | MemoryLocation Loc0 = MemoryLocation::get(SI: Store0); |
193 | MemoryLocation Loc1 = MemoryLocation::get(SI: Store1); |
194 | |
195 | if (AA->isMustAlias(LocA: Loc0, LocB: Loc1) && |
196 | !isStoreSinkBarrierInRange(Start: *Store1->getNextNode(), End: BB1->back(), Loc: Loc1) && |
197 | !isStoreSinkBarrierInRange(Start: *Store0->getNextNode(), End: BB0->back(), Loc: Loc0) && |
198 | Store0->hasSameSpecialState(I2: Store1) && |
199 | CastInst::isBitOrNoopPointerCastable( |
200 | SrcTy: Store0->getValueOperand()->getType(), |
201 | DestTy: Store1->getValueOperand()->getType(), |
202 | DL: Store0->getDataLayout())) |
203 | return Store1; |
204 | } |
205 | return nullptr; |
206 | } |
207 | |
208 | /// |
209 | /// Create a PHI node in BB for the operands of S0 and S1 |
210 | /// |
211 | PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, |
212 | StoreInst *S1) { |
213 | // Create a phi if the values mismatch. |
214 | Value *Opd1 = S0->getValueOperand(); |
215 | Value *Opd2 = S1->getValueOperand(); |
216 | if (Opd1 == Opd2) |
217 | return nullptr; |
218 | |
219 | auto *NewPN = PHINode::Create(Ty: Opd1->getType(), NumReservedValues: 2, NameStr: Opd2->getName() + ".sink" ); |
220 | NewPN->insertBefore(InsertPos: BB->begin()); |
221 | NewPN->applyMergedLocation(LocA: S0->getDebugLoc(), LocB: S1->getDebugLoc()); |
222 | NewPN->addIncoming(V: Opd1, BB: S0->getParent()); |
223 | NewPN->addIncoming(V: Opd2, BB: S1->getParent()); |
224 | return NewPN; |
225 | } |
226 | |
227 | /// |
228 | /// Check if 2 stores can be sunk, optionally together with corresponding GEPs. |
229 | /// |
230 | bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0, |
231 | StoreInst *S1) const { |
232 | if (S0->getPointerOperand() == S1->getPointerOperand()) |
233 | return true; |
234 | auto *GEP0 = dyn_cast<GetElementPtrInst>(Val: S0->getPointerOperand()); |
235 | auto *GEP1 = dyn_cast<GetElementPtrInst>(Val: S1->getPointerOperand()); |
236 | return GEP0 && GEP1 && GEP0->isIdenticalTo(I: GEP1) && GEP0->hasOneUse() && |
237 | (GEP0->getParent() == S0->getParent()) && GEP1->hasOneUse() && |
238 | (GEP1->getParent() == S1->getParent()); |
239 | } |
240 | |
241 | /// |
242 | /// Merge two stores to same address and sink into \p BB |
243 | /// |
244 | /// Optionally also sinks GEP instruction computing the store address |
245 | /// |
246 | void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0, |
247 | StoreInst *S1) { |
248 | Value *Ptr0 = S0->getPointerOperand(); |
249 | Value *Ptr1 = S1->getPointerOperand(); |
250 | // Only one definition? |
251 | LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n" ; BB->dump(); |
252 | dbgs() << "Instruction Left\n" ; S0->dump(); dbgs() << "\n" ; |
253 | dbgs() << "Instruction Right\n" ; S1->dump(); dbgs() << "\n" ); |
254 | // Hoist the instruction. |
255 | BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); |
256 | // Intersect optional metadata. |
257 | S0->andIRFlags(V: S1); |
258 | S0->dropUnknownNonDebugMetadata(); |
259 | S0->applyMergedLocation(LocA: S0->getDebugLoc(), LocB: S1->getDebugLoc()); |
260 | S0->mergeDIAssignID(SourceInstructions: S1); |
261 | |
262 | // Insert bitcast for conflicting typed stores (or just use original value if |
263 | // same type). |
264 | IRBuilder<> Builder(S0); |
265 | auto Cast = Builder.CreateBitOrPointerCast(V: S0->getValueOperand(), |
266 | DestTy: S1->getValueOperand()->getType()); |
267 | S0->setOperand(i_nocapture: 0, Val_nocapture: Cast); |
268 | |
269 | // Create the new store to be inserted at the join point. |
270 | StoreInst *SNew = cast<StoreInst>(Val: S0->clone()); |
271 | SNew->insertBefore(InsertPos: InsertPt); |
272 | // New PHI operand? Use it. |
273 | if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) |
274 | SNew->setOperand(i_nocapture: 0, Val_nocapture: NewPN); |
275 | S0->eraseFromParent(); |
276 | S1->eraseFromParent(); |
277 | |
278 | if (Ptr0 != Ptr1) { |
279 | auto *GEP0 = cast<GetElementPtrInst>(Val: Ptr0); |
280 | auto *GEP1 = cast<GetElementPtrInst>(Val: Ptr1); |
281 | Instruction *GEPNew = GEP0->clone(); |
282 | GEPNew->insertBefore(InsertPos: SNew); |
283 | GEPNew->applyMergedLocation(LocA: GEP0->getDebugLoc(), LocB: GEP1->getDebugLoc()); |
284 | SNew->setOperand(i_nocapture: 1, Val_nocapture: GEPNew); |
285 | GEP0->replaceAllUsesWith(V: GEPNew); |
286 | GEP0->eraseFromParent(); |
287 | GEP1->replaceAllUsesWith(V: GEPNew); |
288 | GEP1->eraseFromParent(); |
289 | } |
290 | } |
291 | |
292 | /// |
293 | /// True when two stores are equivalent and can sink into the footer |
294 | /// |
295 | /// Starting from a diamond head block, iterate over the instructions in one |
296 | /// successor block and try to match a store in the second successor. |
297 | /// |
298 | bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { |
299 | |
300 | bool MergedStores = false; |
301 | BasicBlock *TailBB = getDiamondTail(BB: HeadBB); |
302 | BasicBlock *SinkBB = TailBB; |
303 | assert(SinkBB && "Footer of a diamond cannot be empty" ); |
304 | |
305 | succ_iterator SI = succ_begin(BB: HeadBB); |
306 | assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors" ); |
307 | BasicBlock *Pred0 = *SI; |
308 | ++SI; |
309 | assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor" ); |
310 | BasicBlock *Pred1 = *SI; |
311 | // tail block of a diamond/hammock? |
312 | if (Pred0 == Pred1) |
313 | return false; // No. |
314 | // bail out early if we can not merge into the footer BB |
315 | if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(N: 3)) |
316 | return false; |
317 | // #Instructions in Pred1 for Compile Time Control |
318 | auto InstsNoDbg = Pred1->instructionsWithoutDebug(); |
319 | int Size1 = std::distance(first: InstsNoDbg.begin(), last: InstsNoDbg.end()); |
320 | int NStores = 0; |
321 | |
322 | for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); |
323 | RBI != RBE;) { |
324 | |
325 | Instruction *I = &*RBI; |
326 | ++RBI; |
327 | |
328 | // Don't sink non-simple (atomic, volatile) stores. |
329 | auto *S0 = dyn_cast<StoreInst>(Val: I); |
330 | if (!S0 || !S0->isSimple()) |
331 | continue; |
332 | |
333 | ++NStores; |
334 | if (NStores * Size1 >= MagicCompileTimeControl) |
335 | break; |
336 | if (StoreInst *S1 = canSinkFromBlock(BB1: Pred1, Store0: S0)) { |
337 | if (!canSinkStoresAndGEPs(S0, S1)) |
338 | // Don't attempt to sink below stores that had to stick around |
339 | // But after removal of a store and some of its feeding |
340 | // instruction search again from the beginning since the iterator |
341 | // is likely stale at this point. |
342 | break; |
343 | |
344 | if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(N: 3)) { |
345 | // We have more than 2 predecessors. Insert a new block |
346 | // postdominating 2 predecessors we're going to sink from. |
347 | SinkBB = SplitBlockPredecessors(BB: TailBB, Preds: {Pred0, Pred1}, Suffix: ".sink.split" ); |
348 | if (!SinkBB) |
349 | break; |
350 | } |
351 | |
352 | MergedStores = true; |
353 | sinkStoresAndGEPs(BB: SinkBB, S0, S1); |
354 | RBI = Pred0->rbegin(); |
355 | RBE = Pred0->rend(); |
356 | LLVM_DEBUG(dbgs() << "Search again\n" ; Instruction *I = &*RBI; I->dump()); |
357 | } |
358 | } |
359 | return MergedStores; |
360 | } |
361 | |
362 | bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { |
363 | this->AA = &AA; |
364 | |
365 | bool Changed = false; |
366 | LLVM_DEBUG(dbgs() << "Instruction Merger\n" ); |
367 | |
368 | // Merge unconditional branches, allowing PRE to catch more |
369 | // optimization opportunities. |
370 | // This loop doesn't care about newly inserted/split blocks |
371 | // since they never will be diamond heads. |
372 | for (BasicBlock &BB : make_early_inc_range(Range&: F)) |
373 | // Hoist equivalent loads and sink stores |
374 | // outside diamonds when possible |
375 | if (isDiamondHead(BB: &BB)) |
376 | Changed |= mergeStores(HeadBB: &BB); |
377 | return Changed; |
378 | } |
379 | |
380 | PreservedAnalyses |
381 | MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { |
382 | MergedLoadStoreMotion Impl(Options.SplitFooterBB); |
383 | auto &AA = AM.getResult<AAManager>(IR&: F); |
384 | if (!Impl.run(F, AA)) |
385 | return PreservedAnalyses::all(); |
386 | |
387 | PreservedAnalyses PA; |
388 | if (!Options.SplitFooterBB) |
389 | PA.preserveSet<CFGAnalyses>(); |
390 | return PA; |
391 | } |
392 | |
393 | void MergedLoadStoreMotionPass::printPipeline( |
394 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
395 | static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline( |
396 | OS, MapClassName2PassName); |
397 | OS << '<'; |
398 | OS << (Options.SplitFooterBB ? "" : "no-" ) << "split-footer-bb" ; |
399 | OS << '>'; |
400 | } |
401 | |