1 | //===- R600MachineCFGStructurizer.cpp - CFG Structurizer ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //==-----------------------------------------------------------------------===// |
8 | |
9 | #include "MCTargetDesc/R600MCTargetDesc.h" |
10 | #include "R600.h" |
11 | #include "R600RegisterInfo.h" |
12 | #include "R600Subtarget.h" |
13 | #include "llvm/ADT/DepthFirstIterator.h" |
14 | #include "llvm/ADT/SCCIterator.h" |
15 | #include "llvm/ADT/Statistic.h" |
16 | #include "llvm/CodeGen/MachineFunction.h" |
17 | #include "llvm/CodeGen/MachineFunctionPass.h" |
18 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
19 | #include "llvm/CodeGen/MachineLoopInfo.h" |
20 | #include "llvm/CodeGen/MachinePostDominators.h" |
21 | #include "llvm/InitializePasses.h" |
22 | |
23 | using namespace llvm; |
24 | |
25 | #define DEBUG_TYPE "structcfg" |
26 | |
27 | enum { DEFAULT_VEC_SLOTS = 8 }; |
28 | |
29 | // TODO: move-begin. |
30 | |
31 | //===----------------------------------------------------------------------===// |
32 | // |
33 | // Statistics for CFGStructurizer. |
34 | // |
35 | //===----------------------------------------------------------------------===// |
36 | |
37 | STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " |
38 | "matched" ); |
39 | STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " |
40 | "matched" ); |
41 | STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks" ); |
42 | STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions" ); |
43 | |
44 | namespace llvm { |
45 | |
46 | void initializeR600MachineCFGStructurizerPass(PassRegistry &); |
47 | |
48 | } // end namespace llvm |
49 | |
50 | namespace { |
51 | |
52 | //===----------------------------------------------------------------------===// |
53 | // |
54 | // Miscellaneous utility for CFGStructurizer. |
55 | // |
56 | //===----------------------------------------------------------------------===// |
57 | |
58 | #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); |
59 | |
60 | #define SHOWNEWBLK(b, msg) \ |
61 | LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ |
62 | dbgs() << "\n";); |
63 | |
64 | #define SHOWBLK_DETAIL(b, msg) \ |
65 | LLVM_DEBUG(if (b) { \ |
66 | dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ |
67 | b->print(dbgs()); \ |
68 | dbgs() << "\n"; \ |
69 | }); |
70 | |
71 | #define INVALIDSCCNUM -1 |
72 | |
73 | //===----------------------------------------------------------------------===// |
74 | // |
75 | // supporting data structure for CFGStructurizer |
76 | // |
77 | //===----------------------------------------------------------------------===// |
78 | |
79 | class BlockInformation { |
80 | public: |
81 | bool IsRetired = false; |
82 | int SccNum = INVALIDSCCNUM; |
83 | |
84 | BlockInformation() = default; |
85 | }; |
86 | |
87 | //===----------------------------------------------------------------------===// |
88 | // |
89 | // CFGStructurizer |
90 | // |
91 | //===----------------------------------------------------------------------===// |
92 | |
93 | class R600MachineCFGStructurizer : public MachineFunctionPass { |
94 | public: |
95 | using MBBVector = SmallVector<MachineBasicBlock *, 32>; |
96 | using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; |
97 | using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; |
98 | |
99 | enum PathToKind { |
100 | Not_SinglePath = 0, |
101 | SinglePath_InPath = 1, |
102 | SinglePath_NotInPath = 2 |
103 | }; |
104 | |
105 | static char ID; |
106 | |
107 | R600MachineCFGStructurizer() : MachineFunctionPass(ID) { |
108 | initializeR600MachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); |
109 | } |
110 | |
111 | StringRef getPassName() const override { |
112 | return "AMDGPU Control Flow Graph structurizer Pass" ; |
113 | } |
114 | |
115 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
116 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
117 | AU.addRequired<MachinePostDominatorTreeWrapperPass>(); |
118 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
119 | MachineFunctionPass::getAnalysisUsage(AU); |
120 | } |
121 | |
122 | /// Perform the CFG structurization |
123 | bool run(); |
124 | |
125 | /// Perform the CFG preparation |
126 | /// This step will remove every unconditionnal/dead jump instructions and make |
127 | /// sure all loops have an exit block |
128 | bool prepare(); |
129 | |
130 | bool runOnMachineFunction(MachineFunction &MF) override { |
131 | // FIXME: This pass causes verification failures. |
132 | MF.getProperties().set( |
133 | MachineFunctionProperties::Property::FailsVerification); |
134 | |
135 | TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); |
136 | TRI = &TII->getRegisterInfo(); |
137 | LLVM_DEBUG(MF.dump();); |
138 | OrderedBlks.clear(); |
139 | Visited.clear(); |
140 | FuncRep = &MF; |
141 | MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
142 | LLVM_DEBUG(dbgs() << "LoopInfo:\n" ; PrintLoopinfo(*MLI);); |
143 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
144 | LLVM_DEBUG(MDT->print(dbgs());); |
145 | PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree(); |
146 | LLVM_DEBUG(PDT->print(dbgs());); |
147 | prepare(); |
148 | run(); |
149 | LLVM_DEBUG(MF.dump();); |
150 | return true; |
151 | } |
152 | |
153 | protected: |
154 | MachineDominatorTree *MDT; |
155 | MachinePostDominatorTree *PDT; |
156 | MachineLoopInfo *MLI; |
157 | const R600InstrInfo *TII = nullptr; |
158 | const R600RegisterInfo *TRI = nullptr; |
159 | |
160 | // PRINT FUNCTIONS |
161 | /// Print the ordered Blocks. |
162 | void printOrderedBlocks() const { |
163 | size_t i = 0; |
164 | for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), |
165 | iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { |
166 | dbgs() << "BB" << (*iterBlk)->getNumber(); |
167 | dbgs() << "(" << getSCCNum(MBB: *iterBlk) << "," << (*iterBlk)->size() << ")" ; |
168 | if (i != 0 && i % 10 == 0) { |
169 | dbgs() << "\n" ; |
170 | } else { |
171 | dbgs() << " " ; |
172 | } |
173 | } |
174 | } |
175 | |
176 | static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { |
177 | for (const MachineLoop *L : LoopInfo) |
178 | L->print(OS&: dbgs()); |
179 | } |
180 | |
181 | // UTILITY FUNCTIONS |
182 | int getSCCNum(MachineBasicBlock *MBB) const; |
183 | MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; |
184 | bool hasBackEdge(MachineBasicBlock *MBB) const; |
185 | bool isRetiredBlock(MachineBasicBlock *MBB) const; |
186 | bool isActiveLoophead(MachineBasicBlock *MBB) const; |
187 | PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, |
188 | bool AllowSideEntry = true) const; |
189 | int countActiveBlock(MBBVector::const_iterator It, |
190 | MBBVector::const_iterator E) const; |
191 | bool needMigrateBlock(MachineBasicBlock *MBB) const; |
192 | |
193 | // Utility Functions |
194 | void reversePredicateSetter(MachineBasicBlock::iterator I, |
195 | MachineBasicBlock &MBB); |
196 | /// Compute the reversed DFS post order of Blocks |
197 | void orderBlocks(MachineFunction *MF); |
198 | |
199 | // Function originally from CFGStructTraits |
200 | void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, |
201 | const DebugLoc &DL = DebugLoc()); |
202 | MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, |
203 | const DebugLoc &DL = DebugLoc()); |
204 | MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); |
205 | void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, |
206 | const DebugLoc &DL); |
207 | void insertCondBranchBefore(MachineBasicBlock *MBB, |
208 | MachineBasicBlock::iterator I, int NewOpcode, |
209 | int RegNum, const DebugLoc &DL); |
210 | |
211 | static int getBranchNzeroOpcode(int OldOpcode); |
212 | static int getBranchZeroOpcode(int OldOpcode); |
213 | static int getContinueNzeroOpcode(int OldOpcode); |
214 | static int getContinueZeroOpcode(int OldOpcode); |
215 | static MachineBasicBlock *getTrueBranch(MachineInstr *MI); |
216 | static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); |
217 | static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, |
218 | MachineInstr *MI); |
219 | static bool isCondBranch(MachineInstr *MI); |
220 | static bool isUncondBranch(MachineInstr *MI); |
221 | static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); |
222 | static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); |
223 | |
224 | /// The correct naming for this is getPossibleLoopendBlockBranchInstr. |
225 | /// |
226 | /// BB with backward-edge could have move instructions after the branch |
227 | /// instruction. Such move instruction "belong to" the loop backward-edge. |
228 | MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); |
229 | |
230 | static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); |
231 | static bool isReturnBlock(MachineBasicBlock *MBB); |
232 | static void cloneSuccessorList(MachineBasicBlock *DstMBB, |
233 | MachineBasicBlock *SrcMBB); |
234 | static MachineBasicBlock *clone(MachineBasicBlock *MBB); |
235 | |
236 | /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose |
237 | /// because the AMDGPU instruction is not recognized as terminator fix this |
238 | /// and retire this routine |
239 | void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, |
240 | MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); |
241 | |
242 | static void wrapup(MachineBasicBlock *MBB); |
243 | |
244 | int patternMatch(MachineBasicBlock *MBB); |
245 | int patternMatchGroup(MachineBasicBlock *MBB); |
246 | int serialPatternMatch(MachineBasicBlock *MBB); |
247 | int ifPatternMatch(MachineBasicBlock *MBB); |
248 | int loopendPatternMatch(); |
249 | int mergeLoop(MachineLoop *LoopRep); |
250 | |
251 | /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in |
252 | /// the same loop with LoopLandInfo without explicitly keeping track of |
253 | /// loopContBlks and loopBreakBlks, this is a method to get the information. |
254 | bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, |
255 | MachineBasicBlock *Src2MBB); |
256 | int handleJumpintoIf(MachineBasicBlock *HeadMBB, |
257 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); |
258 | int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, |
259 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); |
260 | int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, |
261 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, |
262 | MachineBasicBlock **LandMBBPtr); |
263 | void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, |
264 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, |
265 | MachineBasicBlock *LandMBB, bool Detail = false); |
266 | int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, |
267 | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); |
268 | void mergeSerialBlock(MachineBasicBlock *DstMBB, |
269 | MachineBasicBlock *SrcMBB); |
270 | |
271 | void mergeIfthenelseBlock(MachineInstr *BranchMI, |
272 | MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, |
273 | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); |
274 | void mergeLooplandBlock(MachineBasicBlock *DstMBB, |
275 | MachineBasicBlock *LandMBB); |
276 | void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, |
277 | MachineBasicBlock *LandMBB); |
278 | void settleLoopcontBlock(MachineBasicBlock *ContingMBB, |
279 | MachineBasicBlock *ContMBB); |
280 | |
281 | /// normalizeInfiniteLoopExit change |
282 | /// B1: |
283 | /// uncond_br LoopHeader |
284 | /// |
285 | /// to |
286 | /// B1: |
287 | /// cond_br 1 LoopHeader dummyExit |
288 | /// and return the newly added dummy exit block |
289 | MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); |
290 | void removeUnconditionalBranch(MachineBasicBlock *MBB); |
291 | |
292 | /// Remove duplicate branches instructions in a block. |
293 | /// For instance |
294 | /// B0: |
295 | /// cond_br X B1 B2 |
296 | /// cond_br X B1 B2 |
297 | /// is transformed to |
298 | /// B0: |
299 | /// cond_br X B1 B2 |
300 | void removeRedundantConditionalBranch(MachineBasicBlock *MBB); |
301 | |
302 | void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); |
303 | void removeSuccessor(MachineBasicBlock *MBB); |
304 | MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, |
305 | MachineBasicBlock *PredMBB); |
306 | void migrateInstruction(MachineBasicBlock *SrcMBB, |
307 | MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); |
308 | void recordSccnum(MachineBasicBlock *MBB, int SCCNum); |
309 | void retireBlock(MachineBasicBlock *MBB); |
310 | |
311 | private: |
312 | MBBInfoMap BlockInfoMap; |
313 | LoopLandInfoMap LLInfoMap; |
314 | std::map<MachineLoop *, bool> Visited; |
315 | MachineFunction *FuncRep; |
316 | SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; |
317 | }; |
318 | |
319 | } // end anonymous namespace |
320 | |
321 | char R600MachineCFGStructurizer::ID = 0; |
322 | |
323 | int R600MachineCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { |
324 | MBBInfoMap::const_iterator It = BlockInfoMap.find(x: MBB); |
325 | if (It == BlockInfoMap.end()) |
326 | return INVALIDSCCNUM; |
327 | return (*It).second->SccNum; |
328 | } |
329 | |
330 | MachineBasicBlock *R600MachineCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) |
331 | const { |
332 | LoopLandInfoMap::const_iterator It = LLInfoMap.find(x: LoopRep); |
333 | if (It == LLInfoMap.end()) |
334 | return nullptr; |
335 | return (*It).second; |
336 | } |
337 | |
338 | bool R600MachineCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { |
339 | MachineLoop *LoopRep = MLI->getLoopFor(BB: MBB); |
340 | if (!LoopRep) |
341 | return false; |
342 | MachineBasicBlock * = LoopRep->getHeader(); |
343 | return MBB->isSuccessor(MBB: LoopHeader); |
344 | } |
345 | |
346 | bool R600MachineCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { |
347 | MBBInfoMap::const_iterator It = BlockInfoMap.find(x: MBB); |
348 | if (It == BlockInfoMap.end()) |
349 | return false; |
350 | return (*It).second->IsRetired; |
351 | } |
352 | |
353 | bool R600MachineCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { |
354 | MachineLoop *LoopRep = MLI->getLoopFor(BB: MBB); |
355 | while (LoopRep && LoopRep->getHeader() == MBB) { |
356 | MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); |
357 | if(!LoopLand) |
358 | return true; |
359 | if (!isRetiredBlock(MBB: LoopLand)) |
360 | return true; |
361 | LoopRep = LoopRep->getParentLoop(); |
362 | } |
363 | return false; |
364 | } |
365 | |
366 | R600MachineCFGStructurizer::PathToKind R600MachineCFGStructurizer::singlePathTo( |
367 | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, |
368 | bool AllowSideEntry) const { |
369 | assert(DstMBB); |
370 | if (SrcMBB == DstMBB) |
371 | return SinglePath_InPath; |
372 | while (SrcMBB && SrcMBB->succ_size() == 1) { |
373 | SrcMBB = *SrcMBB->succ_begin(); |
374 | if (SrcMBB == DstMBB) |
375 | return SinglePath_InPath; |
376 | if (!AllowSideEntry && SrcMBB->pred_size() > 1) |
377 | return Not_SinglePath; |
378 | } |
379 | if (SrcMBB && SrcMBB->succ_size()==0) |
380 | return SinglePath_NotInPath; |
381 | return Not_SinglePath; |
382 | } |
383 | |
384 | int R600MachineCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, |
385 | MBBVector::const_iterator E) const { |
386 | int Count = 0; |
387 | while (It != E) { |
388 | if (!isRetiredBlock(MBB: *It)) |
389 | ++Count; |
390 | ++It; |
391 | } |
392 | return Count; |
393 | } |
394 | |
395 | bool R600MachineCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { |
396 | unsigned BlockSizeThreshold = 30; |
397 | unsigned CloneInstrThreshold = 100; |
398 | bool MultiplePreds = MBB && (MBB->pred_size() > 1); |
399 | |
400 | if(!MultiplePreds) |
401 | return false; |
402 | unsigned BlkSize = MBB->size(); |
403 | return ((BlkSize > BlockSizeThreshold) && |
404 | (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); |
405 | } |
406 | |
407 | void R600MachineCFGStructurizer::reversePredicateSetter( |
408 | MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { |
409 | assert(I.isValid() && "Expected valid iterator" ); |
410 | for (;; --I) { |
411 | if (I == MBB.end()) |
412 | continue; |
413 | if (I->getOpcode() == R600::PRED_X) { |
414 | switch (I->getOperand(i: 2).getImm()) { |
415 | case R600::PRED_SETE_INT: |
416 | I->getOperand(i: 2).setImm(R600::PRED_SETNE_INT); |
417 | return; |
418 | case R600::PRED_SETNE_INT: |
419 | I->getOperand(i: 2).setImm(R600::PRED_SETE_INT); |
420 | return; |
421 | case R600::PRED_SETE: |
422 | I->getOperand(i: 2).setImm(R600::PRED_SETNE); |
423 | return; |
424 | case R600::PRED_SETNE: |
425 | I->getOperand(i: 2).setImm(R600::PRED_SETE); |
426 | return; |
427 | default: |
428 | llvm_unreachable("PRED_X Opcode invalid!" ); |
429 | } |
430 | } |
431 | } |
432 | } |
433 | |
434 | void R600MachineCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, |
435 | int NewOpcode, const DebugLoc &DL) { |
436 | MachineInstr *MI = |
437 | MBB->getParent()->CreateMachineInstr(MCID: TII->get(Opcode: NewOpcode), DL); |
438 | MBB->push_back(MI); |
439 | //assume the instruction doesn't take any reg operand ... |
440 | SHOWNEWINSTR(MI); |
441 | } |
442 | |
443 | MachineInstr *R600MachineCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, |
444 | int NewOpcode, |
445 | const DebugLoc &DL) { |
446 | MachineInstr *MI = |
447 | MBB->getParent()->CreateMachineInstr(MCID: TII->get(Opcode: NewOpcode), DL); |
448 | if (!MBB->empty()) |
449 | MBB->insert(I: MBB->begin(), MI); |
450 | else |
451 | MBB->push_back(MI); |
452 | SHOWNEWINSTR(MI); |
453 | return MI; |
454 | } |
455 | |
456 | MachineInstr *R600MachineCFGStructurizer::insertInstrBefore( |
457 | MachineBasicBlock::iterator I, int NewOpcode) { |
458 | MachineInstr *OldMI = &(*I); |
459 | MachineBasicBlock *MBB = OldMI->getParent(); |
460 | MachineInstr *NewMBB = |
461 | MBB->getParent()->CreateMachineInstr(MCID: TII->get(Opcode: NewOpcode), DL: DebugLoc()); |
462 | MBB->insert(I, MI: NewMBB); |
463 | //assume the instruction doesn't take any reg operand ... |
464 | SHOWNEWINSTR(NewMBB); |
465 | return NewMBB; |
466 | } |
467 | |
468 | void R600MachineCFGStructurizer::insertCondBranchBefore( |
469 | MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { |
470 | MachineInstr *OldMI = &(*I); |
471 | MachineBasicBlock *MBB = OldMI->getParent(); |
472 | MachineFunction *MF = MBB->getParent(); |
473 | MachineInstr *NewMI = MF->CreateMachineInstr(MCID: TII->get(Opcode: NewOpcode), DL); |
474 | MBB->insert(I, MI: NewMI); |
475 | MachineInstrBuilder MIB(*MF, NewMI); |
476 | MIB.addReg(RegNo: OldMI->getOperand(i: 1).getReg(), flags: false); |
477 | SHOWNEWINSTR(NewMI); |
478 | //erase later oldInstr->eraseFromParent(); |
479 | } |
480 | |
481 | void R600MachineCFGStructurizer::insertCondBranchBefore( |
482 | MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, |
483 | int RegNum, const DebugLoc &DL) { |
484 | MachineFunction *MF = blk->getParent(); |
485 | MachineInstr *NewInstr = MF->CreateMachineInstr(MCID: TII->get(Opcode: NewOpcode), DL); |
486 | //insert before |
487 | blk->insert(I, MI: NewInstr); |
488 | MachineInstrBuilder(*MF, NewInstr).addReg(RegNo: RegNum, flags: false); |
489 | SHOWNEWINSTR(NewInstr); |
490 | } |
491 | |
492 | int R600MachineCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { |
493 | switch(OldOpcode) { |
494 | case R600::JUMP_COND: |
495 | case R600::JUMP: return R600::IF_PREDICATE_SET; |
496 | case R600::BRANCH_COND_i32: |
497 | case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; |
498 | default: llvm_unreachable("internal error" ); |
499 | } |
500 | return -1; |
501 | } |
502 | |
503 | int R600MachineCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { |
504 | switch(OldOpcode) { |
505 | case R600::JUMP_COND: |
506 | case R600::JUMP: return R600::IF_PREDICATE_SET; |
507 | case R600::BRANCH_COND_i32: |
508 | case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; |
509 | default: llvm_unreachable("internal error" ); |
510 | } |
511 | return -1; |
512 | } |
513 | |
514 | int R600MachineCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { |
515 | switch(OldOpcode) { |
516 | case R600::JUMP_COND: |
517 | case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; |
518 | default: llvm_unreachable("internal error" ); |
519 | } |
520 | return -1; |
521 | } |
522 | |
523 | int R600MachineCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { |
524 | switch(OldOpcode) { |
525 | case R600::JUMP_COND: |
526 | case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; |
527 | default: llvm_unreachable("internal error" ); |
528 | } |
529 | return -1; |
530 | } |
531 | |
532 | MachineBasicBlock *R600MachineCFGStructurizer::getTrueBranch(MachineInstr *MI) { |
533 | return MI->getOperand(i: 0).getMBB(); |
534 | } |
535 | |
536 | void R600MachineCFGStructurizer::setTrueBranch(MachineInstr *MI, |
537 | MachineBasicBlock *MBB) { |
538 | MI->getOperand(i: 0).setMBB(MBB); |
539 | } |
540 | |
541 | MachineBasicBlock * |
542 | R600MachineCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, |
543 | MachineInstr *MI) { |
544 | assert(MBB->succ_size() == 2); |
545 | MachineBasicBlock *TrueBranch = getTrueBranch(MI); |
546 | MachineBasicBlock::succ_iterator It = MBB->succ_begin(); |
547 | MachineBasicBlock::succ_iterator Next = It; |
548 | ++Next; |
549 | return (*It == TrueBranch) ? *Next : *It; |
550 | } |
551 | |
552 | bool R600MachineCFGStructurizer::isCondBranch(MachineInstr *MI) { |
553 | switch (MI->getOpcode()) { |
554 | case R600::JUMP_COND: |
555 | case R600::BRANCH_COND_i32: |
556 | case R600::BRANCH_COND_f32: return true; |
557 | default: |
558 | return false; |
559 | } |
560 | return false; |
561 | } |
562 | |
563 | bool R600MachineCFGStructurizer::isUncondBranch(MachineInstr *MI) { |
564 | switch (MI->getOpcode()) { |
565 | case R600::JUMP: |
566 | case R600::BRANCH: |
567 | return true; |
568 | default: |
569 | return false; |
570 | } |
571 | return false; |
572 | } |
573 | |
574 | DebugLoc R600MachineCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { |
575 | //get DebugLoc from the first MachineBasicBlock instruction with debug info |
576 | DebugLoc DL; |
577 | for (MachineInstr &MI : *MBB) |
578 | if (MI.getDebugLoc()) |
579 | DL = MI.getDebugLoc(); |
580 | return DL; |
581 | } |
582 | |
583 | MachineInstr *R600MachineCFGStructurizer::getNormalBlockBranchInstr( |
584 | MachineBasicBlock *MBB) { |
585 | MachineBasicBlock::reverse_iterator It = MBB->rbegin(); |
586 | MachineInstr *MI = &*It; |
587 | if (MI && (isCondBranch(MI) || isUncondBranch(MI))) |
588 | return MI; |
589 | return nullptr; |
590 | } |
591 | |
592 | MachineInstr *R600MachineCFGStructurizer::getLoopendBlockBranchInstr( |
593 | MachineBasicBlock *MBB) { |
594 | for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); |
595 | It != E; ++It) { |
596 | // FIXME: Simplify |
597 | MachineInstr *MI = &*It; |
598 | if (MI) { |
599 | if (isCondBranch(MI) || isUncondBranch(MI)) |
600 | return MI; |
601 | if (!TII->isMov(Opcode: MI->getOpcode())) |
602 | break; |
603 | } |
604 | } |
605 | return nullptr; |
606 | } |
607 | |
608 | MachineInstr *R600MachineCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { |
609 | MachineBasicBlock::reverse_iterator It = MBB->rbegin(); |
610 | if (It != MBB->rend()) { |
611 | MachineInstr *instr = &(*It); |
612 | if (instr->getOpcode() == R600::RETURN) |
613 | return instr; |
614 | } |
615 | return nullptr; |
616 | } |
617 | |
618 | bool R600MachineCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { |
619 | MachineInstr *MI = getReturnInstr(MBB); |
620 | bool IsReturn = MBB->succ_empty(); |
621 | if (MI) |
622 | assert(IsReturn); |
623 | else if (IsReturn) |
624 | LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() |
625 | << " is return block without RETURN instr\n" ;); |
626 | return IsReturn; |
627 | } |
628 | |
629 | void R600MachineCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, |
630 | MachineBasicBlock *SrcMBB) { |
631 | for (MachineBasicBlock *Succ : SrcMBB->successors()) |
632 | DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of |
633 | } |
634 | |
635 | MachineBasicBlock *R600MachineCFGStructurizer::clone(MachineBasicBlock *MBB) { |
636 | MachineFunction *Func = MBB->getParent(); |
637 | MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); |
638 | Func->push_back(MBB: NewMBB); //insert to function |
639 | for (const MachineInstr &It : *MBB) |
640 | NewMBB->push_back(MI: Func->CloneMachineInstr(Orig: &It)); |
641 | return NewMBB; |
642 | } |
643 | |
644 | void R600MachineCFGStructurizer::replaceInstrUseOfBlockWith( |
645 | MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, |
646 | MachineBasicBlock *NewBlk) { |
647 | MachineInstr *BranchMI = getLoopendBlockBranchInstr(MBB: SrcMBB); |
648 | if (BranchMI && isCondBranch(MI: BranchMI) && |
649 | getTrueBranch(MI: BranchMI) == OldMBB) |
650 | setTrueBranch(MI: BranchMI, MBB: NewBlk); |
651 | } |
652 | |
653 | void R600MachineCFGStructurizer::wrapup(MachineBasicBlock *MBB) { |
654 | assert((!MBB->getParent()->getJumpTableInfo() |
655 | || MBB->getParent()->getJumpTableInfo()->isEmpty()) |
656 | && "found a jump table" ); |
657 | |
658 | //collect continue right before endloop |
659 | SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; |
660 | MachineBasicBlock::iterator Pre = MBB->begin(); |
661 | MachineBasicBlock::iterator E = MBB->end(); |
662 | MachineBasicBlock::iterator It = Pre; |
663 | while (It != E) { |
664 | if (Pre->getOpcode() == R600::CONTINUE |
665 | && It->getOpcode() == R600::ENDLOOP) |
666 | ContInstr.push_back(Elt: &*Pre); |
667 | Pre = It; |
668 | ++It; |
669 | } |
670 | |
671 | //delete continue right before endloop |
672 | for (auto *MI : ContInstr) |
673 | MI->eraseFromParent(); |
674 | |
675 | // TODO to fix up jump table so later phase won't be confused. if |
676 | // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but |
677 | // there isn't such an interface yet. alternatively, replace all the other |
678 | // blocks in the jump table with the entryBlk //} |
679 | } |
680 | |
681 | bool R600MachineCFGStructurizer::prepare() { |
682 | bool Changed = false; |
683 | |
684 | //FIXME: if not reducible flow graph, make it so ??? |
685 | |
686 | LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::prepare\n" ;); |
687 | |
688 | orderBlocks(MF: FuncRep); |
689 | |
690 | SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; |
691 | |
692 | // Add an ExitBlk to loop that don't have one |
693 | for (MachineLoop *LoopRep : *MLI) { |
694 | MBBVector ExitingMBBs; |
695 | LoopRep->getExitingBlocks(ExitingBlocks&: ExitingMBBs); |
696 | |
697 | if (ExitingMBBs.size() == 0) { |
698 | MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); |
699 | if (DummyExitBlk) |
700 | RetBlks.push_back(Elt: DummyExitBlk); |
701 | } |
702 | } |
703 | |
704 | // Remove unconditional branch instr. |
705 | // Add dummy exit block iff there are multiple returns. |
706 | for (MachineBasicBlock *MBB : OrderedBlks) { |
707 | removeUnconditionalBranch(MBB); |
708 | removeRedundantConditionalBranch(MBB); |
709 | if (isReturnBlock(MBB)) { |
710 | RetBlks.push_back(Elt: MBB); |
711 | } |
712 | assert(MBB->succ_size() <= 2); |
713 | } |
714 | |
715 | if (RetBlks.size() >= 2) { |
716 | addDummyExitBlock(RetMBB&: RetBlks); |
717 | Changed = true; |
718 | } |
719 | |
720 | return Changed; |
721 | } |
722 | |
723 | bool R600MachineCFGStructurizer::run() { |
724 | //Assume reducible CFG... |
725 | LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::run\n" ); |
726 | |
727 | #ifdef STRESSTEST |
728 | //Use the worse block ordering to test the algorithm. |
729 | ReverseVector(orderedBlks); |
730 | #endif |
731 | |
732 | LLVM_DEBUG(dbgs() << "Ordered blocks:\n" ; printOrderedBlocks();); |
733 | int NumIter = 0; |
734 | bool Finish = false; |
735 | MachineBasicBlock *MBB; |
736 | bool MakeProgress = false; |
737 | int NumRemainedBlk = countActiveBlock(It: OrderedBlks.begin(), |
738 | E: OrderedBlks.end()); |
739 | |
740 | do { |
741 | ++NumIter; |
742 | LLVM_DEBUG(dbgs() << "numIter = " << NumIter |
743 | << ", numRemaintedBlk = " << NumRemainedBlk << "\n" ;); |
744 | (void)NumIter; |
745 | |
746 | SmallVectorImpl<MachineBasicBlock *>::const_iterator It = |
747 | OrderedBlks.begin(); |
748 | SmallVectorImpl<MachineBasicBlock *>::const_iterator E = |
749 | OrderedBlks.end(); |
750 | |
751 | SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = |
752 | It; |
753 | MachineBasicBlock *SccBeginMBB = nullptr; |
754 | int SccNumBlk = 0; // The number of active blocks, init to a |
755 | // maximum possible number. |
756 | int SccNumIter; // Number of iteration in this SCC. |
757 | |
758 | while (It != E) { |
759 | MBB = *It; |
760 | |
761 | if (!SccBeginMBB) { |
762 | SccBeginIter = It; |
763 | SccBeginMBB = MBB; |
764 | SccNumIter = 0; |
765 | SccNumBlk = NumRemainedBlk; // Init to maximum possible number. |
766 | LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); |
767 | dbgs() << "\n" ;); |
768 | } |
769 | |
770 | if (!isRetiredBlock(MBB)) |
771 | patternMatch(MBB); |
772 | |
773 | ++It; |
774 | |
775 | bool ContNextScc = true; |
776 | if (It == E |
777 | || getSCCNum(MBB: SccBeginMBB) != getSCCNum(MBB: *It)) { |
778 | // Just finish one scc. |
779 | ++SccNumIter; |
780 | int sccRemainedNumBlk = countActiveBlock(It: SccBeginIter, E: It); |
781 | if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { |
782 | LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) |
783 | << ", sccNumIter = " << SccNumIter; |
784 | dbgs() << "doesn't make any progress\n" ;); |
785 | (void)SccNumIter; |
786 | ContNextScc = true; |
787 | } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { |
788 | SccNumBlk = sccRemainedNumBlk; |
789 | It = SccBeginIter; |
790 | ContNextScc = false; |
791 | LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) |
792 | << "sccNumIter = " << SccNumIter << '\n';); |
793 | } else { |
794 | // Finish the current scc. |
795 | ContNextScc = true; |
796 | } |
797 | } else { |
798 | // Continue on next component in the current scc. |
799 | ContNextScc = false; |
800 | } |
801 | |
802 | if (ContNextScc) |
803 | SccBeginMBB = nullptr; |
804 | } //while, "one iteration" over the function. |
805 | |
806 | MachineBasicBlock *EntryMBB = |
807 | *GraphTraits<MachineFunction *>::nodes_begin(F: FuncRep); |
808 | if (EntryMBB->succ_empty()) { |
809 | Finish = true; |
810 | LLVM_DEBUG(dbgs() << "Reduce to one block\n" ;); |
811 | } else { |
812 | int NewnumRemainedBlk |
813 | = countActiveBlock(It: OrderedBlks.begin(), E: OrderedBlks.end()); |
814 | // consider cloned blocks ?? |
815 | if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { |
816 | MakeProgress = true; |
817 | NumRemainedBlk = NewnumRemainedBlk; |
818 | } else { |
819 | MakeProgress = false; |
820 | LLVM_DEBUG(dbgs() << "No progress\n" ;); |
821 | } |
822 | } |
823 | } while (!Finish && MakeProgress); |
824 | |
825 | // Misc wrap up to maintain the consistency of the Function representation. |
826 | wrapup(MBB: *GraphTraits<MachineFunction *>::nodes_begin(F: FuncRep)); |
827 | |
828 | // Detach retired Block, release memory. |
829 | for (auto &It : BlockInfoMap) { |
830 | if (It.second && It.second->IsRetired) { |
831 | assert((It.first)->getNumber() != -1); |
832 | LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n" ;); |
833 | It.first->eraseFromParent(); // Remove from the parent Function. |
834 | } |
835 | delete It.second; |
836 | } |
837 | BlockInfoMap.clear(); |
838 | LLInfoMap.clear(); |
839 | |
840 | if (!Finish) { |
841 | LLVM_DEBUG(FuncRep->viewCFG()); |
842 | report_fatal_error(reason: "IRREDUCIBLE_CFG" ); |
843 | } |
844 | |
845 | return true; |
846 | } |
847 | |
848 | void R600MachineCFGStructurizer::orderBlocks(MachineFunction *MF) { |
849 | int SccNum = 0; |
850 | for (scc_iterator<MachineFunction *> It = scc_begin(G: MF); !It.isAtEnd(); |
851 | ++It, ++SccNum) { |
852 | const std::vector<MachineBasicBlock *> &SccNext = *It; |
853 | for (MachineBasicBlock *MBB : SccNext) { |
854 | OrderedBlks.push_back(Elt: MBB); |
855 | recordSccnum(MBB, SCCNum: SccNum); |
856 | } |
857 | } |
858 | |
859 | // walk through all the block in func to check for unreachable |
860 | for (auto *MBB : nodes(G: MF)) { |
861 | SccNum = getSCCNum(MBB); |
862 | if (SccNum == INVALIDSCCNUM) |
863 | dbgs() << "unreachable block BB" << MBB->getNumber() << "\n" ; |
864 | } |
865 | } |
866 | |
867 | int R600MachineCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { |
868 | int NumMatch = 0; |
869 | int CurMatch; |
870 | |
871 | LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n" ;); |
872 | |
873 | while ((CurMatch = patternMatchGroup(MBB)) > 0) |
874 | NumMatch += CurMatch; |
875 | |
876 | LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() |
877 | << ", numMatch = " << NumMatch << "\n" ;); |
878 | |
879 | return NumMatch; |
880 | } |
881 | |
882 | int R600MachineCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { |
883 | int NumMatch = 0; |
884 | NumMatch += loopendPatternMatch(); |
885 | NumMatch += serialPatternMatch(MBB); |
886 | NumMatch += ifPatternMatch(MBB); |
887 | return NumMatch; |
888 | } |
889 | |
890 | int R600MachineCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { |
891 | if (MBB->succ_size() != 1) |
892 | return 0; |
893 | |
894 | MachineBasicBlock *childBlk = *MBB->succ_begin(); |
895 | if (childBlk->pred_size() != 1 || isActiveLoophead(MBB: childBlk)) |
896 | return 0; |
897 | |
898 | mergeSerialBlock(DstMBB: MBB, SrcMBB: childBlk); |
899 | ++numSerialPatternMatch; |
900 | return 1; |
901 | } |
902 | |
903 | int R600MachineCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { |
904 | //two edges |
905 | if (MBB->succ_size() != 2) |
906 | return 0; |
907 | if (hasBackEdge(MBB)) |
908 | return 0; |
909 | MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); |
910 | if (!BranchMI) |
911 | return 0; |
912 | |
913 | assert(isCondBranch(BranchMI)); |
914 | int NumMatch = 0; |
915 | |
916 | MachineBasicBlock *TrueMBB = getTrueBranch(MI: BranchMI); |
917 | NumMatch += serialPatternMatch(MBB: TrueMBB); |
918 | NumMatch += ifPatternMatch(MBB: TrueMBB); |
919 | MachineBasicBlock *FalseMBB = getFalseBranch(MBB, MI: BranchMI); |
920 | NumMatch += serialPatternMatch(MBB: FalseMBB); |
921 | NumMatch += ifPatternMatch(MBB: FalseMBB); |
922 | MachineBasicBlock *LandBlk; |
923 | int Cloned = 0; |
924 | |
925 | assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); |
926 | // TODO: Simplify |
927 | if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 |
928 | && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { |
929 | // Diamond pattern |
930 | LandBlk = *TrueMBB->succ_begin(); |
931 | } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { |
932 | // Triangle pattern, false is empty |
933 | LandBlk = FalseMBB; |
934 | FalseMBB = nullptr; |
935 | } else if (FalseMBB->succ_size() == 1 |
936 | && *FalseMBB->succ_begin() == TrueMBB) { |
937 | // Triangle pattern, true is empty |
938 | // We reverse the predicate to make a triangle, empty false pattern; |
939 | std::swap(a&: TrueMBB, b&: FalseMBB); |
940 | reversePredicateSetter(I: MBB->end(), MBB&: *MBB); |
941 | LandBlk = FalseMBB; |
942 | FalseMBB = nullptr; |
943 | } else if (FalseMBB->succ_size() == 1 |
944 | && isSameloopDetachedContbreak(Src1MBB: TrueMBB, Src2MBB: FalseMBB)) { |
945 | LandBlk = *FalseMBB->succ_begin(); |
946 | } else if (TrueMBB->succ_size() == 1 |
947 | && isSameloopDetachedContbreak(Src1MBB: FalseMBB, Src2MBB: TrueMBB)) { |
948 | LandBlk = *TrueMBB->succ_begin(); |
949 | } else { |
950 | return NumMatch + handleJumpintoIf(HeadMBB: MBB, TrueMBB, FalseMBB); |
951 | } |
952 | |
953 | // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the |
954 | // new BB created for landBlk==NULL may introduce new challenge to the |
955 | // reduction process. |
956 | if (LandBlk && |
957 | ((TrueMBB && TrueMBB->pred_size() > 1) |
958 | || (FalseMBB && FalseMBB->pred_size() > 1))) { |
959 | Cloned += improveSimpleJumpintoIf(HeadMBB: MBB, TrueMBB, FalseMBB, LandMBBPtr: &LandBlk); |
960 | } |
961 | |
962 | if (TrueMBB && TrueMBB->pred_size() > 1) { |
963 | TrueMBB = cloneBlockForPredecessor(MBB: TrueMBB, PredMBB: MBB); |
964 | ++Cloned; |
965 | } |
966 | |
967 | if (FalseMBB && FalseMBB->pred_size() > 1) { |
968 | FalseMBB = cloneBlockForPredecessor(MBB: FalseMBB, PredMBB: MBB); |
969 | ++Cloned; |
970 | } |
971 | |
972 | mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandMBB: LandBlk); |
973 | |
974 | ++numIfPatternMatch; |
975 | |
976 | numClonedBlock += Cloned; |
977 | |
978 | return 1 + Cloned + NumMatch; |
979 | } |
980 | |
981 | int R600MachineCFGStructurizer::loopendPatternMatch() { |
982 | std::deque<MachineLoop *> NestedLoops; |
983 | for (auto &It: *MLI) |
984 | for (MachineLoop *ML : depth_first(G: It)) |
985 | NestedLoops.push_front(x: ML); |
986 | |
987 | if (NestedLoops.empty()) |
988 | return 0; |
989 | |
990 | // Process nested loop outside->inside (we did push_front), |
991 | // so "continue" to a outside loop won't be mistaken as "break" |
992 | // of the current loop. |
993 | int Num = 0; |
994 | for (MachineLoop *ExaminedLoop : NestedLoops) { |
995 | if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) |
996 | continue; |
997 | LLVM_DEBUG(dbgs() << "Processing:\n" ; ExaminedLoop->dump();); |
998 | int NumBreak = mergeLoop(LoopRep: ExaminedLoop); |
999 | if (NumBreak == -1) |
1000 | break; |
1001 | Num += NumBreak; |
1002 | } |
1003 | return Num; |
1004 | } |
1005 | |
1006 | int R600MachineCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { |
1007 | MachineBasicBlock * = LoopRep->getHeader(); |
1008 | MBBVector ExitingMBBs; |
1009 | LoopRep->getExitingBlocks(ExitingBlocks&: ExitingMBBs); |
1010 | assert(!ExitingMBBs.empty() && "Infinite Loop not supported" ); |
1011 | LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() |
1012 | << " exiting blocks\n" ;); |
1013 | // We assume a single ExitBlk |
1014 | MBBVector ExitBlks; |
1015 | LoopRep->getExitBlocks(ExitBlocks&: ExitBlks); |
1016 | SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; |
1017 | for (MachineBasicBlock *MBB : ExitBlks) |
1018 | ExitBlkSet.insert(Ptr: MBB); |
1019 | assert(ExitBlkSet.size() == 1); |
1020 | MachineBasicBlock *ExitBlk = *ExitBlks.begin(); |
1021 | assert(ExitBlk && "Loop has several exit block" ); |
1022 | MBBVector LatchBlks; |
1023 | for (auto *LB : inverse_children<MachineBasicBlock*>(G: LoopHeader)) |
1024 | if (LoopRep->contains(BB: LB)) |
1025 | LatchBlks.push_back(Elt: LB); |
1026 | |
1027 | for (MachineBasicBlock *MBB : ExitingMBBs) |
1028 | mergeLoopbreakBlock(ExitingMBB: MBB, LandMBB: ExitBlk); |
1029 | for (MachineBasicBlock *MBB : LatchBlks) |
1030 | settleLoopcontBlock(ContingMBB: MBB, ContMBB: LoopHeader); |
1031 | int Match = 0; |
1032 | do { |
1033 | Match = 0; |
1034 | Match += serialPatternMatch(MBB: LoopHeader); |
1035 | Match += ifPatternMatch(MBB: LoopHeader); |
1036 | } while (Match > 0); |
1037 | mergeLooplandBlock(DstMBB: LoopHeader, LandMBB: ExitBlk); |
1038 | MachineLoop *ParentLoop = LoopRep->getParentLoop(); |
1039 | if (ParentLoop) |
1040 | MLI->changeLoopFor(BB: LoopHeader, L: ParentLoop); |
1041 | else |
1042 | MLI->removeBlock(BB: LoopHeader); |
1043 | Visited[LoopRep] = true; |
1044 | return 1; |
1045 | } |
1046 | |
1047 | bool R600MachineCFGStructurizer::isSameloopDetachedContbreak( |
1048 | MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { |
1049 | if (Src1MBB->succ_empty()) { |
1050 | MachineLoop *LoopRep = MLI->getLoopFor(BB: Src1MBB); |
1051 | if (LoopRep&& LoopRep == MLI->getLoopFor(BB: Src2MBB)) { |
1052 | MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; |
1053 | if (TheEntry) { |
1054 | LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" |
1055 | << Src1MBB->getNumber() << " src2 = BB" |
1056 | << Src2MBB->getNumber() << "\n" ;); |
1057 | return true; |
1058 | } |
1059 | } |
1060 | } |
1061 | return false; |
1062 | } |
1063 | |
1064 | int R600MachineCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, |
1065 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { |
1066 | int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); |
1067 | if (Num == 0) { |
1068 | LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" |
1069 | << "\n" ;); |
1070 | Num = handleJumpintoIfImp(HeadMBB, TrueMBB: FalseMBB, FalseMBB: TrueMBB); |
1071 | } |
1072 | return Num; |
1073 | } |
1074 | |
1075 | int R600MachineCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, |
1076 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { |
1077 | int Num = 0; |
1078 | MachineBasicBlock *DownBlk; |
1079 | |
1080 | //trueBlk could be the common post dominator |
1081 | DownBlk = TrueMBB; |
1082 | |
1083 | LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() |
1084 | << " true = BB" << TrueMBB->getNumber() |
1085 | << ", numSucc=" << TrueMBB->succ_size() << " false = BB" |
1086 | << FalseMBB->getNumber() << "\n" ;); |
1087 | |
1088 | while (DownBlk) { |
1089 | LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); |
1090 | |
1091 | if (singlePathTo(SrcMBB: FalseMBB, DstMBB: DownBlk) == SinglePath_InPath) { |
1092 | LLVM_DEBUG(dbgs() << " working\n" ;); |
1093 | |
1094 | Num += cloneOnSideEntryTo(PreMBB: HeadMBB, SrcMBB: TrueMBB, DstMBB: DownBlk); |
1095 | Num += cloneOnSideEntryTo(PreMBB: HeadMBB, SrcMBB: FalseMBB, DstMBB: DownBlk); |
1096 | |
1097 | numClonedBlock += Num; |
1098 | Num += serialPatternMatch(MBB: *HeadMBB->succ_begin()); |
1099 | Num += serialPatternMatch(MBB: *std::next(x: HeadMBB->succ_begin())); |
1100 | Num += ifPatternMatch(MBB: HeadMBB); |
1101 | assert(Num > 0); |
1102 | |
1103 | break; |
1104 | } |
1105 | LLVM_DEBUG(dbgs() << " not working\n" ;); |
1106 | DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; |
1107 | } // walk down the postDomTree |
1108 | |
1109 | return Num; |
1110 | } |
1111 | |
1112 | #ifndef NDEBUG |
1113 | void R600MachineCFGStructurizer::showImproveSimpleJumpintoIf( |
1114 | MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, |
1115 | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { |
1116 | dbgs() << "head = BB" << HeadMBB->getNumber() |
1117 | << " size = " << HeadMBB->size(); |
1118 | if (Detail) { |
1119 | dbgs() << "\n" ; |
1120 | HeadMBB->print(dbgs()); |
1121 | dbgs() << "\n" ; |
1122 | } |
1123 | |
1124 | if (TrueMBB) { |
1125 | dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " |
1126 | << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); |
1127 | if (Detail) { |
1128 | dbgs() << "\n" ; |
1129 | TrueMBB->print(dbgs()); |
1130 | dbgs() << "\n" ; |
1131 | } |
1132 | } |
1133 | if (FalseMBB) { |
1134 | dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " |
1135 | << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); |
1136 | if (Detail) { |
1137 | dbgs() << "\n" ; |
1138 | FalseMBB->print(dbgs()); |
1139 | dbgs() << "\n" ; |
1140 | } |
1141 | } |
1142 | if (LandMBB) { |
1143 | dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " |
1144 | << LandMBB->size() << " numPred = " << LandMBB->pred_size(); |
1145 | if (Detail) { |
1146 | dbgs() << "\n" ; |
1147 | LandMBB->print(dbgs()); |
1148 | dbgs() << "\n" ; |
1149 | } |
1150 | } |
1151 | |
1152 | dbgs() << "\n" ; |
1153 | } |
1154 | #endif |
1155 | |
1156 | int R600MachineCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, |
1157 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, |
1158 | MachineBasicBlock **LandMBBPtr) { |
1159 | bool MigrateTrue = false; |
1160 | bool MigrateFalse = false; |
1161 | |
1162 | MachineBasicBlock *LandBlk = *LandMBBPtr; |
1163 | |
1164 | assert((!TrueMBB || TrueMBB->succ_size() <= 1) |
1165 | && (!FalseMBB || FalseMBB->succ_size() <= 1)); |
1166 | |
1167 | if (TrueMBB == FalseMBB) |
1168 | return 0; |
1169 | |
1170 | MigrateTrue = needMigrateBlock(MBB: TrueMBB); |
1171 | MigrateFalse = needMigrateBlock(MBB: FalseMBB); |
1172 | |
1173 | if (!MigrateTrue && !MigrateFalse) |
1174 | return 0; |
1175 | |
1176 | // If we need to migrate either trueBlk and falseBlk, migrate the rest that |
1177 | // have more than one predecessors. without doing this, its predecessor |
1178 | // rather than headBlk will have undefined value in initReg. |
1179 | if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) |
1180 | MigrateTrue = true; |
1181 | if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) |
1182 | MigrateFalse = true; |
1183 | |
1184 | LLVM_DEBUG( |
1185 | dbgs() << "before improveSimpleJumpintoIf: " ; |
1186 | showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); |
1187 | |
1188 | // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk |
1189 | // |
1190 | // new: headBlk => if () {initReg = 1; org trueBlk branch} else |
1191 | // {initReg = 0; org falseBlk branch } |
1192 | // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} |
1193 | // => org landBlk |
1194 | // if landBlk->pred_size() > 2, put the about if-else inside |
1195 | // if (initReg !=2) {...} |
1196 | // |
1197 | // add initReg = initVal to headBlk |
1198 | |
1199 | const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(VT: MVT::i32); |
1200 | if (!MigrateTrue || !MigrateFalse) { |
1201 | // XXX: We have an opportunity here to optimize the "branch into if" case |
1202 | // here. Branch into if looks like this: |
1203 | // entry |
1204 | // / | |
1205 | // diamond_head branch_from |
1206 | // / \ | |
1207 | // diamond_false diamond_true |
1208 | // \ / |
1209 | // done |
1210 | // |
1211 | // The diamond_head block begins the "if" and the diamond_true block |
1212 | // is the block being "branched into". |
1213 | // |
1214 | // If MigrateTrue is true, then TrueBB is the block being "branched into" |
1215 | // and if MigrateFalse is true, then FalseBB is the block being |
1216 | // "branched into" |
1217 | // |
1218 | // Here is the pseudo code for how I think the optimization should work: |
1219 | // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. |
1220 | // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. |
1221 | // 3. Move the branch instruction from diamond_head into its own basic |
1222 | // block (new_block). |
1223 | // 4. Add an unconditional branch from diamond_head to new_block |
1224 | // 5. Replace the branch instruction in branch_from with an unconditional |
1225 | // branch to new_block. If branch_from has multiple predecessors, then |
1226 | // we need to replace the True/False block in the branch |
1227 | // instruction instead of replacing it. |
1228 | // 6. Change the condition of the branch instruction in new_block from |
1229 | // COND to (COND || GPR0) |
1230 | // |
1231 | // In order insert these MOV instruction, we will need to use the |
1232 | // RegisterScavenger. Usually liveness stops being tracked during |
1233 | // the late machine optimization passes, however if we implement |
1234 | // bool TargetRegisterInfo::requiresRegisterScavenging( |
1235 | // const MachineFunction &MF) |
1236 | // and have it return true, liveness will be tracked correctly |
1237 | // by generic optimization passes. We will also need to make sure that |
1238 | // all of our target-specific passes that run after regalloc and before |
1239 | // the CFGStructurizer track liveness and we will need to modify this pass |
1240 | // to correctly track liveness. |
1241 | // |
1242 | // After the above changes, the new CFG should look like this: |
1243 | // entry |
1244 | // / | |
1245 | // diamond_head branch_from |
1246 | // \ / |
1247 | // new_block |
1248 | // / | |
1249 | // diamond_false diamond_true |
1250 | // \ / |
1251 | // done |
1252 | // |
1253 | // Without this optimization, we are forced to duplicate the diamond_true |
1254 | // block and we will end up with a CFG like this: |
1255 | // |
1256 | // entry |
1257 | // / | |
1258 | // diamond_head branch_from |
1259 | // / \ | |
1260 | // diamond_false diamond_true diamond_true (duplicate) |
1261 | // \ / | |
1262 | // done --------------------| |
1263 | // |
1264 | // Duplicating diamond_true can be very costly especially if it has a |
1265 | // lot of instructions. |
1266 | return 0; |
1267 | } |
1268 | |
1269 | int NumNewBlk = 0; |
1270 | |
1271 | bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); |
1272 | |
1273 | //insert R600::ENDIF to avoid special case "input landBlk == NULL" |
1274 | MachineBasicBlock::iterator I = insertInstrBefore(MBB: LandBlk, NewOpcode: R600::ENDIF); |
1275 | |
1276 | if (LandBlkHasOtherPred) { |
1277 | report_fatal_error(reason: "Extra register needed to handle CFG" ); |
1278 | Register CmpResReg = |
1279 | HeadMBB->getParent()->getRegInfo().createVirtualRegister(RegClass: I32RC); |
1280 | report_fatal_error(reason: "Extra compare instruction needed to handle CFG" ); |
1281 | insertCondBranchBefore(blk: LandBlk, I, NewOpcode: R600::IF_PREDICATE_SET, |
1282 | RegNum: CmpResReg, DL: DebugLoc()); |
1283 | } |
1284 | |
1285 | // XXX: We are running this after RA, so creating virtual registers will |
1286 | // cause an assertion failure in the PostRA scheduling pass. |
1287 | Register InitReg = |
1288 | HeadMBB->getParent()->getRegInfo().createVirtualRegister(RegClass: I32RC); |
1289 | insertCondBranchBefore(blk: LandBlk, I, NewOpcode: R600::IF_PREDICATE_SET, RegNum: InitReg, |
1290 | DL: DebugLoc()); |
1291 | |
1292 | if (MigrateTrue) { |
1293 | migrateInstruction(SrcMBB: TrueMBB, DstMBB: LandBlk, I); |
1294 | // need to uncondionally insert the assignment to ensure a path from its |
1295 | // predecessor rather than headBlk has valid value in initReg if |
1296 | // (initVal != 1). |
1297 | report_fatal_error(reason: "Extra register needed to handle CFG" ); |
1298 | } |
1299 | insertInstrBefore(I, NewOpcode: R600::ELSE); |
1300 | |
1301 | if (MigrateFalse) { |
1302 | migrateInstruction(SrcMBB: FalseMBB, DstMBB: LandBlk, I); |
1303 | // need to uncondionally insert the assignment to ensure a path from its |
1304 | // predecessor rather than headBlk has valid value in initReg if |
1305 | // (initVal != 0) |
1306 | report_fatal_error(reason: "Extra register needed to handle CFG" ); |
1307 | } |
1308 | |
1309 | if (LandBlkHasOtherPred) { |
1310 | // add endif |
1311 | insertInstrBefore(I, NewOpcode: R600::ENDIF); |
1312 | |
1313 | // put initReg = 2 to other predecessors of landBlk |
1314 | for (MachineBasicBlock *MBB : LandBlk->predecessors()) |
1315 | if (MBB != TrueMBB && MBB != FalseMBB) |
1316 | report_fatal_error(reason: "Extra register needed to handle CFG" ); |
1317 | } |
1318 | LLVM_DEBUG( |
1319 | dbgs() << "result from improveSimpleJumpintoIf: " ; |
1320 | showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); |
1321 | |
1322 | // update landBlk |
1323 | *LandMBBPtr = LandBlk; |
1324 | |
1325 | return NumNewBlk; |
1326 | } |
1327 | |
1328 | void R600MachineCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, |
1329 | MachineBasicBlock *SrcMBB) { |
1330 | LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" |
1331 | << SrcMBB->getNumber() << "\n" ;); |
1332 | DstMBB->splice(Where: DstMBB->end(), Other: SrcMBB, From: SrcMBB->begin(), To: SrcMBB->end()); |
1333 | |
1334 | DstMBB->removeSuccessor(Succ: SrcMBB, NormalizeSuccProbs: true); |
1335 | cloneSuccessorList(DstMBB, SrcMBB); |
1336 | |
1337 | removeSuccessor(MBB: SrcMBB); |
1338 | MLI->removeBlock(BB: SrcMBB); |
1339 | retireBlock(MBB: SrcMBB); |
1340 | } |
1341 | |
1342 | void R600MachineCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, |
1343 | MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, |
1344 | MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { |
1345 | assert (TrueMBB); |
1346 | LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ " ; |
1347 | if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() |
1348 | << " } else " ; |
1349 | dbgs() << "{ " ; if (FalseMBB) { |
1350 | dbgs() << "BB" << FalseMBB->getNumber(); |
1351 | } dbgs() << " }\n " ; |
1352 | dbgs() << "landBlock: " ; if (!LandMBB) { dbgs() << "NULL" ; } else { |
1353 | dbgs() << "BB" << LandMBB->getNumber(); |
1354 | } dbgs() << "\n" ;); |
1355 | |
1356 | int OldOpcode = BranchMI->getOpcode(); |
1357 | DebugLoc BranchDL = BranchMI->getDebugLoc(); |
1358 | |
1359 | // transform to |
1360 | // if cond |
1361 | // trueBlk |
1362 | // else |
1363 | // falseBlk |
1364 | // endif |
1365 | // landBlk |
1366 | |
1367 | MachineBasicBlock::iterator I = BranchMI; |
1368 | insertCondBranchBefore(I, NewOpcode: getBranchNzeroOpcode(OldOpcode), |
1369 | DL: BranchDL); |
1370 | |
1371 | if (TrueMBB) { |
1372 | MBB->splice(Where: I, Other: TrueMBB, From: TrueMBB->begin(), To: TrueMBB->end()); |
1373 | MBB->removeSuccessor(Succ: TrueMBB, NormalizeSuccProbs: true); |
1374 | if (LandMBB && TrueMBB->succ_size()!=0) |
1375 | TrueMBB->removeSuccessor(Succ: LandMBB, NormalizeSuccProbs: true); |
1376 | retireBlock(MBB: TrueMBB); |
1377 | MLI->removeBlock(BB: TrueMBB); |
1378 | } |
1379 | |
1380 | if (FalseMBB) { |
1381 | insertInstrBefore(I, NewOpcode: R600::ELSE); |
1382 | MBB->splice(Where: I, Other: FalseMBB, From: FalseMBB->begin(), |
1383 | To: FalseMBB->end()); |
1384 | MBB->removeSuccessor(Succ: FalseMBB, NormalizeSuccProbs: true); |
1385 | if (LandMBB && !FalseMBB->succ_empty()) |
1386 | FalseMBB->removeSuccessor(Succ: LandMBB, NormalizeSuccProbs: true); |
1387 | retireBlock(MBB: FalseMBB); |
1388 | MLI->removeBlock(BB: FalseMBB); |
1389 | } |
1390 | insertInstrBefore(I, NewOpcode: R600::ENDIF); |
1391 | |
1392 | BranchMI->eraseFromParent(); |
1393 | |
1394 | if (LandMBB && TrueMBB && FalseMBB) |
1395 | MBB->addSuccessor(Succ: LandMBB); |
1396 | } |
1397 | |
1398 | void R600MachineCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, |
1399 | MachineBasicBlock *LandMBB) { |
1400 | LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() |
1401 | << " land = BB" << LandMBB->getNumber() << "\n" ;); |
1402 | |
1403 | insertInstrBefore(MBB: DstBlk, NewOpcode: R600::WHILELOOP, DL: DebugLoc()); |
1404 | insertInstrEnd(MBB: DstBlk, NewOpcode: R600::ENDLOOP, DL: DebugLoc()); |
1405 | DstBlk->replaceSuccessor(Old: DstBlk, New: LandMBB); |
1406 | } |
1407 | |
1408 | void R600MachineCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, |
1409 | MachineBasicBlock *LandMBB) { |
1410 | LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" |
1411 | << ExitingMBB->getNumber() << " land = BB" |
1412 | << LandMBB->getNumber() << "\n" ;); |
1413 | MachineInstr *BranchMI = getLoopendBlockBranchInstr(MBB: ExitingMBB); |
1414 | assert(BranchMI && isCondBranch(BranchMI)); |
1415 | DebugLoc DL = BranchMI->getDebugLoc(); |
1416 | MachineBasicBlock *TrueBranch = getTrueBranch(MI: BranchMI); |
1417 | MachineBasicBlock::iterator I = BranchMI; |
1418 | if (TrueBranch != LandMBB) |
1419 | reversePredicateSetter(I, MBB&: *I->getParent()); |
1420 | insertCondBranchBefore(blk: ExitingMBB, I, NewOpcode: R600::IF_PREDICATE_SET, RegNum: R600::PREDICATE_BIT, DL); |
1421 | insertInstrBefore(I, NewOpcode: R600::BREAK); |
1422 | insertInstrBefore(I, NewOpcode: R600::ENDIF); |
1423 | //now branchInst can be erase safely |
1424 | BranchMI->eraseFromParent(); |
1425 | //now take care of successors, retire blocks |
1426 | ExitingMBB->removeSuccessor(Succ: LandMBB, NormalizeSuccProbs: true); |
1427 | } |
1428 | |
1429 | void R600MachineCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, |
1430 | MachineBasicBlock *ContMBB) { |
1431 | LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" |
1432 | << ContingMBB->getNumber() << ", cont = BB" |
1433 | << ContMBB->getNumber() << "\n" ;); |
1434 | |
1435 | MachineInstr *MI = getLoopendBlockBranchInstr(MBB: ContingMBB); |
1436 | if (MI) { |
1437 | assert(isCondBranch(MI)); |
1438 | MachineBasicBlock::iterator I = MI; |
1439 | MachineBasicBlock *TrueBranch = getTrueBranch(MI); |
1440 | int OldOpcode = MI->getOpcode(); |
1441 | DebugLoc DL = MI->getDebugLoc(); |
1442 | |
1443 | bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); |
1444 | |
1445 | if (!UseContinueLogical) { |
1446 | int BranchOpcode = |
1447 | TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : |
1448 | getBranchZeroOpcode(OldOpcode); |
1449 | insertCondBranchBefore(I, NewOpcode: BranchOpcode, DL); |
1450 | // insertEnd to ensure phi-moves, if exist, go before the continue-instr. |
1451 | insertInstrEnd(MBB: ContingMBB, NewOpcode: R600::CONTINUE, DL); |
1452 | insertInstrEnd(MBB: ContingMBB, NewOpcode: R600::ENDIF, DL); |
1453 | } else { |
1454 | int BranchOpcode = |
1455 | TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : |
1456 | getContinueZeroOpcode(OldOpcode); |
1457 | insertCondBranchBefore(I, NewOpcode: BranchOpcode, DL); |
1458 | } |
1459 | |
1460 | MI->eraseFromParent(); |
1461 | } else { |
1462 | // if we've arrived here then we've already erased the branch instruction |
1463 | // travel back up the basic block to see the last reference of our debug |
1464 | // location we've just inserted that reference here so it should be |
1465 | // representative insertEnd to ensure phi-moves, if exist, go before the |
1466 | // continue-instr. |
1467 | insertInstrEnd(MBB: ContingMBB, NewOpcode: R600::CONTINUE, |
1468 | DL: getLastDebugLocInBB(MBB: ContingMBB)); |
1469 | } |
1470 | } |
1471 | |
1472 | int R600MachineCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, |
1473 | MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { |
1474 | int Cloned = 0; |
1475 | assert(PreMBB->isSuccessor(SrcMBB)); |
1476 | while (SrcMBB && SrcMBB != DstMBB) { |
1477 | assert(SrcMBB->succ_size() == 1); |
1478 | if (SrcMBB->pred_size() > 1) { |
1479 | SrcMBB = cloneBlockForPredecessor(MBB: SrcMBB, PredMBB: PreMBB); |
1480 | ++Cloned; |
1481 | } |
1482 | |
1483 | PreMBB = SrcMBB; |
1484 | SrcMBB = *SrcMBB->succ_begin(); |
1485 | } |
1486 | |
1487 | return Cloned; |
1488 | } |
1489 | |
1490 | MachineBasicBlock * |
1491 | R600MachineCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, |
1492 | MachineBasicBlock *PredMBB) { |
1493 | assert(PredMBB->isSuccessor(MBB) && "succBlk is not a predecessor of curBlk" ); |
1494 | |
1495 | MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions |
1496 | replaceInstrUseOfBlockWith(SrcMBB: PredMBB, OldMBB: MBB, NewBlk: CloneMBB); |
1497 | //srcBlk, oldBlk, newBlk |
1498 | |
1499 | PredMBB->replaceSuccessor(Old: MBB, New: CloneMBB); |
1500 | |
1501 | // add all successor to cloneBlk |
1502 | cloneSuccessorList(DstMBB: CloneMBB, SrcMBB: MBB); |
1503 | |
1504 | numClonedInstr += MBB->size(); |
1505 | |
1506 | LLVM_DEBUG(dbgs() << "Cloned block: " |
1507 | << "BB" << MBB->getNumber() << "size " << MBB->size() |
1508 | << "\n" ;); |
1509 | |
1510 | SHOWNEWBLK(CloneMBB, "result of Cloned block: " ); |
1511 | |
1512 | return CloneMBB; |
1513 | } |
1514 | |
1515 | void R600MachineCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, |
1516 | MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { |
1517 | MachineBasicBlock::iterator SpliceEnd; |
1518 | //look for the input branchinstr, not the AMDGPU branchinstr |
1519 | MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB: SrcMBB); |
1520 | if (!BranchMI) { |
1521 | LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n" ;); |
1522 | SpliceEnd = SrcMBB->end(); |
1523 | } else { |
1524 | LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); |
1525 | SpliceEnd = BranchMI; |
1526 | } |
1527 | LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " |
1528 | << DstMBB->size() << "srcSize = " << SrcMBB->size() |
1529 | << "\n" ;); |
1530 | |
1531 | //splice insert before insertPos |
1532 | DstMBB->splice(Where: I, Other: SrcMBB, From: SrcMBB->begin(), To: SpliceEnd); |
1533 | |
1534 | LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " |
1535 | << DstMBB->size() << "srcSize = " << SrcMBB->size() |
1536 | << '\n';); |
1537 | } |
1538 | |
1539 | MachineBasicBlock * |
1540 | R600MachineCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { |
1541 | MachineBasicBlock * = LoopRep->getHeader(); |
1542 | MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); |
1543 | |
1544 | if (!LoopHeader || !LoopLatch) |
1545 | return nullptr; |
1546 | MachineInstr *BranchMI = getLoopendBlockBranchInstr(MBB: LoopLatch); |
1547 | // Is LoopRep an infinite loop ? |
1548 | if (!BranchMI || !isUncondBranch(MI: BranchMI)) |
1549 | return nullptr; |
1550 | |
1551 | MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); |
1552 | FuncRep->push_back(MBB: DummyExitBlk); //insert to function |
1553 | SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: " ); |
1554 | LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n" ;); |
1555 | LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); |
1556 | Ctx.emitError(ErrorStr: "Extra register needed to handle CFG" ); |
1557 | return nullptr; |
1558 | } |
1559 | |
1560 | void R600MachineCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { |
1561 | MachineInstr *BranchMI; |
1562 | |
1563 | // I saw two unconditional branch in one basic block in example |
1564 | // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. |
1565 | while ((BranchMI = getLoopendBlockBranchInstr(MBB)) |
1566 | && isUncondBranch(MI: BranchMI)) { |
1567 | LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); |
1568 | BranchMI->eraseFromParent(); |
1569 | } |
1570 | } |
1571 | |
1572 | void R600MachineCFGStructurizer::removeRedundantConditionalBranch( |
1573 | MachineBasicBlock *MBB) { |
1574 | if (MBB->succ_size() != 2) |
1575 | return; |
1576 | MachineBasicBlock *MBB1 = *MBB->succ_begin(); |
1577 | MachineBasicBlock *MBB2 = *std::next(x: MBB->succ_begin()); |
1578 | if (MBB1 != MBB2) |
1579 | return; |
1580 | |
1581 | MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); |
1582 | assert(BranchMI && isCondBranch(BranchMI)); |
1583 | LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); |
1584 | BranchMI->eraseFromParent(); |
1585 | SHOWNEWBLK(MBB1, "Removing redundant successor" ); |
1586 | MBB->removeSuccessor(Succ: MBB1, NormalizeSuccProbs: true); |
1587 | } |
1588 | |
1589 | void R600MachineCFGStructurizer::addDummyExitBlock( |
1590 | SmallVectorImpl<MachineBasicBlock*> &RetMBB) { |
1591 | MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); |
1592 | FuncRep->push_back(MBB: DummyExitBlk); //insert to function |
1593 | insertInstrEnd(MBB: DummyExitBlk, NewOpcode: R600::RETURN); |
1594 | |
1595 | for (MachineBasicBlock *MBB : RetMBB) { |
1596 | if (MachineInstr *MI = getReturnInstr(MBB)) |
1597 | MI->eraseFromParent(); |
1598 | MBB->addSuccessor(Succ: DummyExitBlk); |
1599 | LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() |
1600 | << " successors\n" ;); |
1601 | } |
1602 | SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: " ); |
1603 | } |
1604 | |
1605 | void R600MachineCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { |
1606 | while (MBB->succ_size()) |
1607 | MBB->removeSuccessor(Succ: *MBB->succ_begin()); |
1608 | } |
1609 | |
1610 | void R600MachineCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, |
1611 | int SccNum) { |
1612 | BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; |
1613 | if (!srcBlkInfo) |
1614 | srcBlkInfo = new BlockInformation(); |
1615 | srcBlkInfo->SccNum = SccNum; |
1616 | } |
1617 | |
1618 | void R600MachineCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { |
1619 | LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n" ;); |
1620 | |
1621 | BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; |
1622 | |
1623 | if (!SrcBlkInfo) |
1624 | SrcBlkInfo = new BlockInformation(); |
1625 | |
1626 | SrcBlkInfo->IsRetired = true; |
1627 | assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet" ); |
1628 | } |
1629 | |
1630 | INITIALIZE_PASS_BEGIN(R600MachineCFGStructurizer, "amdgpustructurizer" , |
1631 | "AMDGPU CFG Structurizer" , false, false) |
1632 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
1633 | INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) |
1634 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) |
1635 | INITIALIZE_PASS_END(R600MachineCFGStructurizer, "amdgpustructurizer" , |
1636 | "AMDGPU CFG Structurizer" , false, false) |
1637 | |
1638 | FunctionPass *llvm::createR600MachineCFGStructurizerPass() { |
1639 | return new R600MachineCFGStructurizer(); |
1640 | } |
1641 | |