1
2//===----- HexagonGlobalScheduler.cpp - Global Scheduler ------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
// Basic infrastructure for the global scheduling + Hexagon pull-up pass.
// Currently runs at the very end of code generation for Hexagon and cleans
// up lost scheduling opportunities. It currently breaks liveness, so no
// passes that rely on liveness info should run afterwards. This will be
// fixed in future versions.
15//
16//===----------------------------------------------------------------------===//
17#include "Hexagon.h"
18#include "HexagonGlobalRegion.h"
19#include "HexagonMachineFunctionInfo.h"
20#include "HexagonRegisterInfo.h"
21#include "HexagonSubtarget.h"
22#include "HexagonTargetMachine.h"
23#include "HexagonVLIWPacketizer.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/CodeGen/DFAPacketizer.h"
30#include "llvm/CodeGen/LatencyPriorityQueue.h"
31#include "llvm/CodeGen/LiveIntervals.h"
32#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
33#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
34#include "llvm/CodeGen/MachineDominators.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/Passes.h"
41#include "llvm/CodeGen/PseudoSourceValue.h"
42#include "llvm/CodeGen/SchedulerRegistry.h"
43#include "llvm/CodeGen/TargetInstrInfo.h"
44#include "llvm/CodeGen/TargetRegisterInfo.h"
45#include "llvm/CodeGen/TargetSchedule.h"
46#include "llvm/IR/Operator.h"
47#include "llvm/InitializePasses.h"
48#include "llvm/MC/MCInstrItineraries.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/MathExtras.h"
53#include "llvm/Target/TargetMachine.h"
54#include "llvm/Transforms/Utils/BasicBlockUtils.h"
55
56#include <list>
57#include <map>
58
59#define DEBUG_TYPE "global_sched"
60
61using namespace llvm;
62
63STATISTIC(HexagonNumPullUps, "Number of instructions pull-ups");
64STATISTIC(HexagonNumDualJumps, "Number of dual jumps formed");
65
66static cl::opt<bool> DisablePullUp("disable-pull-up", cl::Hidden,
67 cl::desc("Disable Hexagon pull-up pass"));
68
69static cl::opt<bool> EnableSpeculativePullUp(
70 "enable-speculative-pull-up", cl::Hidden,
71 cl::desc("Enable speculation during Hexagon pull-up pass"));
72
73static cl::opt<bool> EnableLocalPullUp(
74 "enable-local-pull-up", cl::Hidden, cl::init(Val: true),
75 cl::desc("Enable same BB pull during Hexagon pull-up pass"));
76
77static cl::opt<bool> AllowSpeculateLoads(
78 "speculate-loads-on-pull-up", cl::Hidden, cl::init(Val: true),
79 cl::desc("Allow speculative loads during Hexagon pull-up pass"));
80
81static cl::opt<bool> AllowCmpBranchLoads(
82 "cmp-branch-loads-pull-up", cl::Hidden, cl::init(Val: true),
83 cl::desc("Allow compare-branch loads during Hexagon pull-up pass"));
84
85static cl::opt<bool> AllowUnlikelyPath("unlikely-path-pull-up", cl::Hidden,
86 cl::init(Val: true),
87 cl::desc("Allow unlikely path pull up"));
88
89static cl::opt<bool>
90 PerformDualJumps("dual-jump-in-pull-up", cl::Hidden, cl::init(Val: true),
91 cl::desc("Perform dual jump formation during pull up"));
92
93static cl::opt<bool> AllowDependentPullUp(
94 "enable-dependent-pull-up", cl::Hidden, cl::init(Val: true),
95 cl::desc("Perform dual jump formation during pull up"));
96
97static cl::opt<bool>
98 AllowBBPeelPullUp("enable-bb-peel-pull-up", cl::Hidden, cl::init(Val: true),
99 cl::desc("Peel a reg copy out of a BBloop"));
100
101static cl::opt<bool> PreventCompoundSeparation(
102 "prevent-compound-separation", cl::Hidden,
103 cl::desc("Do not destroy existing compounds during pull up"));
104
105static cl::opt<bool> PreventDuplexSeparation(
106 "prevent-duplex-separation", cl::Hidden, cl::init(Val: true),
107 cl::desc("Do not destroy existing duplexes during pull up"));
108
109static cl::opt<unsigned> MainCandidateQueueSize("pull-up-main-queue-size",
110 cl::Hidden, cl::init(Val: 8));
111
112static cl::opt<unsigned> SecondaryCandidateQueueSize("pull-up-sec-queue-size",
113 cl::Hidden, cl::init(Val: 2));
114
115static cl::opt<bool> PostPullUpOpt(
116 "post-pull-up-opt", cl::Hidden, cl::Optional, cl::init(Val: true),
117 cl::desc("Enable opt. exposed by pull-up e.g., remove redundant jumps"));
118
119static cl::opt<bool> SpeculateNonPredInsn(
120 "speculate-non-pred-insn", cl::Hidden, cl::Optional, cl::init(Val: true),
121 cl::desc("Speculate non-predicable instructions in parent BB"));
122
123static cl::opt<bool>
124 DisableCheckBundles("disable-hexagon-check-bundles", cl::Hidden,
125 cl::init(Val: true),
126 cl::desc("Disable Hexagon check bundles pass"));
127
128static cl::opt<bool>
129 WarnOnBundleSize("warn-on-bundle-size", cl::Hidden,
130 cl::desc("Hexagon check bundles and warn on size"));
131
132static cl::opt<bool>
133 ForceNoopHazards("force-noop-hazards", cl::Hidden, cl::init(Val: false),
134 cl::desc("Force noop hazards in scheduler"));
135static cl::opt<bool> OneFloatPerPacket(
136 "single-float-packet", cl::Hidden,
137 cl::desc("Allow only one single floating point instruction in a packet"));
138static cl::opt<bool> OneComplexPerPacket(
139 "single-complex-packet", cl::Hidden,
140 cl::desc("Allow only one complex instruction in a packet"));
141
namespace llvm {
// Forward declarations of the pass factory and legacy-PM initializer so they
// can be referenced before their definitions.
FunctionPass *createHexagonGlobalScheduler();
void initializeHexagonGlobalSchedulerPass(PassRegistry &);
} // namespace llvm
146
namespace {
class HexagonGlobalSchedulerImpl;

// Legacy-PM wrapper pass. The real work is done by
// HexagonGlobalSchedulerImpl; this class only declares the required
// analyses and forwards runOnMachineFunction.
class HexagonGlobalScheduler : public MachineFunctionPass {
public:
  static char ID;
  HexagonGlobalScheduler() : MachineFunctionPass(ID) {
    initializeHexagonGlobalSchedulerPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequiredID(ID&: MachineDominatorsID);
    AU.addRequired<MachineLoopInfoWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return "Hexagon Global Scheduler"; }

  bool runOnMachineFunction(MachineFunction &Fn) override;
};
char HexagonGlobalScheduler::ID = 0;
172
173// Describes a single pull-up candidate.
174class PullUpCandidate {
175 MachineBasicBlock::instr_iterator CandidateLocation;
176 MachineBasicBlock::iterator HomeBundle;
177 bool DependentOp;
178 signed BenefitCost;
179 std::vector<MachineInstr *> Backtrack;
180
181public:
182 PullUpCandidate(MachineBasicBlock::instr_iterator MII) {
183 CandidateLocation = MII;
184 BenefitCost = 0;
185 }
186
187 PullUpCandidate(MachineBasicBlock::instr_iterator MII,
188 MachineBasicBlock::iterator HomeBundle,
189 std::vector<MachineInstr *> &backtrack, bool DependentOp,
190 signed Cost)
191 : CandidateLocation(MII), HomeBundle(HomeBundle),
192 DependentOp(DependentOp), BenefitCost(Cost) {
193 // Copy of the backtrack.
194 Backtrack = backtrack;
195 }
196
197 void populate(MachineBasicBlock::instr_iterator &MII,
198 MachineBasicBlock::iterator &WorkPoint,
199 std::vector<MachineInstr *> &backtrack, bool &dependentOp) {
200 MII = CandidateLocation;
201 WorkPoint = HomeBundle;
202 backtrack = Backtrack;
203 dependentOp = DependentOp;
204 }
205
206 signed getCost() { return BenefitCost; }
207
208 MachineInstr *getCandidate() { return &*CandidateLocation; }
209
210 void dump() {
211 dbgs() << "Cost(" << BenefitCost;
212 dbgs() << ") Dependent(" << DependentOp;
213 dbgs() << ") backtrack size(" << Backtrack.size() << ")\t";
214 CandidateLocation->dump();
215 }
216};
217
218/// PullUpCandidateSorter - A Sort utility for pull-up candidates.
219struct PullUpCandidateSorter {
220 PullUpCandidateSorter() {}
221 bool operator()(PullUpCandidate *LHS, PullUpCandidate *RHS) {
222 return LHS->getCost() > RHS->getCost();
223 }
224};
225
226// Describes a single pull-up opportunity: location to which
227// pull-up is possible with additional information about it.
228// Also contains a list of pull-up candidates for this location.
229class PullUpState {
230 friend class HexagonGlobalSchedulerImpl;
231 // Available opportunity for pull-up.
232 // FAIAP a bundle with an empty slot.
233 MachineBasicBlock::iterator HomeLocation;
234 // Home bundle copy. This is here for speed of iteration.
235 SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE> HomeBundle;
236 // Multiple candidates for the Home location.
237 SmallVector<PullUpCandidate *, 8> PullUpCandidates;
238
239 const HexagonInstrInfo *QII;
240
241public:
242 PullUpState(const HexagonInstrInfo *QII) : HomeLocation(NULL), QII(QII) {}
243
244 ~PullUpState() { reset(); }
245
246 void addPullUpCandidate(MachineBasicBlock::instr_iterator MII,
247 MachineBasicBlock::iterator HomeBundle,
248 std::vector<MachineInstr *> &backtrack,
249 bool DependentOp, signed Cost) {
250 LLVM_DEBUG(dbgs() << "\t[addPullUpCandidate]: "; (*MII).dump());
251 PullUpCandidate *PUI =
252 new PullUpCandidate(MII, HomeBundle, backtrack, DependentOp, Cost);
253 PullUpCandidates.push_back(Elt: PUI);
254 }
255
256 void dump() {
257 unsigned element = 0;
258 for (unsigned i = 0; i < HomeBundle.size(); i++) {
259 dbgs() << "[" << element++;
260 dbgs() << "] Home Duplex("
261 << QII->getDuplexCandidateGroup(MI: *HomeBundle[i]);
262 dbgs() << ") Compound (" << QII->getCompoundCandidateGroup(MI: *HomeBundle[i])
263 << ") ";
264 HomeBundle[i]->dump();
265 }
266 dbgs() << "\n";
267 element = 0;
268 for (SmallVector<PullUpCandidate *, 4>::iterator
269 I = PullUpCandidates.begin(),
270 E = PullUpCandidates.end();
271 I != E; ++I) {
272 dbgs() << "[" << element++ << "] Cand: Compound(";
273 dbgs() << QII->getCompoundCandidateGroup(MI: *(*I)->getCandidate()) << ") ";
274 (*I)->dump();
275 }
276 }
277
278 void reset() {
279 HomeLocation = NULL;
280 for (SmallVector<PullUpCandidate *, 4>::iterator
281 I = PullUpCandidates.begin(),
282 E = PullUpCandidates.end();
283 I != E; ++I)
284 delete *I;
285 PullUpCandidates.clear();
286 HomeBundle.clear();
287 }
288
289 void addHomeLocation(MachineBasicBlock::iterator WorkPoint) {
290 reset();
291 HomeLocation = WorkPoint;
292 }
293
294 unsigned haveCandidates() { return PullUpCandidates.size(); }
295};
296
// The workhorse of the pass. Derives from HexagonPacketizerList so it can
// reuse the DFA resource tracker and packetizer machinery when deciding
// whether a pull-up candidate fits into an existing bundle.
class HexagonGlobalSchedulerImpl : public HexagonPacketizerList {
  // List of PullUp regions for this function.
  std::vector<BasicBlockRegion *> PullUpRegions;
  // Map of approximate distance for each BB from the
  // function base.
  DenseMap<MachineBasicBlock *, unsigned> BlockToInstOffset;
  // Keep track of multiple pull-up candidates.
  PullUpState CurrentState;
  // Empty basic blocks as a result of pull-up.
  std::vector<MachineBasicBlock *> EmptyBBs;
  // Save all the Speculated MachineInstr that were moved
  // FROM MachineBasicBlock because we don't want to have
  // more than one speculated instructions pulled into one packet.
  // TODO: This can be removed once we have a use-def dependency chain
  // for all the instructions in a function.
  std::map<MachineInstr *, MachineBasicBlock *> SpeculatedIns;
  // All the regs and their aliases used by an instruction.
  std::map<MachineInstr *, std::vector<unsigned>> MIUseSet;
  // All the regs and their aliases defined by an instruction.
  std::map<MachineInstr *, std::vector<unsigned>> MIDefSet;

  // Analyses and target hooks; not owned by this object.
  AliasAnalysis *AA;
  const MachineBranchProbabilityInfo *MBPI;
  const MachineBlockFrequencyInfo *MBFI;
  const MachineRegisterInfo *MRI;
  const MachineFrameInfo &MFI;
  const HexagonRegisterInfo *QRI;
  const HexagonInstrInfo *QII;
  MachineLoopInfo &MLI;
  MachineDominatorTree &MDT;
  // Scratch instructions owned by this object (deleted in the destructor).
  // Ext models a constant extender for resource queries.
  MachineInstrBuilder Ext;
  MachineInstrBuilder Nop;
  // Maximum packet size, taken from the subtarget's issue width.
  const unsigned PacketSize;
  TargetSchedModel TSchedModel;

public:
  // Ctor.
  HexagonGlobalSchedulerImpl(MachineFunction &MF, MachineLoopInfo &MLI,
                             MachineDominatorTree &MDT, AliasAnalysis *AA,
                             const MachineBranchProbabilityInfo *MBPI,
                             const MachineBlockFrequencyInfo *MBFI,
                             const MachineRegisterInfo *MRI,
                             const MachineFrameInfo &MFI,
                             const HexagonRegisterInfo *QRI);
  HexagonGlobalSchedulerImpl(const HexagonGlobalSchedulerImpl &) = delete;
  HexagonGlobalSchedulerImpl &
  operator=(const HexagonGlobalSchedulerImpl &) = delete;

  ~HexagonGlobalSchedulerImpl() {
    // Free regions.
    for (std::vector<BasicBlockRegion *>::iterator I = PullUpRegions.begin(),
                                                   E = PullUpRegions.end();
         I != E; ++I)
      delete *I;
    MF.deleteMachineInstr(MI: Ext);
    MF.deleteMachineInstr(MI: Nop);
  }

  // initPacketizerState - initialize some internal flags.
  void initPacketizerState() override;

  // ignoreInstruction - Ignore bundling of pseudo instructions.
  bool ignoreInstruction(MachineInstr *MI);

  // isSoloInstruction - return true if instruction MI can not be packetized
  // with any other instruction, which means that MI itself is a packet.
  bool isSoloInstruction(const MachineInstr &MI) override;

  // Add MI to packetizer state. Returns false if it cannot fit in the packet.
  bool incrementalAddToPacket(MachineInstr &MI);

  // formPullUpRegions - Top level call to form regions.
  bool formPullUpRegions(MachineFunction &Fn);

  // performPullUp - Top level call for pull-up.
  bool performPullUp();

  // performPullUpCFG - Top level call for pull-up CFG.
  bool performPullUpCFG(MachineFunction &Fn);

  // performExposedOptimizations -
  // Look for optimization opportunities after pullup.
  bool performExposedOptimizations(MachineFunction &Fn);

  // optimizeBranching -
  // 1. A conditional-jump transfers control to a BB with
  // jump as the only instruction.
  //    if(p0) jump t1
  //    // ...
  //    t1: jump t2
  // 2. When a BB with a single conditional jump, jumps to succ-of-succ and
  // falls-through BB with only jump instruction.
  //    { if(p0) jump t1 }
  //    { jump t2 }
  //    t1: { ... }
  MachineBasicBlock *optimizeBranches(MachineBasicBlock *MBB,
                                      MachineBasicBlock *TBB,
                                      MachineInstr *FirstTerm,
                                      MachineBasicBlock *FBB);

  // removeRedundantBranches -
  // 1. Remove jump to the layout successor.
  // 2. Remove multiple (dual) jump to the same target.
  bool removeRedundantBranches(MachineBasicBlock *MBB, MachineBasicBlock *TBB,
                               MachineInstr *FirstTerm, MachineBasicBlock *FBB,
                               MachineInstr *SecondTerm);

  // optimizeDualJumps - optimize dual jumps in a packet
  // For now: Replace dual jump by single jump in case of a fall through.
  bool optimizeDualJumps(MachineBasicBlock *MBB, MachineBasicBlock *TBB,
                         MachineInstr *FirstTerm, MachineBasicBlock *FBB,
                         MachineInstr *SecondTerm);

  // Build the per-instruction register caches (MIUseSet/MIDefSet) for the
  // whole function — presumably via MIUseDefSet below; confirm at definition.
  void GenUseDefChain(MachineFunction &Fn);

  // Return region pointer or null if none found.
  BasicBlockRegion *getRegionForMBB(std::vector<BasicBlockRegion *> &Regions,
                                    MachineBasicBlock *MBB);

  // Saves all the used-regs and their aliases in Uses.
  // Saves all the defined-regs and their aliases in Defs.
  void MIUseDefSet(MachineInstr *MI, std::vector<unsigned> &Defs,
                   std::vector<unsigned> &Uses);

  // This is a very useful debug utility.
  unsigned countCompounds(MachineFunction &Fn);

  // Check bundle counts
  void checkBundleCounts(MachineFunction &Fn);

private:
  // Get next BB to be included into the region.
  MachineBasicBlock *getNextPURBB(MachineBasicBlock *MBB, bool SecondBest);

  // Mark Reg (and, presumably, its aliases — confirm at definition) in Set.
  void setUsedRegs(BitVector &Set, unsigned Reg);
  bool AliasingRegs(unsigned RegA, unsigned RegB);

  // Test is true if the two MIs cannot be safely reordered.
  bool ReorderDependencyTest(MachineInstr *MIa, MachineInstr *MIb);

  bool canAddMIToThisPacket(
      MachineInstr *MI,
      SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE> &Bundle);

  bool CanPromoteToDotNew(MachineInstr *MI, unsigned Reg);

  bool pullUpPeelBBLoop(MachineBasicBlock *PredBB, MachineBasicBlock *LoopBB);

  MachineInstr *findBundleAndBranch(MachineBasicBlock *BB,
                                    MachineBasicBlock::iterator &Bundle);

  // Does this bundle have any slots left?
  bool ResourcesAvailableInBundle(BasicBlockRegion *CurrentRegion,
                                  MachineBasicBlock::iterator &TargetPacket);

  // Perform the actual move.
  MachineInstr *MoveAndUpdateLiveness(
      BasicBlockRegion *CurrentRegion, MachineBasicBlock *HomeBB,
      MachineInstr *InstrToMove, bool NeedToNewify, unsigned DepReg,
      bool MovingDependentOp, MachineBasicBlock *OriginBB,
      MachineInstr *OriginalInstruction, SmallVector<MachineOperand, 4> &Cond,
      MachineBasicBlock::iterator &SourceLocation,
      MachineBasicBlock::iterator &TargetPacket,
      MachineBasicBlock::iterator &NextMI,
      std::vector<MachineInstr *> &backtrack);

  // Updates incremental kill patterns along the backtrack.
  void updateKillAlongThePath(MachineBasicBlock *HomeBB,
                              MachineBasicBlock *OriginBB,
                              MachineBasicBlock::instr_iterator &Head,
                              MachineBasicBlock::instr_iterator &Tail,
                              MachineBasicBlock::iterator &SourcePacket,
                              MachineBasicBlock::iterator &TargetPacket,
                              std::vector<MachineInstr *> &backtrack);

  // Gather list of pull-up candidates.
  bool findPullUpCandidates(MachineBasicBlock::iterator &WorkPoint,
                            MachineBasicBlock::iterator &FromHere,
                            std::vector<MachineInstr *> &backtrack,
                            unsigned MaxCandidates);

  // See if the instruction could be pulled up.
  bool tryMultipleInstructions(
      MachineBasicBlock::iterator &RetVal, /* output parameter */
      std::vector<BasicBlockRegion *>::iterator &CurrentRegion,
      MachineBasicBlock::iterator &NextMI,
      MachineBasicBlock::iterator &ToThisBBEnd,
      MachineBasicBlock::iterator &FromThisBBEnd, bool PathInRegion = true);

  // Try to move MI into existing bundle.
  bool MoveMItoBundle(BasicBlockRegion *CurrentRegion,
                      MachineBasicBlock::instr_iterator &InstrToMove,
                      MachineBasicBlock::iterator &NextMI,
                      MachineBasicBlock::iterator &TargetPacket,
                      MachineBasicBlock::iterator &SourceLocation,
                      std::vector<MachineInstr *> &backtrack,
                      bool MovingDependentOp, bool PathInRegion);

  // Insert temporary MI copy into MBB.
  MachineBasicBlock::instr_iterator
  insertTempCopy(MachineBasicBlock *MBB,
                 MachineBasicBlock::iterator &TargetPacket, MachineInstr *MI,
                 bool DeleteOldCopy);

  MachineBasicBlock::instr_iterator
  findInsertPositionInBundle(MachineBasicBlock::iterator &Bundle,
                             MachineInstr *MI, bool &LastInBundle);

  bool NeedToNewify(MachineBasicBlock::instr_iterator NewMI, unsigned *DepReg,
                    MachineInstr *TargetPacket);

  bool CanNewifiedBeUsedInBundle(MachineBasicBlock::instr_iterator NewMI,
                                 unsigned DepReg, MachineInstr *TargetPacket);

  void addInstructionToExistingBundle(MachineBasicBlock *HomeBB,
                                      MachineBasicBlock::instr_iterator &Head,
                                      MachineBasicBlock::instr_iterator &Tail,
                                      MachineBasicBlock::instr_iterator &NewMI,
                                      MachineBasicBlock::iterator &TargetPacket,
                                      MachineBasicBlock::iterator &NextMI,
                                      std::vector<MachineInstr *> &backtrack);

  void removeInstructionFromExistingBundle(
      MachineBasicBlock *HomeBB, MachineBasicBlock::instr_iterator &Head,
      MachineBasicBlock::instr_iterator &Tail,
      MachineBasicBlock::iterator &SourceLocation,
      MachineBasicBlock::iterator &NextMI, bool MovingDependentOp,
      std::vector<MachineInstr *> &backtrack);

  // Check for conditional register operaton.
  bool MIsCondAssign(MachineInstr *BMI, MachineInstr *MI,
                     SmallVector<unsigned, 4> &Defs);

  // Test all the conditions required for instruction to be
  // speculative. These are just required conditions, cost
  // or benefit should be computed elsewhere.
  bool canMIBeSpeculated(MachineInstr *MI, MachineBasicBlock *ToBB,
                         MachineBasicBlock *FromBB,
                         std::vector<MachineInstr *> &backtrack);

  // See if this branch target belongs to the current region.
  bool isBranchWithinRegion(BasicBlockRegion *CurrentRegion, MachineInstr *MI);

  // A collection of low level utilities.
  bool MIsAreDependent(MachineInstr *MIa, MachineInstr *MIb);
  bool MIsHaveTrueDependency(MachineInstr *MIa, MachineInstr *MIb);
  bool canReorderMIs(MachineInstr *MIa, MachineInstr *MIb);
  bool canCauseStall(MachineInstr *MI, MachineInstr *MJ);
  bool canThisMIBeMoved(MachineInstr *MI,
                        MachineBasicBlock::iterator &WorkPoint,
                        bool &MovingDependentOp, int &Cost);
  bool MIisDualJumpCandidate(MachineInstr *MI,
                             MachineBasicBlock::iterator &WorkPoint);
  bool DemoteToDotOld(MachineInstr *MI);
  bool isNewifiable(MachineBasicBlock::instr_iterator MII, unsigned DepReg,
                    MachineInstr *TargetPacket);
  bool IsNewifyStore(MachineInstr *MI);
  bool isJumpOutOfRange(MachineInstr *MI);
  bool IsDualJumpFirstCandidate(MachineInstr *MI);
  bool IsDualJumpFirstCandidate(MachineBasicBlock *MBB);
  bool IsDualJumpFirstCandidate(MachineBasicBlock::iterator &TargetPacket);
  bool IsNotDualJumpFirstCandidate(MachineInstr *MI);
  bool isJumpOutOfRange(MachineInstr *UnCond, MachineInstr *Cond);
  bool IsDualJumpSecondCandidate(MachineInstr *MI);
  bool tryAllocateResourcesForConstExt(MachineInstr *MI, bool UpdateState);
  bool isCompoundPair(MachineInstr *MIa, MachineInstr *MIb);
  bool doesMIDefinesPredicate(MachineInstr *MI, SmallVector<unsigned, 4> &Defs);
  bool AnalyzeBBBranches(MachineBasicBlock *MBB, MachineBasicBlock *&TBB,
                         MachineInstr *&FirstTerm, MachineBasicBlock *&FBB,
                         MachineInstr *&SecondTerm);
  inline bool multipleBranchesFromToBB(MachineBasicBlock *BB) const;
};
} // namespace
570
// Register the pass with the legacy pass manager under the "global-sched"
// command-line name, together with its analysis dependencies.
INITIALIZE_PASS_BEGIN(HexagonGlobalScheduler, "global-sched",
                      "Hexagon Global Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_END(HexagonGlobalScheduler, "global-sched",
                    "Hexagon Global Scheduler", false, false)
580
/// HexagonGlobalSchedulerImpl Ctor. Caches all analyses/target hooks and
/// builds the scratch Ext/Nop instructions used for resource queries.
HexagonGlobalSchedulerImpl::HexagonGlobalSchedulerImpl(
    MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
    AliasAnalysis *AA, const MachineBranchProbabilityInfo *MBPI,
    const MachineBlockFrequencyInfo *MBFI, const MachineRegisterInfo *MRI,
    const MachineFrameInfo &MFI, const HexagonRegisterInfo *QRI)
    : HexagonPacketizerList(MF, MLI, AA, nullptr, false), PullUpRegions(0),
      CurrentState((const HexagonInstrInfo *)TII), AA(AA), MBPI(MBPI),
      MBFI(MBFI), MRI(MRI), MFI(MFI), QRI(QRI), MLI(MLI), MDT(MDT),
      PacketSize(MF.getSubtarget().getSchedModel().IssueWidth) {
  QII = (const HexagonInstrInfo *)TII;
  // Scratch instructions: created unattached (owned by this object, deleted
  // in the destructor). Ext is used to probe the DFA for constant-extender
  // resources (see tryAllocateResourcesForConstExt).
  Ext = BuildMI(MF, MIMD: DebugLoc(), MCID: QII->get(Opcode: Hexagon::A4_ext));
  Nop = BuildMI(MF, MIMD: DebugLoc(), MCID: QII->get(Opcode: Hexagon::A2_nop));
  TSchedModel.init(TSInfo: &MF.getSubtarget());
}
596
597// Return bundle size without debug instructions.
598static unsigned nonDbgBundleSize(MachineBasicBlock::iterator &TargetPacket) {
599 MachineBasicBlock::instr_iterator MII = TargetPacket.getInstrIterator();
600 MachineBasicBlock::instr_iterator End = MII->getParent()->instr_end();
601 unsigned count = 0;
602 for (++MII; MII != End && MII->isInsideBundle(); ++MII) {
603 if (MII->isDebugInstr())
604 continue;
605 count++;
606 }
607 return count;
608}
609
/// The pass main entry point: gathers the required analyses, builds the
/// scheduler implementation object and runs region formation, pull-up and
/// post-pull-up optimizations over the function.
bool HexagonGlobalScheduler::runOnMachineFunction(MachineFunction &Fn) {
  auto &HST = Fn.getSubtarget<HexagonSubtarget>();
  if (DisablePullUp || !HST.usePackets() || skipFunction(F: Fn.getFunction()))
    return false;

  const MachineRegisterInfo *MRI = &Fn.getRegInfo();
  const MachineFrameInfo &MFI = Fn.getFrameInfo();
  const HexagonRegisterInfo *QRI = HST.getRegisterInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MachineDominatorTree &MDT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  const MachineBranchProbabilityInfo *MBPI =
      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  const MachineBlockFrequencyInfo *MBFI =
      &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Preserve compounds when optimizing for size, unless the option was set
  // explicitly on the command line.
  const Function &F = Fn.getFunction();
  if (F.hasOptSize() && PreventCompoundSeparation.getNumOccurrences() == 0)
    PreventCompoundSeparation = true;

  // Instantiate the Scheduler.
  HexagonGlobalSchedulerImpl GlobalSchedulerState(Fn, MLI, MDT, AA, MBPI, MBFI,
                                                  MRI, MFI, QRI);

  // DFA state table should not be empty.
  assert(GlobalSchedulerState.getResourceTracker() && "Empty DFA table!");

  // Loop over all of the basic blocks.
  // PullUp regions are basically traces with no side entrances.
  // Might want to traverse BB by frequency.
  GlobalSchedulerState.checkBundleCounts(Fn);

  // Pullup does not handle hazards yet.
  // NOTE(review): this bails out before any transformation, yet still
  // returns true ("modified") — confirm this is intentional.
  if (!DisablePullUp.getPosition() && ForceNoopHazards)
    return true;

  LLVM_DEBUG(GlobalSchedulerState.countCompounds(Fn));
  GlobalSchedulerState.GenUseDefChain(Fn);
  GlobalSchedulerState.formPullUpRegions(Fn);
  GlobalSchedulerState.performPullUp();
  GlobalSchedulerState.performPullUpCFG(Fn);
  if (PostPullUpOpt) {
    // Regions are stale after CFG changes; rebuild before the cleanup pass.
    GlobalSchedulerState.formPullUpRegions(Fn);
    GlobalSchedulerState.performExposedOptimizations(Fn);
  }
  LLVM_DEBUG(GlobalSchedulerState.countCompounds(Fn));

  return true;
}
662
663/// Allocate resources (i.e. 4 bytes) for constant extender. If succeess, return
664/// true, otherwise, return false.
665bool HexagonGlobalSchedulerImpl::tryAllocateResourcesForConstExt(
666 MachineInstr *MI, bool UpdateState = true) {
667 if (ResourceTracker->canReserveResources(MI&: *Ext)) {
668 // We do not always want to change the state of ResourceTracker.
669 // When we do not want to change it, we need to test for additional
670 // corner cases.
671 if (UpdateState)
672 ResourceTracker->reserveResources(MI&: *Ext);
673 else if (CurrentPacketMIs.size() >= PacketSize - 1)
674 return false;
675 return true;
676 }
677
678 return false;
679}
680
681static bool IsSchedBarrier(const MachineInstr *MI) {
682 return MI->getOpcode() == Hexagon::Y2_barrier;
683}
684
685static bool IsIndirectCall(const MachineInstr *MI) {
686 return MI->getOpcode() == Hexagon::J2_callr;
687}
688
689#ifndef NDEBUG
690static void DumpLinked(MachineInstr *MI) {
691 if (MI->isBundledWithPred())
692 dbgs() << "^";
693 else
694 dbgs() << " ";
695 if (MI->isBundledWithSucc())
696 dbgs() << "v";
697 else
698 dbgs() << " ";
699 MI->dump();
700}
701
702static void DumpPacket(MachineBasicBlock::instr_iterator MII) {
703 if (MII == MachineBasicBlock::instr_iterator()) {
704 dbgs() << "\tNULL\n";
705 return;
706 }
707 MachineInstr *MI = &*MII;
708 MachineBasicBlock *MBB = MI->getParent();
709 // Uninserted instruction.
710 if (!MBB) {
711 dbgs() << "\tUnattached: ";
712 DumpLinked(MI);
713 return;
714 }
715 dbgs() << "\t";
716 DumpLinked(MI);
717 if (MI->isBundle()) {
718 MachineBasicBlock::instr_iterator MIE = MI->getParent()->instr_end();
719 for (++MII; MII != MIE && MII->isInsideBundle() && !MII->isBundle();
720 ++MII) {
721 dbgs() << "\t\t*";
722 DumpLinked(&*MII);
723 }
724 }
725}
726
727static void DumpPacket(MachineBasicBlock::instr_iterator MII,
728 MachineBasicBlock::instr_iterator BBEnd) {
729 if (MII == BBEnd) {
730 dbgs() << "\tBBEnd\n";
731 return;
732 }
733
734 DumpPacket(MII);
735}
736#endif
737
738static bool isBranch(MachineInstr *MI) {
739 if (MI->isBundle()) {
740 MachineBasicBlock::instr_iterator MII = MI->getIterator();
741 MachineBasicBlock::instr_iterator MIE = MI->getParent()->instr_end();
742 for (++MII; MII != MIE && MII->isInsideBundle() && !MII->isBundle();
743 ++MII) {
744 if (MII->isBranch())
745 return true;
746 }
747 } else
748 return MI->isBranch();
749 return false;
750}
751
752/// Any of those must not be first dual jump. Everything else is OK.
753bool HexagonGlobalSchedulerImpl::IsNotDualJumpFirstCandidate(MachineInstr *MI) {
754 if (MI->isCall() || (MI->isBranch() && !QII->isPredicated(MI: *MI)) ||
755 MI->isReturn() || QII->isEndLoopN(Opcode: MI->getOpcode()))
756 return true;
757 return false;
758}
759
760/// These four functions clearly belong in HexagonInstrInfo.cpp.
761/// Is this MI could be first dual jump instruction?
762bool HexagonGlobalSchedulerImpl::IsDualJumpFirstCandidate(MachineInstr *MI) {
763 if (!PerformDualJumps)
764 return false;
765 if (MI->isBranch() && QII->isPredicated(MI: *MI) && !QII->isNewValueJump(MI: *MI) &&
766 !MI->isIndirectBranch() && !QII->isEndLoopN(Opcode: MI->getOpcode()))
767 return true;
768 // Missing loopN here, but not sure if there will be any benefit from it.
769 return false;
770}
771
/// This version covers the whole packet: true when the packet at
/// \p TargetPacket could host a first dual jump (no disqualifying
/// instruction inside it).
bool HexagonGlobalSchedulerImpl::IsDualJumpFirstCandidate(
    MachineBasicBlock::iterator &TargetPacket) {
  if (!PerformDualJumps)
    return false;
  MachineInstr *MI = &*TargetPacket;

  if (MI->isBundle()) {
    // If this is a bundle, it must be the last bundle in BB.
    if (&(*MI->getParent()->rbegin()) != MI)
      return false;

    MachineBasicBlock::instr_iterator MII = MI->getIterator();
    MachineBasicBlock::instr_iterator BBEnd = MI->getParent()->instr_end();
    // If there is a control flow op in this packet, this is the case
    // we look for, even if they are dependent on other members.
    for (++MII; MII != BBEnd && MII->isInsideBundle() && !MII->isBundle();
         ++MII)
      if (IsNotDualJumpFirstCandidate(MI: &*MII))
        return false;
  } else
    return IsDualJumpFirstCandidate(MI);

  return true;
}
797
798/// This version cover whole BB. There could be a BB
799/// with no control flow in it. In this case we can still pull-up a jump
800/// into it. Negative proof.
801bool HexagonGlobalSchedulerImpl::IsDualJumpFirstCandidate(
802 MachineBasicBlock *MBB) {
803 if (!PerformDualJumps)
804 return false;
805
806 for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
807 MBBEnd = MBB->instr_end();
808 MII != MBBEnd; ++MII) {
809 MachineInstr *MI = &*MII;
810 if (MI->isDebugInstr())
811 continue;
812 if (!MI->isBundle() && IsNotDualJumpFirstCandidate(MI))
813 return false;
814 }
815 return true;
816}
817
818/// Is this MI could be second dual jump instruction?
819bool HexagonGlobalSchedulerImpl::IsDualJumpSecondCandidate(MachineInstr *MI) {
820 if (!PerformDualJumps)
821 return false;
822 if ((MI->isBranch() && !QII->isNewValueJump(MI: *MI) && !MI->isIndirectBranch() &&
823 !QII->isEndLoopN(Opcode: MI->getOpcode())) ||
824 (MI->isCall() && !IsIndirectCall(MI)))
825 return true;
826 return false;
827}
828
// Since we have no exact knowledge of code layout,
// allow some safety buffer for jump target.
// This is measured in bytes, and is added to the estimated branch distance
// before it is checked against the branch range.
static const unsigned SafetyBuffer = 200;
833
834static MachineBasicBlock::instr_iterator
835getHexagonFirstInstrTerminator(MachineBasicBlock *MBB) {
836 MachineBasicBlock::instr_iterator MIB = MBB->instr_begin();
837 MachineBasicBlock::instr_iterator MIE = MBB->instr_end();
838 MachineBasicBlock::instr_iterator MII = MIB;
839 while (MII != MIE) {
840 if (!MII->isBundle() && MII->isTerminator())
841 return MII;
842 ++MII;
843 }
844 return MIE;
845}
846
847/// Check if a given instruction is:
848/// - a jump to a distant target
849/// - that exceeds its immediate range
850/// If both conditions are true, it requires constant extension.
851bool HexagonGlobalSchedulerImpl::isJumpOutOfRange(MachineInstr *MI) {
852 if (!MI || !MI->isBranch())
853 return false;
854 MachineBasicBlock *MBB = MI->getParent();
855 auto FirstTerm = getHexagonFirstInstrTerminator(MBB);
856 if (FirstTerm == MBB->instr_end())
857 return false;
858
859 unsigned InstOffset = BlockToInstOffset[MBB];
860 unsigned Distance = 0;
861 MachineBasicBlock::instr_iterator FTMII = FirstTerm;
862
863 // To save time, estimate exact position of a branch instruction
864 // as one at the end of the MBB.
865 // Number of instructions times typical instruction size.
866 InstOffset += (QII->nonDbgBBSize(BB: MBB) * HEXAGON_INSTR_SIZE);
867
868 MachineBasicBlock *TBB = NULL, *FBB = NULL;
869 SmallVector<MachineOperand, 4> Cond;
870
871 // Try to analyze this branch.
872 if (QII->analyzeBranch(MBB&: *MBB, TBB, FBB, Cond, AllowModify: false)) {
873 // Could not analyze it. See if this is something we can recognize.
874 // If it is a NVJ, it should always have its target in
875 // a fixed location.
876 if (QII->isNewValueJump(MI: *FirstTerm))
877 TBB = FirstTerm->getOperand(i: QII->getCExtOpNum(MI: *FirstTerm)).getMBB();
878 }
879 if (TBB && (MI == &*FirstTerm)) {
880 Distance =
881 (unsigned)std::abs(x: (long long)InstOffset - BlockToInstOffset[TBB]) +
882 SafetyBuffer;
883 LLVM_DEBUG(dbgs() << "\tFirst term offset(" << Distance << "): ";
884 FirstTerm->dump());
885 return !QII->isJumpWithinBranchRange(MI: *FirstTerm, offset: Distance);
886 }
887 if (FBB) {
888 // Look for second terminator.
889 FTMII++;
890 MachineInstr *SecondTerm = &*FTMII;
891 assert(FTMII != MBB->instr_end() &&
892 (SecondTerm->isBranch() || SecondTerm->isCall()) &&
893 "Bad second terminator");
894 if (MI != SecondTerm)
895 return false;
896 // Analyze the second branch in the BB.
897 Distance =
898 (unsigned)std::abs(x: (long long)InstOffset - BlockToInstOffset[FBB]) +
899 SafetyBuffer;
900 LLVM_DEBUG(dbgs() << "\tSecond term offset(" << Distance << "): ";
901 FirstTerm->dump());
902 return !QII->isJumpWithinBranchRange(MI: *SecondTerm, offset: Distance);
903 }
904 return false;
905}
906
907/// Returns true if an instruction can be promoted to .new predicate
908/// or new-value store.
909/// Performs implicit version checking.
910bool HexagonGlobalSchedulerImpl::isNewifiable(
911 MachineBasicBlock::instr_iterator MII, unsigned DepReg,
912 MachineInstr *TargetPacket) {
913 MachineInstr *MI = &*MII;
914 if (QII->isDotNewInst(MI: *MI) ||
915 !CanNewifiedBeUsedInBundle(NewMI: MII, DepReg, TargetPacket))
916 return false;
917 return (QII->isPredicated(MI: *MI) && QII->getDotNewPredOp(MI: *MI, MBPI: nullptr) > 0) ||
918 QII->mayBeNewStore(MI: *MI);
919}
920
921bool HexagonGlobalSchedulerImpl::DemoteToDotOld(MachineInstr *MI) {
922 int NewOpcode = QII->getDotOldOp(MI: *MI);
923 MI->setDesc(QII->get(Opcode: NewOpcode));
924 return true;
925}
926
927// initPacketizerState - Initialize packetizer flags
928void HexagonGlobalSchedulerImpl::initPacketizerState(void) {
929 CurrentPacketMIs.clear();
930 return;
931}
932
// ignoreInstruction - Returns true for instructions that should be skipped
// when bundling: debug instructions, and anything with no functional units
// mapped in the itinerary.
bool HexagonGlobalSchedulerImpl::ignoreInstruction(MachineInstr *MI) {
  // Debug values carry no machine resources.
  if (MI->isDebugInstr())
    return true;

  // We must print out inline assembly
  if (MI->isInlineAsm())
    return false;

  // We check if MI has any functional units mapped to it.
  // If it doesn't, we ignore the instruction.
  const MCInstrDesc &TID = MI->getDesc();
  unsigned SchedClass = TID.getSchedClass();
  const InstrStage *IS =
      ResourceTracker->getInstrItins()->beginStage(ItinClassIndx: SchedClass);
  unsigned FuncUnits = IS->getUnits();
  return !FuncUnits;
}
951
952// isSoloInstruction: - Returns true for instructions that must be
953// scheduled in their own packet.
954bool HexagonGlobalSchedulerImpl::isSoloInstruction(const MachineInstr &MI) {
955 if (MI.isInlineAsm())
956 return true;
957
958 if (MI.isEHLabel())
959 return true;
960
961 // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
962 // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
963 // They must not be grouped with other instructions in a packet.
964 if (IsSchedBarrier(MI: &MI))
965 return true;
966
967 if (MI.getOpcode() == Hexagon::A2_nop)
968 return true;
969
970 return false;
971}
972
973/// Return region ptr or null if non found.
974BasicBlockRegion *HexagonGlobalSchedulerImpl::getRegionForMBB(
975 std::vector<BasicBlockRegion *> &Regions, MachineBasicBlock *MBB) {
976 for (std::vector<BasicBlockRegion *>::iterator I = Regions.begin(),
977 E = Regions.end();
978 I != E; ++I) {
979 if ((*I)->findMBB(MBB))
980 return *I;
981 }
982 return NULL;
983}
984
985/// Select best candidate to form regions.
986static inline bool selectBestBB(BlockFrequency &BBaFreq, unsigned BBaSize,
987 BlockFrequency &BBbFreq, unsigned BBbSize) {
988 if (BBaFreq.getFrequency() > BBbFreq.getFrequency())
989 return true;
990 // TODO: This needs fine tuning.
991 // if (BBaSize < BBbSize)
992 // return true;
993 if (BBaFreq.getFrequency() == BBbFreq.getFrequency())
994 return true;
995 return false;
996}
997
/// Returns BB pointer if one of MBB successors should be added to the
/// current PullUp Region, NULL otherwise.
/// If SecondBest is defined, get next one after Best match.
/// Most of the time, since we practically always have only two successors,
/// this is "the other" BB successor which still matches original
/// selection criterion.
MachineBasicBlock *
HexagonGlobalSchedulerImpl::getNextPURBB(MachineBasicBlock *MBB,
                                         bool SecondBest = false) {
  if (!MBB)
    return NULL;

  BlockFrequency BestBlockFreq = BlockFrequency(0);
  unsigned BestBlockSize = 0;
  MachineBasicBlock *BestBB = NULL;
  // Runner-up candidate; see NOTE below on how it is maintained.
  MachineBasicBlock *SecondBestBB = NULL;

  // Catch single BB loops.
  for (MachineBasicBlock *Succ : MBB->successors())
    if (Succ == MBB)
      return NULL;

  // Iterate through successors to MBB.
  for (MachineBasicBlock *Succ : MBB->successors()) {
    BlockFrequency BlockFreq = MBFI->getBlockFreq(MBB: Succ);

    LLVM_DEBUG(dbgs() << "\tsucc BB(" << Succ->getNumber() << ") freq("
                      << BlockFreq.getFrequency() << ")");

    // Successors already claimed by a region are only reconsidered when
    // asking for the second-best choice.
    if (!SecondBest && getRegionForMBB(Regions&: PullUpRegions, MBB: Succ))
      continue;

    // If there is more than one predecessor to this block, do not include it.
    // It means there is a side entrance to it.
    if (Succ->pred_size() > 1)
      continue;

    // If this block is a target of an indirect branch, it should
    // also not be included.
    if (Succ->isEHPad() || Succ->hasAddressTaken())
      continue;

    // Get BB edge frequency.
    BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(Src: MBB, Dst: Succ);
    LLVM_DEBUG(dbgs() << "\tedge with freq(" << EdgeFreq.getFrequency()
                      << ")\n");

    if (selectBestBB(BBaFreq&: EdgeFreq, BBaSize: QII->nonDbgBBSize(BB: Succ), BBbFreq&: BestBlockFreq,
                     BBbSize: BestBlockSize)) {
      // New best: demote the previous best to runner-up.
      BestBlockFreq = EdgeFreq;
      BestBlockSize = QII->nonDbgBBSize(BB: Succ);
      SecondBestBB = BestBB;
      BestBB = Succ;
    } else if (!SecondBestBB) {
      // NOTE(review): only the first non-best successor is recorded here,
      // so SecondBestBB is a heuristic runner-up, not a strict second
      // maximum — confirm this is intended.
      SecondBestBB = Succ;
    }
  }
  if (SecondBest)
    return SecondBestBB;
  else
    return BestBB;
}
1060
/// Form region to perform pull-up.
/// Also populates BlockToInstOffset with an estimated layout offset for
/// every basic block, used later for branch-range checks.
bool HexagonGlobalSchedulerImpl::formPullUpRegions(MachineFunction &Fn) {
  const Function &F = Fn.getFunction();
  // Check for single-block functions and skip them.
  // NOTE(review): this inspects the IR Function's block list, not the
  // MachineFunction's; the two counts can differ after codegen — confirm
  // this is the intended check.
  if (std::next(x: F.begin()) == F.end())
    return false;

  // Compute map for BB distances.
  // Offset of the current instruction from the start.
  unsigned InstOffset = 0;

  LLVM_DEBUG(dbgs() << "****** Form PullUpRegions **************\n");
  // Loop over all basic blocks.
  // PullUp regions are basically traces with no side entrances.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe;
       ++MBB) {
    if (MBB->getAlignment() > llvm::Align(1)) {
      // Although we don't know the exact layout of the final code, we need
      // to account for alignment padding somehow. This heuristic pads each
      // aligned basic block according to the alignment value.
      int ByteAlign = MBB->getAlignment().value() - 1;
      InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
    }
    // Remember BB layout offset.
    BlockToInstOffset[&*MBB] = InstOffset;
    // Accumulate the byte size of every real (non-bundle-header)
    // instruction in this block.
    for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
                                           MIE = MBB->instr_end();
         MII != MIE; ++MII)
      if (!MII->isBundle())
        InstOffset += QII->getSize(MI: *MII);

    // If this BB is already in a region, move on.
    if (getRegionForMBB(Regions&: PullUpRegions, MBB: &*MBB))
      continue;

    LLVM_DEBUG(dbgs() << "\nRoot BB(" << MBB->getNumber() << ") name("
                      << MBB->getName() << ") size(" << QII->nonDbgBBSize(&*MBB)
                      << ") freq(" << printBlockFreq(*MBFI, *MBB)
                      << ") pred_size(" << MBB->pred_size() << ") in_func("
                      << MBB->getParent()->getFunction().getName() << ")\n");

    // NOTE(review): raw allocation; presumably released when
    // PullUpRegions is torn down — verify to rule out a leak.
    BasicBlockRegion *PUR = new BasicBlockRegion(TII, QRI, &*MBB);
    PullUpRegions.push_back(x: PUR);

    // Grow the region along the best-successor chain until no eligible
    // successor remains.
    for (MachineBasicBlock *MBBR = getNextPURBB(MBB: &*MBB); MBBR;
         MBBR = getNextPURBB(MBB: MBBR)) {
      LLVM_DEBUG(dbgs() << "Add BB(" << MBBR->getNumber() << ") name("
                        << MBBR->getName() << ") size("
                        << QII->nonDbgBBSize(MBBR) << ") freq("
                        << printBlockFreq(*MBFI, *MBBR) << ") in_func("
                        << MBBR->getParent()->getFunction().getName() << ")\n");
      PUR->addBBtoRegion(MBB: MBBR);
    }
  }
  return true;
}
1117
1118/// Return true if MI is an instruction we are unable to reason about
1119/// (like something with unmodeled memory side effects).
1120static inline bool isGlobalMemoryObject(MachineInstr *MI) {
1121 if (MI->hasUnmodeledSideEffects() || MI->hasOrderedMemoryRef() ||
1122 MI->isCall() ||
1123 (MI->getOpcode() == Hexagon::J2_jump && !MI->getOperand(i: 0).isMBB()))
1124 return true;
1125 return false;
1126}
1127
1128// This MI might have either incomplete info, or known to be unsafe
1129// to deal with (i.e. volatile object).
1130static inline bool isUnsafeMemoryObject(MachineInstr *MI) {
1131 if (!MI || MI->memoperands_empty())
1132 return true;
1133
1134 // We purposefully do no check for hasOneMemOperand() here
1135 // in hope to trigger an assert downstream in order to
1136 // finish implementation.
1137 if ((*MI->memoperands_begin())->isVolatile() || MI->hasUnmodeledSideEffects())
1138 return true;
1139
1140 if (!(*MI->memoperands_begin())->getValue())
1141 return true;
1142
1143 return false;
1144}
1145
1146/// This returns true if the two MIs could be memory dependent.
1147static bool MIsNeedChainEdge(AliasAnalysis *AA, const TargetInstrInfo *TII,
1148 MachineInstr *MIa, MachineInstr *MIb) {
1149 // Cover a trivial case - no edge is need to itself.
1150 if (MIa == MIb)
1151 return false;
1152
1153 if (TII->areMemAccessesTriviallyDisjoint(MIa: *MIa, MIb: *MIb))
1154 return false;
1155
1156 if (isUnsafeMemoryObject(MI: MIa) || isUnsafeMemoryObject(MI: MIb))
1157 return true;
1158
1159 // If we are dealing with two "normal" loads, we do not need an edge
1160 // between them - they could be reordered.
1161 if (!MIa->mayStore() && !MIb->mayStore())
1162 return false;
1163
1164 // To this point analysis is generic. From here on we do need AA.
1165 if (!AA)
1166 return true;
1167
1168 MachineMemOperand *MMOa = *MIa->memoperands_begin();
1169 MachineMemOperand *MMOb = *MIb->memoperands_begin();
1170
1171 // TODO: Need to handle multiple memory operands.
1172 // if either instruction has more than one memory operand, punt.
1173 if (!(MIa->hasOneMemOperand() && MIb->hasOneMemOperand()))
1174 return true;
1175
1176 if (!MMOa->getSize().hasValue() || !MMOb->getSize().hasValue())
1177 return true;
1178
1179 assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
1180 assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
1181 assert((MMOa->getSize().hasValue() && MMOb->getSize().hasValue()) &&
1182 "Size 0 memory access");
1183
1184 // If the base address of the two memoperands is the same. For instance,
1185 // x and x+4, then we can easily reason about them using the offset and size
1186 // of access.
1187 if (MMOa->getValue() == MMOb->getValue()) {
1188 if (MMOa->getOffset() > MMOb->getOffset()) {
1189 uint64_t offDiff = MMOa->getOffset() - MMOb->getOffset();
1190 return !(MMOb->getSize().getValue() <= offDiff);
1191 } else if (MMOa->getOffset() < MMOb->getOffset()) {
1192 uint64_t offDiff = MMOb->getOffset() - MMOa->getOffset();
1193 return !(MMOa->getSize().getValue() <= offDiff);
1194 }
1195 // MMOa->getOffset() == MMOb->getOffset()
1196 return true;
1197 }
1198
1199 int64_t MinOffset = std::min(a: MMOa->getOffset(), b: MMOb->getOffset());
1200 int64_t Overlapa = MMOa->getSize().getValue() + MMOa->getOffset() - MinOffset;
1201 int64_t Overlapb = MMOb->getSize().getValue() + MMOb->getOffset() - MinOffset;
1202
1203 AliasResult AAResult =
1204 AA->alias(LocA: MemoryLocation(MMOa->getValue(), Overlapa, MMOa->getAAInfo()),
1205 LocB: MemoryLocation(MMOb->getValue(), Overlapb, MMOb->getAAInfo()));
1206
1207 return (AAResult != AliasResult::NoAlias);
1208}
1209
1210/// Gather register def/uses from MI.
1211/// This treats possible (predicated) defs
1212/// as actually happening ones (conservatively).
1213static inline void parseOperands(MachineInstr *MI,
1214 SmallVector<unsigned, 4> &Defs,
1215 SmallVector<unsigned, 8> &Uses) {
1216 Defs.clear();
1217 Uses.clear();
1218
1219 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1220 const MachineOperand &MO = MI->getOperand(i);
1221
1222 if (MO.isReg()) {
1223 unsigned Reg = MO.getReg();
1224 if (!Reg)
1225 continue;
1226 assert(Register::isPhysicalRegister(Reg));
1227 if (MO.isUse())
1228 Uses.push_back(Elt: MO.getReg());
1229 if (MO.isDef())
1230 Defs.push_back(Elt: MO.getReg());
1231 } else if (MO.isRegMask()) {
1232 for (unsigned R = 1, NR = Hexagon::NUM_TARGET_REGS; R != NR; ++R)
1233 if (MO.clobbersPhysReg(PhysReg: R))
1234 Defs.push_back(Elt: R);
1235 }
1236 }
1237}
1238
1239void HexagonGlobalSchedulerImpl::MIUseDefSet(MachineInstr *MI,
1240 std::vector<unsigned> &Defs,
1241 std::vector<unsigned> &Uses) {
1242 Defs.clear();
1243 Uses.clear();
1244 assert(!MI->isBundle() && "Cannot parse regs of a bundle.");
1245 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1246 const MachineOperand &MO = MI->getOperand(i);
1247
1248 if (MO.isReg()) {
1249 unsigned Reg = MO.getReg();
1250 if (!Reg)
1251 continue;
1252 assert(Register::isPhysicalRegister(Reg));
1253 std::vector<unsigned> &Refs = MO.isUse() ? Uses : Defs;
1254 for (MCRegAliasIterator AI(MO.getReg(), QRI, true); AI.isValid(); ++AI)
1255 Refs.push_back(x: *AI);
1256 } else if (MO.isRegMask()) {
1257 for (unsigned R = 1, NR = Hexagon::NUM_TARGET_REGS; R != NR; ++R)
1258 if (MO.clobbersPhysReg(PhysReg: R))
1259 Defs.push_back(x: R);
1260 }
1261 }
1262}
1263
1264/// Some apparent dependencies are not actually restricting us since there
1265/// is a delay between assignment and actual usage, like in case of a call.
1266/// There could be more cases here, but this one seems the most obvious.
1267static bool isDelayedUseException(MachineInstr *MIa, MachineInstr *MIb) {
1268 if (MIa->isCall() && !MIb->isCall())
1269 return true;
1270 if (!MIa->isCall() && MIb->isCall())
1271 return true;
1272 return false;
1273}
1274
1275/// This is a check for resources availability and dependency
1276/// for an MI being tried for an existing bundle.
1277/// This is needed because we can:
1278/// - save time by filtering out trivial cases
1279/// - we want to reuse infrastructure that does not really knows
1280/// how to deal with parallel semantics of a bundle that already
1281/// exists. For instance, the following case:
1282/// SI %R6<def> = L2_ploadrif_io %P0<kill>, %R7, 4;
1283/// SJ %R6<def> = A2_tfr %R0;
1284/// will be happily allowed by isLegalToPacketizeTogether since in serial
1285/// semantics it never happens, and even if it does, it is legal. Not so
1286/// for when we __speculatively__ trying and MI for a bundle.
1287///
1288/// Note: This is not equivalent to MIsAreDependent().
1289/// MIsAreDependent only understands serial semantics.
1290/// These are OK to packetize together:
1291/// %R0<def> = L2_loadri_io %R18, 76; mem:LD4[%sunkaddr226](tbaa=!"int")
1292/// %R2<def> = ASL %R0<kill>, 3; flags: Inside bundle
1293///
bool HexagonGlobalSchedulerImpl::canAddMIToThisPacket(
    MachineInstr *MI,
    SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE> &Bundle) {
  if (!MI)
    return false;
  LLVM_DEBUG(dbgs() << "\n\t[canAddMIToThisPacket]: "; MI->dump());

  // Const extenders need custom resource checking...
  // Should be OK if we can update the check everywhere.
  if ((QII->isConstExtended(MI: *MI) || QII->isExtended(MI: *MI) ||
       isJumpOutOfRange(MI)) &&
      !tryAllocateResourcesForConstExt(MI, UpdateState: false))
    return false;

  // Ask DFA if machine resource is available for MI.
  if (!ResourceTracker->canReserveResources(MI&: *MI) || !shouldAddToPacket(MI: *MI)) {
    LLVM_DEBUG(dbgs() << "\tNo DFA resources.\n");
    return false;
  }

  // Scratch def/use sets: {Defs,Uses} for MI, {BundleDefs,BundleUses}
  // recomputed for each existing bundle member.
  SmallVector<unsigned, 4> BundleDefs;
  SmallVector<unsigned, 8> BundleUses;
  SmallVector<unsigned, 4> Defs;
  SmallVector<unsigned, 8> Uses;
  MachineInstr *FirstCompound = NULL, *SecondCompound = NULL;
  MachineInstr *FirstDuplex = NULL, *SecondDuplex = NULL;

  parseOperands(MI, Defs, Uses);
  // Screen MI against every instruction already in the bundle; any single
  // conflict rejects the insertion.
  for (SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE>::iterator
           BI = Bundle.begin(),
           BE = Bundle.end();
       BI != BE; ++BI) {
    BundleDefs.clear();
    BundleUses.clear();
    parseOperands(MI: *BI, Defs&: BundleDefs, Uses&: BundleUses);

    MachineInstr *Inst1 = *BI;
    MachineInstr *Inst2 = MI;

    // Optional per-packet caps on floating-point and complex ops.
    if (Inst1->getParent() && OneFloatPerPacket && QII->isFloat(MI: *Inst1) &&
        QII->isFloat(MI: *Inst2))
      return false;

    if (Inst1->getParent() && OneComplexPerPacket && QII->isComplex(MI: *Inst1) &&
        QII->isComplex(MI: *Inst2))
      return false;

    // Keep an existing compound-candidate pair intact: do not insert a
    // memory op into a packet whose members could be compounded.
    if (PreventCompoundSeparation)
      if (QII->getCompoundCandidateGroup(MI: **BI)) {
        if (!FirstCompound)
          FirstCompound = *BI;
        else {
          SecondCompound = *BI;
          if (isCompoundPair(MIa: FirstCompound, MIb: SecondCompound)) {
            if (MI->mayLoad() || MI->mayStore()) {
              LLVM_DEBUG(dbgs() << "\tPrevent compound destruction.\n");
              return false;
            }
          }
        }
      }
    // Same idea for duplex-candidate pairs.
    if (PreventDuplexSeparation)
      if (QII->getDuplexCandidateGroup(MI: **BI)) {
        if (!FirstDuplex)
          FirstDuplex = *BI;
        else {
          SecondDuplex = *BI;
          if (QII->isDuplexPair(MIa: *FirstDuplex, MIb: *SecondDuplex)) {
            if (MI->mayLoad() || MI->mayStore()) {
              LLVM_DEBUG(dbgs() << "\tPrevent duplex destruction.\n");
              return false;
            }
          }
        }
      }

    for (unsigned i = 0; i < Defs.size(); i++) {
      // Check for multiple definitions in the same packet.
      for (unsigned j = 0; j < BundleDefs.size(); j++)
        // Multiple defs in the same packet.
        // Calls are OK here.
        // Also if we have multiple defs of PC, this simply means we are
        // dealing with dual jumps.
        if (AliasingRegs(RegA: Defs[i], RegB: BundleDefs[j]) &&
            !isDelayedUseException(MIa: MI, MIb: *BI) &&
            !(IsDualJumpFirstCandidate(MI: *BI) && IsDualJumpSecondCandidate(MI))) {
          LLVM_DEBUG(dbgs() << "\tMultiple defs.\n\t"; MI->dump();
                     dbgs() << "\t"; (*BI)->dump());
          return false;
        }

      // See if we are creating a swap case as we go, and disallow
      // it for now.
      // Also, this is not OK:
      // if (!p0) r7 = r5
      // if (!p0) r5 = #0
      // But this is fine:
      // if (!p0) r7 = r5
      // if (p0) r5 = #0
      // Also - this is not a swap, but an opportunity to newify:
      // %P1<def> = C2_cmpeqi %R0, 0; flags:
      // %R0<def> = L2_ploadrif_io %P1<kill>, %R29, 8;
      // TODO: Handle this.
      for (unsigned j = 0; j < BundleUses.size(); j++)
        if (AliasingRegs(RegA: Defs[i], RegB: BundleUses[j])) {
          for (unsigned k = 0; k < BundleDefs.size(); k++)
            for (unsigned l = 0; l < Uses.size(); l++) {
              if (AliasingRegs(RegA: BundleDefs[k], RegB: Uses[l]) &&
                  !isDelayedUseException(MIa: MI, MIb: *BI)) {
                LLVM_DEBUG(dbgs() << "\tSwap detected:\n\t"; MI->dump();
                           dbgs() << "\t"; (*BI)->dump());
                return false;
              }
            }
        }
    }

    for (unsigned i = 0; i < Uses.size(); i++) {
      // Check for true data dependency.
      for (unsigned j = 0; j < BundleDefs.size(); j++)
        if (AliasingRegs(RegA: Uses[i], RegB: BundleDefs[j]) &&
            !isDelayedUseException(MIa: MI, MIb: *BI)) {
          LLVM_DEBUG(dbgs() << "\tImmediate Use detected on reg("
                            << printReg(Uses[i], QRI) << ")\n\t";
                     MI->dump(); dbgs() << "\t"; (*BI)->dump());
          // TODO: This could be an opportunity for newifying:
          // %P0<def> = C2_cmpeqi %R26, 0
          // %R26<def> = A2_tfr %R0<kill>
          // if (CanPromoteToDotNew(MI, Uses[i]))
          //   LLVM_DEBUG(dbgs() << "\tCan promoto to .new form.\n");
          // else
          return false;
        }
    }

    // For calls we also check callee save regs.
    if ((*BI)->isCall()) {
      for (const uint16_t *I = QRI->getCalleeSavedRegs(MF: &MF); *I; ++I) {
        for (unsigned i = 0; i < Defs.size(); i++) {
          if (AliasingRegs(RegA: Defs[i], RegB: *I)) {
            LLVM_DEBUG(dbgs() << "\tAlias with call.\n");
            return false;
          }
        }
      }
    }

    // If this is return, we are probably speculating (otherwise
    // we could not pull in there) and will not win from pulling
    // into this location anyhow.
    // Example: a side exit.
    // if (!p0) dealloc_return
    // TODO: Can check that we do not overwrite return value
    // and proceed.
    if ((*BI)->isBarrier()) {
      LLVM_DEBUG(dbgs() << "\tBarrier interference.\n");
      return false;
    }

    // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
    // contain a speculative indirect jump,
    // a new-value compare jump or a dealloc_return.
    // Speculative indirect jumps (predicate + .new + indirect):
    // if ([!]Ps.new) jumpr:t Rs
    // if ([!]Ps.new) jumpr:nt Rs
    // @note: We don't want to pull across a call to be on the safe side.
    if (QII->isLoopN(MI: *MI) &&
        ((QII->isPredicated(MI: **BI) && QII->isPredicatedNew(MI: **BI) &&
          QII->isJumpR(MI: **BI)) ||
         QII->isNewValueJump(MI: **BI) || QII->isDeallocRet(MI: **BI) ||
         (*BI)->isCall())) {
      LLVM_DEBUG(dbgs() << "\tLoopN pull interference.\n");
      return false;
    }

    // The opposite is also true.
    if (QII->isLoopN(MI: **BI) &&
        ((QII->isPredicated(MI: *MI) && QII->isPredicatedNew(MI: *MI) &&
          QII->isJumpR(MI: *MI)) ||
         QII->isNewValueJump(MI: *MI) || QII->isDeallocRet(MI: *MI) || MI->isCall())) {
      LLVM_DEBUG(dbgs() << "\tResident LoopN.\n");
      return false;
    }

    // @todo \ref-manual 7.6.1
    // Presence of NVJ adds more restrictions.
    if (QII->isNewValueJump(MI: **BI) &&
        (MI->mayStore() || MI->getOpcode() == Hexagon::S2_allocframe ||
         MI->isCall())) {
      LLVM_DEBUG(dbgs() << "\tNew val Jump.\n");
      return false;
    }

    // For memory operations, check aliasing.
    // First, be conservative on these objects. Might be overly constraining,
    // so recheck.
    if (isGlobalMemoryObject(MI: *BI) || isGlobalMemoryObject(MI))
      // Currently it catches things like this:
      // S2_storerinew_io %R29, 32, %R16
      // S2_storeri_io %R29, 68, %R0
      // which we can reason about.
      // TODO: revisit.
      return false;

    // If packet has a new-value store, MI can't be a store instruction.
    if (QII->isNewValueStore(MI: **BI) && MI->mayStore()) {
      LLVM_DEBUG(dbgs() << "\tNew Value Store to store.\n");
      return false;
    }

    // A memop and a store would both need slot 0 (per the message below).
    if ((QII->isMemOp(MI: **BI) && MI->mayStore()) ||
        (QII->isMemOp(MI: *MI) && (*BI)->mayStore())) {
      LLVM_DEBUG(
          dbgs() << "\tSlot 0 not available for store because of memop.\n");
      return false;
    }

    // If any of these is true, check aliasing.
    if ((MI->mayLoad() && (*BI)->mayStore()) ||
        (MI->mayStore() && (*BI)->mayLoad()) ||
        (MI->mayStore() && (*BI)->mayStore())) {
      if (MIsNeedChainEdge(AA, TII, MIa: MI, MIb: *BI)) {
        LLVM_DEBUG(dbgs() << "\tAliasing detected:\n\t"; MI->dump();
                   dbgs() << "\t"; (*BI)->dump());
        return false;
      }
    }
    // Do not move an instruction to this packet if this packet
    // already contains a speculated instruction.
    std::map<MachineInstr *, MachineBasicBlock *>::iterator MIMoved;
    MIMoved = SpeculatedIns.find(x: *BI);
    if ((MIMoved != SpeculatedIns.end()) &&
        (MIMoved->second != (*BI)->getParent())) {
      LLVM_DEBUG(
          dbgs() << "This packet already contains a speculated instruction";
          (*BI)->dump(););
      return false;
    }
  }

  // Do not pull-up vector instructions because these instructions have
  // multi-cycle latencies, and the pull-up pass doesn't correctly account
  // for instructions that stall for more than one cycle.
  if (QII->isHVXVec(MI: *MI))
    return false;

  return true;
}
1542
1543/// Test is true if the two MIs cannot be safely reordered.
1544bool HexagonGlobalSchedulerImpl::ReorderDependencyTest(MachineInstr *MIa,
1545 MachineInstr *MIb) {
1546 SmallVector<unsigned, 4> DefsA;
1547 SmallVector<unsigned, 4> DefsB;
1548 SmallVector<unsigned, 8> UsesA;
1549 SmallVector<unsigned, 8> UsesB;
1550
1551 parseOperands(MI: MIa, Defs&: DefsA, Uses&: UsesA);
1552 parseOperands(MI: MIb, Defs&: DefsB, Uses&: UsesB);
1553
1554 for (SmallVector<unsigned, 4>::iterator IDA = DefsA.begin(),
1555 IDAE = DefsA.end();
1556 IDA != IDAE; ++IDA) {
1557 for (SmallVector<unsigned, 8>::iterator IUB = UsesB.begin(),
1558 IUBE = UsesB.end();
1559 IUB != IUBE; ++IUB)
1560 // True data dependency.
1561 if (AliasingRegs(RegA: *IDA, RegB: *IUB))
1562 return true;
1563
1564 for (SmallVector<unsigned, 4>::iterator IDB = DefsB.begin(),
1565 IDBE = DefsB.end();
1566 IDB != IDBE; ++IDB)
1567 // Output dependency.
1568 if (AliasingRegs(RegA: *IDA, RegB: *IDB))
1569 return true;
1570 }
1571
1572 for (SmallVector<unsigned, 4>::iterator IDB = DefsB.begin(),
1573 IDBE = DefsB.end();
1574 IDB != IDBE; ++IDB) {
1575 for (SmallVector<unsigned, 8>::iterator IUA = UsesA.begin(),
1576 IUAE = UsesA.end();
1577 IUA != IUAE; ++IUA)
1578 // True data dependency.
1579 if (AliasingRegs(RegA: *IDB, RegB: *IUA))
1580 return true;
1581 }
1582
1583 // Do not reorder two calls...
1584 if (MIa->isCall() && MIb->isCall())
1585 return true;
1586
1587 // For calls we also check callee save regs.
1588 if (MIa->isCall())
1589 for (const uint16_t *I = QRI->getCalleeSavedRegs(MF: &MF); *I; ++I) {
1590 for (unsigned i = 0; i < DefsB.size(); i++) {
1591 if (AliasingRegs(RegA: DefsB[i], RegB: *I))
1592 return true;
1593 }
1594 }
1595
1596 if (MIb->isCall())
1597 for (const uint16_t *I = QRI->getCalleeSavedRegs(MF: &MF); *I; ++I) {
1598 for (unsigned i = 0; i < DefsA.size(); i++) {
1599 if (AliasingRegs(RegA: DefsA[i], RegB: *I))
1600 return true;
1601 }
1602 }
1603
1604 // For memory operations, check aliasing.
1605 // First, be conservative on these objects.
1606 // Might be overly constraining, so recheck.
1607 if ((isGlobalMemoryObject(MI: MIa)) || (isGlobalMemoryObject(MI: MIb)))
1608 return true;
1609
1610 // If any of these is true, check aliasing.
1611 if (((MIa->mayLoad() && MIb->mayStore()) ||
1612 (MIa->mayStore() && MIb->mayLoad()) ||
1613 (MIa->mayStore() && MIb->mayStore())) &&
1614 MIsNeedChainEdge(AA, TII, MIa, MIb))
1615 return true;
1616
1617 return false;
1618}
1619
1620/// Serial semantics.
1621bool HexagonGlobalSchedulerImpl::MIsAreDependent(MachineInstr *MIa,
1622 MachineInstr *MIb) {
1623 if (MIa == MIb)
1624 return false;
1625
1626 if (ReorderDependencyTest(MIa, MIb)) {
1627 LLVM_DEBUG(dbgs() << "\t\t[MIsAreDependent]:\n\t\t"; MIa->dump();
1628 dbgs() << "\t\t"; MIb->dump());
1629 return true;
1630 }
1631 return false;
1632}
1633
1634/// Serial semantics.
1635bool HexagonGlobalSchedulerImpl::MIsHaveTrueDependency(MachineInstr *MIa,
1636 MachineInstr *MIb) {
1637 if (MIa == MIb)
1638 return false;
1639
1640 SmallVector<unsigned, 4> DefsA;
1641 SmallVector<unsigned, 4> DefsB;
1642 SmallVector<unsigned, 8> UsesA;
1643 SmallVector<unsigned, 8> UsesB;
1644
1645 parseOperands(MI: MIa, Defs&: DefsA, Uses&: UsesA);
1646 parseOperands(MI: MIb, Defs&: DefsB, Uses&: UsesB);
1647
1648 for (SmallVector<unsigned, 4>::iterator IDA = DefsA.begin(),
1649 IDAE = DefsA.end();
1650 IDA != IDAE; ++IDA) {
1651 for (SmallVector<unsigned, 8>::iterator IUB = UsesB.begin(),
1652 IUBE = UsesB.end();
1653 IUB != IUBE; ++IUB)
1654 // True data dependency.
1655 if (AliasingRegs(RegA: *IDA, RegB: *IUB))
1656 return true;
1657 }
1658 return false;
1659}
1660
1661/// Sequential semantics. Can these two MIs be reordered?
1662/// Moving MIa from "behind" to "in front" of MIb.
1663bool HexagonGlobalSchedulerImpl::canReorderMIs(MachineInstr *MIa,
1664 MachineInstr *MIb) {
1665 if (!MIa || !MIb)
1666 return false;
1667
1668 // Within bundle semantics are parallel.
1669 if (MIa->isBundle()) {
1670 MachineBasicBlock::instr_iterator MII = MIa->getIterator();
1671 MachineBasicBlock::instr_iterator MIIE = MIa->getParent()->instr_end();
1672 for (++MII; MII != MIIE && MII->isInsideBundle(); ++MII) {
1673 if (MII->isDebugInstr())
1674 continue;
1675 if (MIsAreDependent(MIa: &*MII, MIb))
1676 return false;
1677 }
1678 return true;
1679 }
1680 return !MIsAreDependent(MIa, MIb);
1681}
1682
1683static inline bool MIMustNotBePulledUp(MachineInstr *MI) {
1684 if (MI->isInlineAsm() || MI->isEHLabel() || IsSchedBarrier(MI))
1685 return true;
1686 return false;
1687}
1688
1689static inline bool MIShouldNotBePulledUp(MachineInstr *MI) {
1690 if (MI->isBranch() || MI->isReturn() || MI->isCall() || MI->isBarrier() ||
1691 MI->isTerminator() || MIMustNotBePulledUp(MI))
1692 return true;
1693 return false;
1694}
1695
// Only approve dual jump candidate:
// It is a branch, and we move it to last packet of the target location.
// \param MI        the (second) jump we would like to pull up.
// \param WorkPoint iterator into the prospective target packet.
// \return true when forming a dual jump by moving MI to WorkPoint is legal.
bool HexagonGlobalSchedulerImpl::MIisDualJumpCandidate(
    MachineInstr *MI, MachineBasicBlock::iterator &WorkPoint) {
  // Basic gating: dual-jump formation must be enabled, MI must look like a
  // second-jump candidate, and must not be fundamentally unmovable.
  if (!PerformDualJumps || !IsDualJumpSecondCandidate(MI) ||
      MIMustNotBePulledUp(MI) || ignoreInstruction(MI))
    return false;

  MachineBasicBlock *FromThisBB = MI->getParent();
  MachineBasicBlock *ToThisBB = WorkPoint->getParent();

  LLVM_DEBUG(dbgs() << "\t\t[MIisDualJumpCandidate] To BB("
                    << ToThisBB->getNumber() << ") From BB("
                    << FromThisBB->getNumber() << ")\n");
  // If the question is about the same BB, we do not want to get
  // dual jump involved - it is a different case.
  if (FromThisBB == ToThisBB)
    return false;

  // Dual jump could only be done on neigboring BBs.
  // The FromThisBB must only have one predecessor - the basic
  // block we are trying to merge.
  // NOTE(review): pred_begin() is dereferenced without a pred_empty() check -
  // assumes FromThisBB is reachable (has at least one predecessor); confirm
  // callers guarantee this.
  if ((*(FromThisBB->pred_begin()) != ToThisBB) ||
      (std::next(x: FromThisBB->pred_begin()) != FromThisBB->pred_end()))
    return false;

  // If this block is a target of an indirect branch, it should
  // also not be included.
  if (FromThisBB->isEHPad() || FromThisBB->hasAddressTaken())
    return false;

  // Now we must preserve original fall through paths. In fact we
  // might be dealing with 3way branching.
  MachineBasicBlock *ToTBB = NULL, *ToFBB = NULL;

  if (ToThisBB->succ_size() == 2) {
    // Check the branch from target block.
    // If we have two successors, we must understand the branch.
    SmallVector<MachineOperand, 4> ToCond;
    if (!QII->analyzeBranch(MBB&: *ToThisBB, TBB&: ToTBB, FBB&: ToFBB, Cond&: ToCond, AllowModify: false)) {
      // Have the branch. Check the topology.
      LLVM_DEBUG(dbgs() << "\t\tToThisBB has two successors: TBB("
                        << ToTBB->getNumber() << ") and FBB(";
                 if (ToFBB) dbgs() << ToFBB->getNumber() << ").\n";
                 else dbgs() << "None"
                             << ").\n";);
      if (ToTBB == FromThisBB) {
        // If the from BB is not the fall through, we can only handle case
        // when second branch is unconditional jump.
        return false;
      } else if (ToFBB == FromThisBB || !ToFBB) {
        // If the fall through path of ToBB is our FromBB, we have more freedom
        // of operation.
        LLVM_DEBUG(dbgs() << "\t\tFall through jump target.\n");
      }
    } else {
      // analyzeBranch failed; without understanding the branch we cannot
      // prove the transformation safe.
      LLVM_DEBUG(dbgs() << "\t\tUnable to analyze first branch.\n");
      return false;
    }
  } else if (ToThisBB->succ_size() == 1) {
    // Single successor: it must be the fall-through into FromThisBB.
    ToFBB = *ToThisBB->succ_begin();
    assert(ToFBB == FromThisBB && "Bad CFG layout");
  } else
    return false;

  // First unbundled control flow instruction in the BB.
  // Only then ask whether the target packet already holds a suitable
  // first jump for the pair.
  if (!MI->isBundled() && MI == &*FromThisBB->getFirstNonDebugInstr())
    return IsDualJumpFirstCandidate(TargetPacket&: WorkPoint);

  return false;
}
1767
// Check whether moving MI to MJ's packet would cause a stall from a previous
// packet.
// For each register MI uses, scan backwards from MJ to the top of MJ's block
// for the nearest def of that register; if the def's operand latency exceeds
// the number of packets separating it from MJ's packet, the move would stall.
bool HexagonGlobalSchedulerImpl::canCauseStall(MachineInstr *MI,
                                               MachineInstr *MJ) {
  SmallVector<unsigned, 4> DefsMJI;
  SmallVector<unsigned, 8> UsesMJI;
  SmallVector<unsigned, 4> DefsMI;
  SmallVector<unsigned, 8> UsesMI;
  parseOperands(MI, Defs&: DefsMI, Uses&: UsesMI);

  for (auto Use : UsesMI) {
    int UseIdx = MI->findRegisterUseOperandIdx(Reg: Use, /*TRI=*/nullptr);
    if (UseIdx == -1)
      continue;
    bool ShouldBreak = false;
    // Packets seen so far between MJ and the def candidate.
    int BundleCount = 0;
    for (MachineBasicBlock::instr_iterator
             Begin = MJ->getParent()->instr_begin(),
             MJI = MJ->getIterator();
         MJI != Begin; --MJI) {
      // A BUNDLE header delimits one whole packet.
      if (MJI->isBundle()) {
        ++BundleCount;
        continue;
      }
      parseOperands(MI: &*MJI, Defs&: DefsMJI, Uses&: UsesMJI);
      for (auto Def : DefsMJI) {
        if (Def == Use || AliasingRegs(RegA: Def, RegB: Use)) {
          int DefIdx = MJI->findRegisterDefOperandIdx(Reg: Def, /*TRI=*/nullptr);
          if (DefIdx >= 0) {
            int Latency =
                TSchedModel.computeOperandLatency(DefMI: &*MJI, DefOperIdx: DefIdx, UseMI: MI, UseOperIdx: UseIdx);
            if (Latency > BundleCount)
              // There will be a stall if MI is moved to MJ's packet.
              return true;
            // We found the def for the use and it does not cause a stall.
            // Continue checking the next use for a potential stall.
            ShouldBreak = true;
            break;
          }
        }
      }
      if (ShouldBreak)
        break;
      // An unbundled, non-debug instruction counts as a packet of its own.
      if (!MJI->isBundled() && !MJI->isDebugInstr())
        ++BundleCount;
    }
  }
  // No use of MI meets a too-recent def: no stall expected.
  return false;
}
1817
/// Analyze this instruction. If this is an unbundled instruction, see
/// if it in theory could be packetized.
/// If it is already part of a packet, see if it has internal
/// dependencies to this packet.
/// \param MI                the instruction considered for pull-up.
/// \param WorkPoint         iterator at the prospective target location.
/// \param MovingDependentOp set to true when the move is approved even
///                          though MI depends on members of its packet.
/// \param Cost              adjusted (penalized) for dependent moves.
/// \return true when MI may be moved to WorkPoint.
bool HexagonGlobalSchedulerImpl::canThisMIBeMoved(
    MachineInstr *MI, MachineBasicBlock::iterator &WorkPoint,
    bool &MovingDependentOp, int &Cost) {
  if (!MI)
    return false;
  // By default, it is a normal move.
  MovingDependentOp = false;
  Cost = 0;
  // If MI is a 'formed' compound not potential compound, bail out.
  if (QII->isCompoundBranchInstr(MI: *MI))
    return false;
  // See if we can potentially break potential compound candidates,
  // and do not do it.
  if (PreventCompoundSeparation && MI->isBundled()) {
    enum HexagonII::CompoundGroup MICG = QII->getCompoundCandidateGroup(MI: *MI);
    if (MICG != HexagonII::HCG_None) {
      // Check internal dependencies in the bundle.
      // First, find the bundle header.
      MachineBasicBlock::instr_iterator MII = MI->getIterator();
      for (--MII; MII->isBundled(); --MII)
        if (MII->isBundle())
          break;

      // Scan every other member of MI's bundle; refuse the move if MI
      // could form a compound with one of them.
      MachineBasicBlock::instr_iterator BBEnd = MI->getParent()->instr_end();
      for (++MII; MII != BBEnd && MII->isInsideBundle() && !MII->isBundle();
           ++MII) {
        if (&(*MII) == MI)
          continue;
        if (isCompoundPair(MIa: &*MII, MIb: MI)) {
          LLVM_DEBUG(dbgs() << "\tPrevent Compound separation.\n");
          return false;
        }
      }
    }
  }
  // Same thing for duplex candidates.
  if (PreventDuplexSeparation && MI->isBundled()) {
    if (QII->getDuplexCandidateGroup(MI: *MI) != HexagonII::HSIG_None) {
      // Check internal dependencies in the bundle.
      // First, find the bundle header.
      MachineBasicBlock::instr_iterator MII = MI->getIterator();
      for (--MII; MII->isBundled(); --MII)
        if (MII->isBundle())
          break;

      // Refuse the move if MI could pair into a duplex with a bundle mate.
      MachineBasicBlock::instr_iterator BBEnd = MI->getParent()->instr_end();
      for (++MII; MII != BBEnd && MII->isInsideBundle() && !MII->isBundle();
           ++MII) {
        if ((&(*MII) != MI) && QII->isDuplexPair(MIa: *MII, MIb: *MI)) {
          LLVM_DEBUG(dbgs() << "\tPrevent Duplex separation.\n");
          return false;
        }
      }
    }
  }

  // If we perform dual jump formation during the pull-up,
  // then we want to consider several additional situations.
  // a) Allow moving of dependent instruction from a packet
  // b) Allow moving some control flow instructions if they meet
  //    dual jump criteria.
  if (MIisDualJumpCandidate(MI, WorkPoint)) {
    LLVM_DEBUG(dbgs() << "\t\tDual jump candidate:\t"; MI->dump());
    // Here we are breaking our general assumption about not moving dependent
    // instructions. To save us two more expensive checks down the line,
    // propagate the information directly.
    MovingDependentOp = true;
    return true;
  }

  // Any of these should not even be tried.
  if (MIShouldNotBePulledUp(MI) || ignoreInstruction(MI))
    return false;
  // Pulling up these instructions could put them
  // out of jump range/offset size.
  if (QII->isLoopN(MI: *MI)) {
    // Offsets (in bytes) of the loop label block and the prospective new
    // location of the loopN instruction.
    unsigned dist_looplabel =
        BlockToInstOffset.find(Val: MI->getOperand(i: 0).getMBB())->second;
    unsigned dist_newloop0 =
        BlockToInstOffset.find(Val: WorkPoint->getParent())->second;
    // Check if the jump in the last instruction is within range.
    // The target block size and SafetyBuffer pad the estimate conservatively.
    unsigned Distance =
        (unsigned)std::abs(x: (long long)dist_looplabel - dist_newloop0) +
        QII->nonDbgBBSize(BB: WorkPoint->getParent()) * 4 + SafetyBuffer;
    const HexagonInstrInfo *HII = (const HexagonInstrInfo *)TII;
    if (!HII->isJumpWithinBranchRange(MI: *MI, offset: Distance)) {
      LLVM_DEBUG(dbgs() << "\nloopN cannot be moved since Distance: "
                        << Distance << " outside branch range.";);
      return false;
    }
    LLVM_DEBUG(dbgs() << "\nloopN can be moved since Distance: " << Distance
                      << " within branch range.";);
  }
  // If the def-set of an MI is one of the live-ins then MI should
  // kill that reg and no instruction before MI should use it.
  // For simplicity, allow only if MI is the first instruction in the MBB.
  std::map<MachineInstr *, std::vector<unsigned>>::const_iterator DefIter =
      MIDefSet.find(x: MI);
  MachineBasicBlock *MBB = MI->getParent();
  for (unsigned i = 0; DefIter != MIDefSet.end() && i < DefIter->second.size();
       ++i) {
    if (MBB->isLiveIn(Reg: DefIter->second[i]) &&
        &*MBB->getFirstNonDebugInstr() != MI)
      return false;
  }
  // If it is part of a bundle, analyze it.
  if (MI->isBundled()) {
    // Cannot move bundle header itself. This function is about
    // individual MI move.
    if (MI->isBundle())
      return false;

    // Check internal dependencies in the bundle.
    // First, find the bundle header.
    MachineBasicBlock::instr_iterator MII = MI->getIterator();
    for (--MII; MII->isBundled(); --MII)
      if (MII->isBundle())
        break;

    MachineBasicBlock::instr_iterator BBEnd = MI->getParent()->instr_end();
    for (++MII; MII != BBEnd && MII->isInsideBundle() && !MII->isBundle();
         ++MII) {
      if (MII->isDebugInstr())
        continue;
      if (MIsAreDependent(MIa: &*MII, MIb: MI)) {
        if (!AllowDependentPullUp) {
          LLVM_DEBUG(dbgs() << "\t\tDependent.\n");
          return false;
        } else {
          // There are a few cases that we can safely move a dependent
          // instruction away from this packet.
          // One example is an instruction setting a call operands.
          if ((MII->isCall() && !IsIndirectCall(MI: &*MII)) ||
              IsDualJumpSecondCandidate(MI: &*MII) || MI->isBranch()) {
            LLVM_DEBUG(dbgs() << "\t\tDependent, but allow to move.\n");
            MovingDependentOp = true;
            // Penalize dependent moves so independent candidates win ties.
            Cost -= 10;
            continue;
          } else {
            LLVM_DEBUG(dbgs() << "\t\tDependent, and do not allow for now.\n");
            return false;
          }
        }
      }
    }
  }
  return true;
}
1970
1971/// Return true if MI defines a predicate and parse all defs.
1972bool HexagonGlobalSchedulerImpl::doesMIDefinesPredicate(
1973 MachineInstr *MI, SmallVector<unsigned, 4> &Defs) {
1974 bool defsPredicate = false;
1975 Defs.clear();
1976
1977 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1978 const MachineOperand &MO = MI->getOperand(i);
1979
1980 // Regmasks are considered "implicit".
1981 if (!MO.isReg())
1982 continue;
1983 unsigned Reg = MO.getReg();
1984
1985 if (!Reg || QRI->isFakeReg(Reg))
1986 continue;
1987
1988 assert(Register::isPhysicalRegister(Reg));
1989
1990 if (MO.isDef() && !MO.isImplicit()) {
1991 const TargetRegisterClass *RC = QRI->getMinimalPhysRegClass(Reg);
1992 if (RC == &Hexagon::PredRegsRegClass) {
1993 defsPredicate = true;
1994 Defs.push_back(Elt: MO.getReg());
1995 }
1996 }
1997 }
1998 return defsPredicate;
1999}
2000
/// We have just tentatively added a predicated MI to an existing packet.
/// Now we need to determine if it needs to be changed to .new form.
/// It only handles compare/predicate right now.
/// TODO - clean this logic up.
/// TODO - generalize to handle any .new
/// \param NewMI        the instruction just placed into the packet.
/// \param DepReg       out-parameter; set to the predicate register that
///                     links producer and consumer when true is returned.
/// \param TargetPacket bundle header, or the lone instruction being paired.
/// NOTE(review): a default argument on an out-of-line member definition is
/// only legal if the in-class declaration does not also supply one - confirm
/// against the header.
bool HexagonGlobalSchedulerImpl::NeedToNewify(
    MachineBasicBlock::instr_iterator NewMI, unsigned *DepReg,
    MachineInstr *TargetPacket = NULL) {
  MachineBasicBlock::instr_iterator MII = NewMI;
  SmallVector<unsigned, 4> DefsA;
  SmallVector<unsigned, 4> DefsB;
  SmallVector<unsigned, 8> UsesB;

  // If this is not a normal bundle, we are probably
  // trying to size two lonesome instructions together,
  // and trying to say if one of them will need to be
  // newified. In this is the case we have something like this:
  // BB#5:
  //    %P0<def> = CMPGEri %R4, 2
  //    S2_pstorerif_io %P0<kill>, %R29, 16, %R21<kill>
  //    BUNDLE %R7<imp-def>, %R4<imp-def>, %R7<imp-use>
  parseOperands(MI: &*NewMI, Defs&: DefsB, Uses&: UsesB);
  if (TargetPacket && !TargetPacket->isBundled()) {
    // Lone-instruction case: newify iff TargetPacket defines a predicate
    // that NewMI uses.
    if (doesMIDefinesPredicate(MI: TargetPacket, Defs&: DefsA)) {
      for (SmallVector<unsigned, 4>::iterator IA = DefsA.begin(),
                                              IAE = DefsA.end();
           IA != IAE; ++IA)
        for (SmallVector<unsigned, 8>::iterator IB = UsesB.begin(),
                                                IBE = UsesB.end();
             IB != IBE; ++IB)
          if (*IA == *IB) {
            *DepReg = *IA;
            return true;
          }
    }
    return false;
  }

  // Find bundle header.
  for (--MII; MII->isBundled(); --MII)
    if (MII->isBundle())
      break;

  // Iterate down, if there is data dependent cmp found, need to .newify.
  // Also, we can have the following:
  // {
  //     p0 = r7
  //     if (!p0.new) jump:t .LBB4_18
  //     if (p0.new) r8 = zxth(r12)
  // }
  MachineBasicBlock::instr_iterator BBEnd = MII->getParent()->instr_end();
  for (++MII; MII != BBEnd && MII->isBundled() && !MII->isBundle(); ++MII) {
    if (MII == NewMI)
      continue;
    if (doesMIDefinesPredicate(MI: &*MII, Defs&: DefsA)) {
      // Any predicate defined in the packet and used by NewMI forces the
      // .new form on NewMI.
      for (SmallVector<unsigned, 4>::iterator IA = DefsA.begin(),
                                              IAE = DefsA.end();
           IA != IAE; ++IA)
        for (SmallVector<unsigned, 8>::iterator IB = UsesB.begin(),
                                                IBE = UsesB.end();
             IB != IBE; ++IB)
          // We do not have multiple predicate regs defined in any instruction,
          // if we ever will, this needs to be generalized.
          if (*IA == *IB) {
            *DepReg = *IA;
            return true;
          }
      DefsA.clear();
    }
  }
  LLVM_DEBUG(dbgs() << "\nNo need to newify:"; NewMI->dump());
  return false;
}
2074
2075/// We know this instruction needs to be newified to be added to the packet,
2076/// but not all combinations are legal.
2077/// It is a complimentary check to NeedToNewify().
2078/// The packet actually contains the new instruction during the check.
2079bool HexagonGlobalSchedulerImpl::CanNewifiedBeUsedInBundle(
2080 MachineBasicBlock::instr_iterator NewMI, unsigned DepReg,
2081 MachineInstr *TargetPacket) {
2082 MachineBasicBlock::instr_iterator MII = NewMI;
2083 if (!TargetPacket || !TargetPacket->isBundled())
2084 return true;
2085
2086 // Find the bundle header.
2087 for (--MII; MII->isBundled(); --MII)
2088 if (MII->isBundle())
2089 break;
2090
2091 MachineBasicBlock::instr_iterator BBEnd = MII->getParent()->instr_end();
2092 for (++MII; MII != BBEnd && MII->isBundled() && !MII->isBundle(); ++MII) {
2093 // Effectively we look for the case of late predicates.
2094 // No additional checks at the time.
2095 if (MII == NewMI || !QII->isPredicateLate(Opcode: MII->getOpcode()))
2096 continue;
2097 SmallVector<unsigned, 4> DefsA;
2098 if (!doesMIDefinesPredicate(MI: &*MII, Defs&: DefsA))
2099 continue;
2100 for (auto &IA : DefsA)
2101 if (IA == DepReg)
2102 return false;
2103 }
2104 return true;
2105}
2106
2107/// setUsed - Set the register and its sub-registers as being used.
2108/// Similar to RegScavenger::setUsed().
2109void HexagonGlobalSchedulerImpl::setUsedRegs(BitVector &Set, unsigned Reg) {
2110 Set.reset(Idx: Reg);
2111 for (MCSubRegIterator SubRegs(Reg, QRI); SubRegs.isValid(); ++SubRegs)
2112 Set.reset(Idx: *SubRegs);
2113}
2114
2115/// Are these two registers overlaping?
2116bool HexagonGlobalSchedulerImpl::AliasingRegs(unsigned RegA, unsigned RegB) {
2117 if (RegA == RegB)
2118 return true;
2119
2120 for (MCSubRegIterator SubRegs(RegA, QRI); SubRegs.isValid(); ++SubRegs)
2121 if (RegB == *SubRegs)
2122 return true;
2123
2124 for (MCSubRegIterator SubRegs(RegB, QRI); SubRegs.isValid(); ++SubRegs)
2125 if (RegA == *SubRegs)
2126 return true;
2127
2128 return false;
2129}
2130
2131/// Find use with this reg, and unmark the kill flag.
2132static inline void unmarkKillReg(MachineInstr *MI, unsigned Reg) {
2133 if (MI->isDebugInstr())
2134 return;
2135
2136 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2137 MachineOperand &MO = MI->getOperand(i);
2138
2139 if (!MO.isReg())
2140 continue;
2141
2142 if (MO.isKill() && (MO.getReg() == Reg))
2143 MO.setIsKill(false);
2144 }
2145}
2146
2147/// Find use with this reg, and unmark the kill flag.
2148static inline void markKillReg(MachineInstr *MI, unsigned Reg) {
2149 if (MI->isDebugInstr())
2150 return;
2151
2152 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2153 MachineOperand &MO = MI->getOperand(i);
2154
2155 if (!MO.isReg())
2156 continue;
2157
2158 if (MO.isUse() && (MO.getReg() == Reg))
2159 MO.setIsKill(true);
2160 }
2161}
2162
/// We have just moved an instruction that could have changed kill patterns
/// along the path it was moved. We need to update it.
/// \param HomeBB       the block the instruction now lives in (unused here).
/// \param OriginBB     the block the instruction came from.
/// \param Head         iterator at the moved instruction itself.
/// \param Tail         end of the affected bundle (unused here).
/// \param SourcePacket the packet the instruction was taken from.
/// \param TargetPacket the packet the instruction was moved to.
/// \param backtrack    the instructions along the move path, last-to-first.
void HexagonGlobalSchedulerImpl::updateKillAlongThePath(
    MachineBasicBlock *HomeBB, MachineBasicBlock *OriginBB,
    MachineBasicBlock::instr_iterator &Head,
    MachineBasicBlock::instr_iterator &Tail,
    MachineBasicBlock::iterator &SourcePacket,
    MachineBasicBlock::iterator &TargetPacket,
    std::vector<MachineInstr *> &backtrack) {
  // This is the instruction being moved.
  MachineInstr *MI = &*Head;
  MachineBasicBlock *CurrentBB = OriginBB;
  // Registers that MI marks as killed; each needs a new kill point.
  SmallSet<unsigned, 8> KilledUseSet;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;

    if (MO.isKill())
      KilledUseSet.insert(V: Reg);
  }

  // If there are no kills here, we are done.
  if (KilledUseSet.empty())
    return;

  LLVM_DEBUG(dbgs() << "\n[updateKillAlongThePath]\n");
  LLVM_DEBUG(dbgs() << "\t\tInstrToMove :\t"; MI->dump());
  LLVM_DEBUG(dbgs() << "\t\tSourceLocation:\n";
             DumpPacket(SourcePacket.getInstrIterator()));
  LLVM_DEBUG(dbgs() << "\t\tTargetPacket :\n";
             DumpPacket(TargetPacket.getInstrIterator()));
  LLVM_DEBUG(dbgs() << "\tUpdate Kills. Need to update (" << KilledUseSet.size()
                    << ")kills. From BB (" << OriginBB->getNumber() << ")\n");
  LLVM_DEBUG(dbgs() << "\tMove path:\n");
  assert(!backtrack.empty() && "Empty back track");

  // We have pulled up an instruction, with one of its uses marked as kill.
  // If there is any other use of the same register along the move path,
  // and there are no side exits with killed register live-in along them,
  // we need to mark last use of that reg as kill.
  // Walk the recorded path backwards (from most recent to oldest).
  for (signed i = backtrack.size() - 1; i >= 0; --i) {
    LLVM_DEBUG(dbgs() << "\t\t[" << i << "]BB("
                      << backtrack[i]->getParent()->getNumber() << ")\t";
               backtrack[i]->dump());
    if (CurrentBB != backtrack[i]->getParent()) {
      // Crossing a block boundary: examine all side exits of the new block.
      LLVM_DEBUG(dbgs() << "\t\tChange BB from (" << CurrentBB->getNumber()
                        << ") to(" << backtrack[i]->getParent()->getNumber()
                        << ")\n");
      for (MachineBasicBlock::const_succ_iterator
               SI = backtrack[i]->getParent()->succ_begin(),
               SE = backtrack[i]->getParent()->succ_end();
           SI != SE; ++SI) {
        if (*SI == CurrentBB)
          continue;

        LLVM_DEBUG(dbgs() << "\t\tSide Exit:\n\t"; (*SI)->dump());
        // If any reg kill is live along this side exit, it is not
        // a kill any more.
        for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
                                                E = (*SI)->livein_end();
             I != E; ++I) {
          if (KilledUseSet.count(V: (*I).PhysReg)) {
            LLVM_DEBUG(dbgs() << "\t\tReg (" << printReg((*I).PhysReg, QRI)
                              << ") is LiveIn along side exit.\n");
            KilledUseSet.erase(V: (*I).PhysReg);
            unmarkKillReg(MI, Reg: (*I).PhysReg);
          }
          if (KilledUseSet.empty())
            return;
        }
      }
      CurrentBB = backtrack[i]->getParent();
    }

    // Done with the whole path.
    if (backtrack[i] == &*TargetPacket)
      return;

    // Starting the tracking. Do not update source bundle.
    // If TargetPacket == SourcePacket we have returned
    // in the previous check.
    if (backtrack[i] == &*SourcePacket)
      continue;

    // Ignore DBG_VALUE.
    if (backtrack[i]->isDebugInstr())
      continue;

    // Encountered an intermediary bundle. Process it.
    // Beware, sometimes check for backtrack[i] == TargetPacket
    // does not work, so this instruction could be one from the target bundle.
    SmallVector<unsigned, 4> Defs;
    SmallVector<unsigned, 8> Uses;
    MachineInstr *MIU = backtrack[i];
    parseOperands(MI: MIU, Defs, Uses);

    for (SmallVector<unsigned, 8>::iterator IA = Uses.begin(), IAE = Uses.end();
         IA != IAE; ++IA) {
      if (KilledUseSet.count(V: *IA)) {
        // Now this is new kill point for this Reg.
        // Update the bundle, and any local uses.
        markKillReg(MI: MIU, Reg: *IA);

        // Unmark the current MI.
        unmarkKillReg(MI, Reg: *IA);

        if (MIU->isBundle()) {
          // Propagate the kill to every member of the bundle.
          // TODO: Can do this cleaner and faster.
          MachineBasicBlock::instr_iterator MII = MIU->getIterator();
          MachineBasicBlock::instr_iterator End = CurrentBB->instr_end();
          for (++MII; MII != End && MII->isInsideBundle(); ++MII)
            markKillReg(MI: &*MII, Reg: *IA);
        }

        // We have updated this kill reg, if there are more, keep on going.
        KilledUseSet.erase(V: *IA);

        // If the set is exhausted, just leave.
        if (KilledUseSet.empty())
          return;
      }
    }
  }
}
2292
/// This is houskeeping for bundle with instruction just added to it.
/// The new member is already linked in; rebuild the BUNDLE header so its
/// implicit operands cover the new contents, then re-point every iterator
/// and back-track entry that referenced the old header.
void HexagonGlobalSchedulerImpl::addInstructionToExistingBundle(
    MachineBasicBlock *HomeBB, MachineBasicBlock::instr_iterator &Head,
    MachineBasicBlock::instr_iterator &Tail,
    MachineBasicBlock::instr_iterator &NewMI,
    MachineBasicBlock::iterator &TargetPacket,
    MachineBasicBlock::iterator &NextMI,
    std::vector<MachineInstr *> &backtrack) {
  Tail = getBundleEnd(I: Head);
  LLVM_DEBUG(dbgs() << "\t\t\t[Add] Head home: "; DumpPacket(Head));

  // Old header to be deleted shortly.
  MachineBasicBlock::instr_iterator Outcast = Head;
  // Unbundle old header.
  if (Outcast->isBundle() && Outcast->isBundledWithSucc())
    Outcast->unbundleFromSucc();

  // Remember the no-shuffle marker so it survives re-finalization.
  bool memShufDisabled = QII->getBundleNoShuf(MIB: *Outcast);

  // Create new bundle header and update MI flags.
  finalizeBundle(MBB&: *HomeBB, FirstMI: ++Head, LastMI: Tail);
  MachineBasicBlock::instr_iterator BundleMII = std::prev(x: Head);
  if (memShufDisabled)
    QII->setBundleNoShuf(BundleMII);
  // Head now points at the freshly created header.
  --Head;

  LLVM_DEBUG(dbgs() << "\t\t\t[Add] New Head : "; DumpPacket(Head));

  // The old header could be listed in the back tracking,
  // so if it is, we need to update it.
  for (unsigned i = 0; i < backtrack.size(); ++i)
    if (backtrack[i] == &*Outcast)
      backtrack[i] = &*Head;

  // Same for top MI iterator.
  if (NextMI == Outcast)
    NextMI = Head;

  TargetPacket = Head;
  HomeBB->erase(I: Outcast);
}
2334
/// This handles houskeeping for bundle with instruction just deleted from it.
/// We do not see the original moved instruction in here.
/// Rebuilds (or dissolves) the source bundle after one member left, and
/// re-points Head/Tail, SourceLocation, NextMI and the back-track entries
/// at whatever replaces the old bundle header.
void HexagonGlobalSchedulerImpl::removeInstructionFromExistingBundle(
    MachineBasicBlock *HomeBB, MachineBasicBlock::instr_iterator &Head,
    MachineBasicBlock::instr_iterator &Tail,
    MachineBasicBlock::iterator &SourceLocation,
    MachineBasicBlock::iterator &NextMI, bool MovingDependentOp,
    std::vector<MachineInstr *> &backtrack) {
  // Empty BBs will be deleted shortly.
  if (HomeBB->empty()) {
    Head = MachineBasicBlock::instr_iterator();
    Tail = MachineBasicBlock::instr_iterator();
    return;
  }

  if (!SourceLocation->isBundle()) {
    LLVM_DEBUG(dbgs() << "\t\t\tOriginal instruction was not bundled.\n\t\t\t";
               SourceLocation->dump());
    // If original instruction was not bundled, and we have moved it
    // and it is in the back track, we probably want to remove it from there.
    LLVM_DEBUG(dbgs() << "\t\t\t[Rem] New head: "; backtrack.back()->dump());

    for (unsigned i = 0; i < backtrack.size(); ++i) {
      if (backtrack[i] == &*SourceLocation) {
        // By definition, this should be the last instruction in the backtrack.
        assert((backtrack[i] == backtrack.back()) && "Lost back track");
        backtrack.pop_back();
      }
      // Point the main iterator to the next instruction.
      if (NextMI == SourceLocation)
        NextMI++;
    }
    // Invalidate all trackers; there is no bundle left to maintain.
    SourceLocation = MachineBasicBlock::iterator();
    Head = MachineBasicBlock::instr_iterator();
    Tail = MachineBasicBlock::instr_iterator();
    return;
  }

  // The old header, soon to be deleted.
  MachineBasicBlock::instr_iterator Outcast = SourceLocation.getInstrIterator();
  LLVM_DEBUG(dbgs() << "\t\t\t[Rem] SourceLocation after bundle update: ";
             DumpPacket(Outcast));

  // If bundle has been already destroyed. BB->splat seems to do it some times
  // but not the other.
  // We already know that SourceLocation is bundle header.
  if (!SourceLocation->isBundledWithSucc()) {
    assert(!Head->isBundledWithSucc() && !Head->isBundledWithPred() &&
           "Bad bundle");
  } else {
    Head = SourceLocation.getInstrIterator();
    Tail = getBundleEnd(I: Head);
    // Count remaining members: Size excludes debug instructions,
    // BBSizeWithDbg includes them.
    unsigned Size = 0;
    unsigned BBSizeWithDbg = 0;
    MachineBasicBlock::const_instr_iterator I(Head);
    MachineBasicBlock::const_instr_iterator E = Head->getParent()->instr_end();

    for (++I; I != E && I->isBundledWithPred(); ++I) {
      ++BBSizeWithDbg;
      if (!I->isDebugInstr())
        ++Size;
    }

    LLVM_DEBUG(dbgs() << "\t\t\t[Rem] Size(" << Size << ") Head orig: ";
               DumpPacket(Head));
    // The old header, soon to be deleted.
    Outcast = Head;

    // The old Header is still counted here.
    if (Size > 1) {
      // Two or more real members remain: re-finalize into a fresh bundle.
      if (Outcast->isBundle() && Outcast->isBundledWithSucc())
        Outcast->unbundleFromSucc();

      bool memShufDisabled = QII->getBundleNoShuf(MIB: *Outcast);
      // The finalizeBundle() assumes that "original" sequence
      // it is finalizing is sequentially correct. That basically
      // means that swap case might not be handled properly.
      // I find insert point for the pull-up instruction myself,
      // and I should try to catch that swap case there, and refuse
      // to insert if I cannot guarantee correct serial semantics.
      // In the future, I need my own incremental "inserToBundle"
      // function.
      finalizeBundle(MBB&: *HomeBB, FirstMI: ++Head, LastMI: Tail);
      MachineBasicBlock::instr_iterator BundleMII = std::prev(x: Head);
      if (memShufDisabled)
        QII->setBundleNoShuf(BundleMII);

      --Head;
    } else if (Size == 1) {
      // There is only one non-debug instruction in the bundle.
      if (BBSizeWithDbg > 1) {
        // There are some debug instructions that should be unbundled too.
        MachineBasicBlock::instr_iterator I(Head);
        MachineBasicBlock::instr_iterator E = Head->getParent()->instr_end();
        for (++I; I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          // Set Head to the non-debug instruction.
          if (!I->isDebugInstr())
            Head = I;
        }
      } else {
        // This means that only one original instruction is
        // left in the bundle. We need to "unbundle" it because the
        // rest of API will not like it.
        ++Head;
        if (Head->isBundledWithPred())
          Head->unbundleFromPred();
        if (Head->isBundledWithSucc())
          Head->unbundleFromSucc();
      }
    } else
      llvm_unreachable("Corrupt bundle");
  }

  LLVM_DEBUG(dbgs() << "\t\t\t[Rem] New Head : "; DumpPacket(Head));
  SourceLocation = Head;

  // The old header could be listed in the back tracking,
  // so if it is, we need to update it.
  for (unsigned i = 0; i < backtrack.size(); ++i)
    if (backtrack[i] == &*Outcast)
      backtrack[i] = &*Head;

  // Same for top MI iterator.
  if (NextMI == Outcast)
    NextMI = Head;

  HomeBB->erase(I: Outcast);
}
2464
2465#ifndef NDEBUG
2466static void debugLivenessForBB(const MachineBasicBlock *MBB,
2467 const TargetRegisterInfo *TRI) {
2468 LLVM_DEBUG(dbgs() << "\tLiveness for BB:\n"; MBB->dump());
2469 for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
2470 SE = MBB->succ_end();
2471 SI != SE; ++SI) {
2472 LLVM_DEBUG(dbgs() << "\tSuccessor BB (" << (*SI)->getNumber() << "):");
2473 for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
2474 E = (*SI)->livein_end();
2475 I != E; ++I)
2476 LLVM_DEBUG(dbgs() << "\t" << printReg((*I).PhysReg, TRI));
2477 LLVM_DEBUG(dbgs() << "\n");
2478 }
2479}
2480#endif
2481
2482// Blocks should be considered empty if they contain only debug info;
2483// else the debug info would affect codegen.
2484static bool IsEmptyBlock(MachineBasicBlock *MBB) {
2485 if (MBB->empty())
2486 return true;
2487 for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
2488 MBBI != MBBE; ++MBBI) {
2489 if (!MBBI->isDebugInstr())
2490 return false;
2491 }
2492 return true;
2493}
2494
2495/// Treat given instruction as a branch, go through its operands
2496/// and see if any of them is a BB address. If so, return it.
2497/// Return NULL otherwise.
2498static inline MachineBasicBlock *getBranchDestination(MachineInstr *MI) {
2499 if (!MI || !MI->isBranch() || MI->isBundle())
2500 return NULL;
2501
2502 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2503 const MachineOperand &MO = MI->getOperand(i);
2504 if (MO.isMBB())
2505 return MO.getMBB();
2506 }
2507 return NULL;
2508}
2509
2510/// Similar to HexagonInstrInfo::analyzeBranch but handles
2511/// serveral more general cases including parsing empty BBs when possible.
2512bool HexagonGlobalSchedulerImpl::AnalyzeBBBranches(MachineBasicBlock *MBB,
2513 MachineBasicBlock *&TBB,
2514 MachineInstr *&FirstTerm,
2515 MachineBasicBlock *&FBB,
2516 MachineInstr *&SecondTerm) {
2517 // Hexagon allowes up to two jumps in MBB.
2518 FirstTerm = NULL;
2519 SecondTerm = NULL;
2520
2521 LLVM_DEBUG(dbgs() << "\n\t\tAnalyze Branches in BB(" << MBB->getNumber()
2522 << ")\n");
2523 if (MBB->succ_size() == 0) {
2524 LLVM_DEBUG(dbgs() << "\n\t\tBlock has no successors.\n");
2525 return true;
2526 }
2527 // Find both jumps.
2528 // We largely rely on implied assumption that BB branching always
2529 // looks like this:
2530 // J2_jumpf %P0, <BB#60>, %PC<imp-def>;
2531 // J2_jump <BB#49>
2532 // Branches also could be in different packets.
2533 MachineBasicBlock::instr_iterator MIB = MBB->instr_begin();
2534 MachineBasicBlock::instr_iterator MIE = MBB->instr_end();
2535 MachineBasicBlock::instr_iterator MII = MIB;
2536
2537 if (QII->nonDbgBBSize(BB: MBB) == 1) {
2538 MII = MBB->getFirstNonDebugInstr().getInstrIterator();
2539 if (MII->isBranch())
2540 FirstTerm = &*MII;
2541 } else {
2542 // We have already eliminated the case when MIB == MIE.
2543 while (MII != MIE) {
2544 if (!MII->isBundle() && MII->isBranch()) {
2545 if (!FirstTerm)
2546 FirstTerm = &*MII;
2547 else
2548 SecondTerm = &*MII;
2549 }
2550 ++MII;
2551 }
2552 }
2553 if ((FirstTerm && FirstTerm->isIndirectBranch()) ||
2554 (SecondTerm && SecondTerm->isIndirectBranch())) {
2555 LLVM_DEBUG(dbgs() << "\n\t\tCannot analyze BB with indirect branch.");
2556 return true;
2557 }
2558 if ((FirstTerm && FirstTerm->getOpcode() == Hexagon::J2_jump &&
2559 !FirstTerm->getOperand(i: 0).isMBB()) ||
2560 (SecondTerm && SecondTerm->getOpcode() == Hexagon::J2_jump &&
2561 !SecondTerm->getOperand(i: 0).isMBB())) {
2562 LLVM_DEBUG(
2563 dbgs() << "\n\t\tCannot analyze BB with a branch out of function.");
2564 return true;
2565 }
2566
2567 // Now try to analyze this branch.
2568 SmallVector<MachineOperand, 4> Cond;
2569 if (QII->analyzeBranch(MBB&: *MBB, TBB, FBB, Cond, AllowModify: false)) {
2570 LLVM_DEBUG(dbgs() << "\t\tFail to analyze with analyzeBranch.\n");
2571 LLVM_DEBUG(dbgs() << "\t\tFirst term: "; if (FirstTerm) FirstTerm->dump();
2572 else dbgs() << "None\n";);
2573 // Could not analyze it. See if this is something we can recognize.
2574 TBB = getBranchDestination(MI: FirstTerm);
2575 }
2576 // There are several cases not handled by HexagonInstrInfo::analyzeBranch.
2577 if (!TBB) {
2578 LLVM_DEBUG(dbgs() << "\t\tMissing TBB.\n");
2579 // There is a branch, but TBB is not found.
2580 // The BB could also be empty at this point. See if it is a trivial
2581 // layout case.
2582 if (MBB->succ_size() == 1) {
2583 TBB = *MBB->succ_begin();
2584 LLVM_DEBUG(dbgs() << "\t\tFall through TBB(" << TBB->getNumber()
2585 << ").\n");
2586 return false;
2587 } else if (MBB->succ_size() == 2) {
2588 // This should cover majority of remaining cases.
2589 if (FirstTerm && SecondTerm &&
2590 (QII->isPredicated(MI: *FirstTerm) || QII->isNewValueJump(MI: *FirstTerm)) &&
2591 !QII->isPredicated(MI: *SecondTerm)) {
2592 TBB = getBranchDestination(MI: FirstTerm);
2593 FBB = getBranchDestination(MI: SecondTerm);
2594 LLVM_DEBUG(dbgs() << "\t\tCanonical dual jump layout: TBB("
2595 << TBB->getNumber() << ") FBB(" << FBB->getNumber()
2596 << ").\n");
2597 return false;
2598 } else if (SecondTerm && SecondTerm->getOpcode() == Hexagon::J2_jump &&
2599 SecondTerm->getOperand(i: 0).isMBB()) {
2600 // Look at the second term if I know it, to find out what is the fall
2601 // through for this BB.
2602 FBB = SecondTerm->getOperand(i: 0).getMBB();
2603 assert(MBB->succ_size() == 2 && "Expected exactly 2 successors");
2604 MachineBasicBlock *Succ0 = *MBB->succ_begin();
2605 MachineBasicBlock *Succ1 = *std::next(x: MBB->succ_begin());
2606 if (FBB == Succ0)
2607 TBB = Succ1;
2608 else
2609 TBB = Succ0;
2610 LLVM_DEBUG(dbgs() << "\t\tSecond br is J2_jump TBB(" << TBB->getNumber()
2611 << ") FBB(" << FBB->getNumber() << ").\n");
2612 return false;
2613 } else {
2614 // This might be an empty BB but still with two
2615 // successors set. Try to use CFG layout to sort it out.
2616 // This could happen when last jump was pulled up from a BB, and
2617 // CFG is being updated. At that point this method is called and
2618 // returns best guess possible for TBB/FBB. Fortunately order of those
2619 // is irrelevant, and rather used a worklist for CFG update.
2620 MachineFunction::iterator MBBIter = MBB->getIterator();
2621 MachineFunction &MF = *MBB->getParent();
2622 (void)MF; // supress compiler warning
2623 // If there are no other clues, assume next sequential BB
2624 // in CFG as FBB.
2625 ++MBBIter;
2626 assert(MBBIter != MF.end() && "I give up.");
2627 FBB = &(*MBBIter);
2628 assert(MBB->succ_size() == 2 && "Expected exactly 2 successors");
2629 MachineBasicBlock *S0 = *MBB->succ_begin();
2630 MachineBasicBlock *S1 = *std::next(x: MBB->succ_begin());
2631 if (FBB == S0)
2632 TBB = S1;
2633 else if (FBB == S1) {
2634 TBB = S0;
2635 } else {
2636 // This case can arise when the layout successor basic block (++IMBB)
2637 // got empty during pull-up.
2638 // As a result, ++IMBB is not one of MBB's successors.
2639 MBBIter = MF.begin();
2640 while (!MBB->isSuccessor(MBB: &*MBBIter) && (MBBIter != MF.end()))
2641 ++MBBIter;
2642 assert(MBBIter != MF.end() && "Malformed BB with invalid successors");
2643 FBB = &*MBBIter;
2644 if (FBB == S0)
2645 TBB = S1;
2646 else
2647 TBB = S0;
2648 }
2649 LLVM_DEBUG(dbgs() << "\t\tUse layout TBB(" << TBB->getNumber()
2650 << ") FBB(" << FBB->getNumber() << ").\n");
2651 return false;
2652 }
2653 }
2654 assert(!FirstTerm && "Bad BB");
2655 return true;
2656 }
2657 // Ok, we have TBB, but maybe missing FBB.
2658 if (!FBB && SecondTerm) {
2659 LLVM_DEBUG(dbgs() << "\t\tMissing FBB.\n");
2660 // analyzeBranch could lie to us, ignore it in this case.
2661 // For the canonical case simply take known branch targets.
2662 if ((QII->isPredicated(MI: *FirstTerm) || QII->isNewValueJump(MI: *FirstTerm)) &&
2663 !QII->isPredicated(MI: *SecondTerm)) {
2664 FBB = getBranchDestination(MI: SecondTerm);
2665 } else {
2666 // Second term is also predicated.
2667 // Use CFG layout. Assign layout successor as FBB.
2668 for (MachineBasicBlock *Succ : MBB->successors()) {
2669 if (MBB->isLayoutSuccessor(MBB: Succ))
2670 FBB = Succ;
2671 }
2672 if (FBB == NULL) {
2673 LLVM_DEBUG(dbgs() << "\nNo layout successor found.");
2674 LLVM_DEBUG(dbgs() << "Possibly the layout successor is an empty BB");
2675 return true;
2676 }
2677 if (TBB == FBB)
2678 LLVM_DEBUG(dbgs() << "Malformed branch with useless branch condition";);
2679 }
2680 LLVM_DEBUG(dbgs() << "\t\tSecond term: "; SecondTerm->dump());
2681 } else if (TBB && !FBB) {
2682 // If BB ends in endloop, and it is a single BB hw loop,
2683 // we will have a single terminator, but we can figure FBB
2684 // easily from CFG.
2685 if (MBB->succ_size() == 2) {
2686 MachineBasicBlock *S0 = *MBB->succ_begin();
2687 MachineBasicBlock *S1 = *std::next(x: MBB->succ_begin());
2688 if (TBB == S0)
2689 FBB = S1;
2690 else
2691 FBB = S0;
2692 }
2693 }
2694
2695 LLVM_DEBUG(dbgs() << "\t\tFinal TBB(" << TBB->getNumber() << ").\n";
2696 if (FBB) dbgs() << "\t\tFinal FBB(" << FBB->getNumber() << ").\n";
2697 else dbgs() << "\t\tFinal FBB(None)\n";);
2698 return false;
2699}
2700
2701/// updateBranches - Updates all branches to \p From in the basic block \p
2702/// InBlock to branches to \p To.
2703static void updateBranches(MachineBasicBlock &InBlock, MachineBasicBlock *From,
2704 MachineBasicBlock *To) {
2705 for (MachineBasicBlock::instr_iterator BI = InBlock.instr_begin(),
2706 E = InBlock.instr_end();
2707 BI != E; ++BI) {
2708 MachineInstr *Inst = &*BI;
2709 // Ignore anything that is not a branch.
2710 if (!Inst->isBranch())
2711 continue;
2712 for (MachineInstr::mop_iterator OI = Inst->operands_begin(),
2713 OE = Inst->operands_end();
2714 OI != OE; ++OI) {
2715 MachineOperand &Opd = *OI;
2716 // Look for basic block "From".
2717 if (!Opd.isMBB() || Opd.getMBB() != From)
2718 continue;
2719 // Update it.
2720 Opd.setMBB(To);
2721 }
2722 }
2723}
2724
2725/// Rewrite all predecessors of the old block to go to the fallthrough
2726/// instead.
2727/// NB: Collect predecessors into a snapshot vector before iterating to
2728/// avoid iterator invalidation on MBB's predecessor list. Each call to
2729/// ReplaceUsesOfBlockWith modifies both the successor list of Pred and
2730/// the predecessor list of MBB, which invalidates debug-mode iterators
2731/// (detected by _GLIBCXX_DEBUG).
2732static void updatePredecessors(MachineBasicBlock &MBB,
2733 MachineBasicBlock *MFBB) {
2734 MachineFunction &MF = *MBB.getParent();
2735
2736 if (MFBB->getIterator() == MF.end())
2737 return;
2738
2739 // Snapshot the predecessor list to avoid iterator invalidation.
2740 SmallVector<MachineBasicBlock *, 4> Preds(MBB.pred_begin(), MBB.pred_end());
2741 for (MachineBasicBlock *Pred : Preds) {
2742 if (!Pred->isSuccessor(MBB: &MBB))
2743 continue;
2744 Pred->ReplaceUsesOfBlockWith(Old: &MBB, New: MFBB);
2745 updateBranches(InBlock&: *Pred, From: &MBB, To: MFBB);
2746 }
2747}
2748
2749static void UpdateCFG(MachineBasicBlock *HomeBB, MachineBasicBlock *OriginBB,
2750 MachineInstr *MII, MachineBasicBlock *HomeTBB,
2751 MachineBasicBlock *HomeFBB, MachineInstr *FTA,
2752 MachineInstr *STA,
2753 const MachineBranchProbabilityInfo *MBPI) {
2754 MachineBasicBlock *S2Add = NULL, *S2Remove = NULL;
2755 bool RemoveLSIfPresent = false;
2756 if ((&*MII == FTA) && MII->isConditionalBranch()) {
2757 LLVM_DEBUG(dbgs() << "\nNew firstterm conditional jump added to HomeBB";);
2758 S2Add = HomeTBB;
2759 S2Remove = HomeTBB;
2760 } else if ((&*MII == STA) && MII->isConditionalBranch()) {
2761 LLVM_DEBUG(dbgs() << "\nNew secondterm conditional jump added to HomeBB";);
2762 // AnalyzeBBBranches might not give correct information in this case.
2763 // The branch destination may be a symbol, not necessarily a block.
2764 if (MachineBasicBlock *Dest = getBranchDestination(MI: MII)) {
2765 LLVM_DEBUG(dbgs() << "\nBranch destination for pulled instruction is BB#"
2766 << Dest->getNumber(););
2767 S2Add = Dest;
2768 S2Remove = Dest;
2769 }
2770 } else if ((&*MII == FTA) && MII->isUnconditionalBranch()) {
2771 LLVM_DEBUG(dbgs() << "\nNew firstterm unconditional jump added to HomeBB";);
2772 S2Add = HomeTBB;
2773 S2Remove = HomeTBB;
2774 RemoveLSIfPresent = true;
2775 } else if ((&*MII == STA) && MII->isUnconditionalBranch()) {
2776 LLVM_DEBUG(
2777 dbgs() << "\nNew secondterm unconditional jump added to HomeBB";);
2778 S2Add = HomeFBB;
2779 S2Remove = HomeFBB;
2780 RemoveLSIfPresent = true;
2781 }
2782 if (S2Add && !HomeBB->isSuccessor(MBB: S2Add)) {
2783 HomeBB->addSuccessor(Succ: S2Add, Prob: MBPI->getEdgeProbability(Src: OriginBB, Dst: S2Add));
2784 }
2785 if (S2Remove)
2786 OriginBB->removeSuccessor(Succ: S2Remove);
2787 if (RemoveLSIfPresent) {
2788 MachineFunction::iterator HomeBBLS = HomeBB->getIterator();
2789 ++HomeBBLS;
2790 if (HomeBBLS != HomeBB->getParent()->end() &&
2791 HomeBB->isLayoutSuccessor(MBB: &*HomeBBLS)) {
2792 LLVM_DEBUG(dbgs() << "\nRemoving LayoutSucc BB#" << HomeBBLS->getNumber()
2793 << "from list of successors";);
2794 HomeBB->removeSuccessor(Succ: &*HomeBBLS);
2795 }
2796 }
2797}
2798
/// Move instruction from/to BB, Update liveness info,
/// return pointer to the newly inserted and modified
/// instruction.
///
/// \p InstrToMove is the temporary clone previously planted in \p HomeBB
/// (see insertTempCopy); it is erased here and the real
/// \p OriginalInstruction is spliced from \p OriginBB into the bundle at
/// \p TargetPacket. When \p Cond is non-empty the moved instruction is
/// predicated on it, and \p NeedToNewify additionally promotes it to the
/// .new predicated form on \p DepReg. Kill flags and per-BB liveness are
/// recomputed for every block on the \p backtrack path, and the CFG is
/// patched when the moved instruction is a branch. Returns the header of
/// the target bundle.
MachineInstr *HexagonGlobalSchedulerImpl::MoveAndUpdateLiveness(
    BasicBlockRegion *CurrentRegion, MachineBasicBlock *HomeBB,
    MachineInstr *InstrToMove, bool NeedToNewify, unsigned DepReg,
    bool MovingDependentOp, MachineBasicBlock *OriginBB,
    MachineInstr *OriginalInstruction, SmallVector<MachineOperand, 4> &Cond,
    MachineBasicBlock::iterator &SourceLocation,
    MachineBasicBlock::iterator &TargetPacket,
    MachineBasicBlock::iterator &NextMI,
    std::vector<MachineInstr *> &backtrack) {
  LLVM_DEBUG(
      dbgs() << "\n...............[MoveAndUpdateLiveness]..............\n");
  LLVM_DEBUG(dbgs() << "\t\tInstrToMove :\t"; InstrToMove->dump());
  LLVM_DEBUG(dbgs() << "\t\tOriginalInstruction:\t";
             OriginalInstruction->dump());
  LLVM_DEBUG(dbgs() << "\t\tSourceLocation :\t";
             DumpPacket(SourceLocation.getInstrIterator()));
  LLVM_DEBUG(dbgs() << "\t\tTargetPacket :\t";
             DumpPacket(TargetPacket.getInstrIterator()));

  // Bundle boundaries of the packet the instruction is leaving, and the
  // position of the departing instruction inside it.
  MachineBasicBlock::instr_iterator OriginalHead =
      SourceLocation.getInstrIterator();
  MachineBasicBlock::instr_iterator OriginalTail = getBundleEnd(I: OriginalHead);
  MachineBasicBlock::instr_iterator OutcastFrom =
      OriginalInstruction->getIterator();

  // Remove our temporary instruction.
  MachineBasicBlock::instr_iterator kill_it(InstrToMove);
  HomeBB->erase(I: kill_it);

  // Bundle boundaries of the destination packet.
  MachineBasicBlock::instr_iterator TargetHead(TargetPacket.getInstrIterator());
  MachineBasicBlock::instr_iterator TargetTail = getBundleEnd(I: TargetHead);

  LLVM_DEBUG(dbgs() << "\n\tTo BB before:\n"; debugLivenessForBB(HomeBB, QRI));
  LLVM_DEBUG(dbgs() << "\n\tFrom BB before:\n";
             debugLivenessForBB(OriginBB, QRI));

  // Before we perform the move, we need to collect the worklist
  // of BBs for liveness updated.
  std::list<MachineBasicBlock *> WorkList;

  // Insert into the work list all BBs along the backtrace.
  for (std::vector<MachineInstr *>::iterator RI = backtrack.begin(),
                                             RIE = backtrack.end();
       RI != RIE; RI++)
    WorkList.push_back(x: (*RI)->getParent());

  // Only keep unique entries.
  // TODO: Use a different container here.
  // NOTE(review): std::list::unique only collapses *adjacent* duplicates;
  // this relies on backtrack listing each BB's instructions contiguously
  // -- confirm.
  WorkList.unique();

  // Move the original instruction.
  // If this instruction is inside a bundle, update the bundle.
  MachineBasicBlock::instr_iterator BBEnd =
      TargetHead->getParent()->instr_end();
  bool LastInstructionInBundle = false;
  MachineBasicBlock::instr_iterator MII = findInsertPositionInBundle(
      Bundle&: TargetPacket, MI: &*OutcastFrom, LastInBundle&: LastInstructionInBundle);

  (void)BBEnd;
  LLVM_DEBUG(dbgs() << "\n\t\t\tHead target : "; DumpPacket(TargetHead));
  LLVM_DEBUG(dbgs() << "\t\t\tTail target : ";
             DumpPacket(TargetTail, BBEnd));
  LLVM_DEBUG(dbgs() << "\t\t\tInsert right before: "; DumpPacket(MII, BBEnd));

  MIBundleBuilder Bundle(&*TargetHead);

  // Actual move. One day liveness might be updated here.
  if (OriginalInstruction->isBundled()) {
    Bundle.insert(I: MII, MI: OriginalInstruction->removeFromBundle());
    // Step back so MII now denotes the just-inserted instruction.
    --MII;
  } else {
    // This is one case currently unhandled by Bundle.insert
    // and needs to be fixed upstream. Meanwhile use old way to handle
    // this odd case.
    if (OriginalInstruction->getIterator() == TargetTail) {
      LLVM_DEBUG(dbgs() << "\t\t\tSpecial case move.\n");
      MachineBasicBlock::instr_iterator MIIToPred = MII;
      --MIIToPred;
      LLVM_DEBUG(dbgs() << "\t\t\tInser after : ";
                 DumpPacket(MIIToPred, BBEnd));
      // Unbundle it in its current location.
      if (OutcastFrom->isBundledWithSucc()) {
        OutcastFrom->clearFlag(Flag: MachineInstr::BundledSucc);
        OutcastFrom->clearFlag(Flag: MachineInstr::BundledPred);
      } else if (OutcastFrom->isBundledWithPred()) {
        OutcastFrom->unbundleFromPred();
      }
      // Splice before MII, then stitch bundle flags back together on
      // both sides of the insertion point.
      HomeBB->splice(Where: MII, Other: OriginBB, From: OutcastFrom);
      if (!MII->isBundledWithPred())
        MII->bundleWithPred();
      if (!LastInstructionInBundle && !MII->isBundledWithSucc())
        MII->bundleWithSucc();
      // This is the instruction after which we have inserted.
      if (!MIIToPred->isBundledWithSucc())
        MIIToPred->bundleWithSucc();
    } else {
      Bundle.insert(I: MII, MI: OriginalInstruction->removeFromParent());
      --MII;
    }
  }
  // Source location bundle is updated later in the
  // removeInstructionFromExistingBundle().

  LLVM_DEBUG(dbgs() << "\t\t\tNew packet head: "; DumpPacket(TargetHead));
  LLVM_DEBUG(dbgs() << "\t\t\tInserted op : "; MII->dump());
  LLVM_DEBUG(dbgs() << "\n\tTo BB after move:\n";
             debugLivenessForBB(HomeBB, QRI));
  LLVM_DEBUG(dbgs() << "\n\tFrom BB after:\n";
             debugLivenessForBB(OriginBB, QRI));

  // Update kill patterns. Do it before we have predicated the moved
  // instruction.
  updateKillAlongThePath(HomeBB, OriginBB, Head&: MII, Tail&: TargetTail, SourcePacket&: SourceLocation,
                         TargetPacket, backtrack);
  // I need to know:
  // - true/false predication
  // - do I need to .new it?
  // - do I need to .old it?
  // If the original instruction used new value operands,
  // it might need to be changed to the generic form
  // before further processing.
  if (QII->isDotNewInst(MI: *MII)) {
    DemoteToDotOld(MI: &*MII);
    LLVM_DEBUG(dbgs() << "\t\t\tDemoted to .old\t:"; MII->dump());
  }

  // We have previously checked whether this instruction could
  // be placed in this packet, including all possible transformations
  // it might need, so if any request will fail now, something is wrong.
  //
  // Need for predication and the exact condition is determined by
  // the path between original and current instruction location.
  if (!Cond.empty()) { // To be predicated
    LLVM_DEBUG(dbgs() << "\t\t\tPredicating:"; MII->dump());
    assert(TII->isPredicable(*MII) && "MII is not predicable");
    TII->PredicateInstruction(MI&: *MII, Pred: Cond);
    if (NeedToNewify) {
      assert((DepReg < std::numeric_limits<unsigned>::max()) &&
             "Invalid pred reg value");
      LLVM_DEBUG(dbgs() << "\t\t\tNeeds to NEWify on Reg("
                        << printReg(DepReg, QRI) << ").\n");
      // Swap in the .new predicated variant of the opcode.
      int NewOpcode = QII->getDotNewPredOp(MI: *MII, MBPI);
      MII->setDesc(QII->get(Opcode: NewOpcode));

      // Now we need to mark newly created predicate operand as
      // internal read.
      // TODO: Better look for predicate operand.
      for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MII->getOperand(i);
        if (!MO.isReg())
          continue;
        if (MO.isDef())
          continue;
        if (DepReg == MO.getReg())
          MO.setIsInternalRead();
      }
    }
    LLVM_DEBUG(dbgs() << "\t\t\tNew predicated form:\t"; MII->dump());
    // If the predicate has changed kill pattern, now we need to propagate
    // that again. This is important for liveness computation.
    updateKillAlongThePath(HomeBB, OriginBB, Head&: MII, Tail&: TargetTail, SourcePacket&: SourceLocation,
                           TargetPacket, backtrack);
  }

  // Create new bundle header, remove the old one.
  addInstructionToExistingBundle(HomeBB, Head&: TargetHead, Tail&: TargetTail, NewMI&: MII,
                                 TargetPacket, NextMI, backtrack);

  // If moved instruction was inside a bundle, update that bundle.
  // NOTE: the call pre-increments OriginalHead; from here on it points
  // past the old bundle header.
  removeInstructionFromExistingBundle(HomeBB: OriginBB, Head&: ++OriginalHead, Tail&: OriginalTail,
                                      SourceLocation, NextMI, MovingDependentOp,
                                      backtrack);

  // If removed instruction could have been dependent on any
  // of the remaining ops, we need to oldify possible affected ones.
  LLVM_DEBUG(dbgs() << "\t\tTargetHead:\t"; DumpPacket(TargetHead, BBEnd));
  LLVM_DEBUG(dbgs() << "\t\tOriginalHead:\t"; DumpPacket(OriginalHead, BBEnd));
  LLVM_DEBUG(dbgs() << "\t\tOriginalInstruction:\t";
             DumpPacket(OriginalInstruction->getIterator(), BBEnd));
  LLVM_DEBUG(dbgs() << "\t\tOutcastFrom:\t"; DumpPacket(OutcastFrom, BBEnd));

  // Clean up the original source bundle on a global scope.
  if (OriginalHead != MachineBasicBlock::instr_iterator() &&
      QII->isEndLoopN(Opcode: OriginalHead->getOpcode())) {
    // Single endloop left. Since it is not a real instruction,
    // we can simply add it to a non empty previous bundle, if one exist,
    // or let assembler to produce a fake bundle for it.
    LLVM_DEBUG(dbgs() << "\t\tOnly endloop in packet.\n");
    MachineBasicBlock::instr_iterator I(OriginalHead);
    if (OriginBB->begin() != I) {
      --I;
      if (I->isBundled()) {
        if (!I->isBundledWithSucc())
          I->bundleWithSucc();
        if (!OriginalHead->isBundledWithPred())
          OriginalHead->bundleWithPred();
      }
      // else we probably need to create a new bundle here.
      // SourceLocation = NULL;
    }
  } else if (MovingDependentOp &&
             OriginalHead != MachineBasicBlock::instr_iterator()) {
    if (OriginalHead->isBundled()) {
      // Walk the remainder of the source bundle and demote any .new
      // consumer that truly depended on the departed instruction.
      for (MachineBasicBlock::instr_iterator J = ++OriginalHead;
           J != OriginalTail && J->isInsideBundle() && !J->isBundle(); ++J) {
        // Need to oldify it.
        if (MIsHaveTrueDependency(MIa: OriginalInstruction, MIb: &*J) &&
            QII->isDotNewInst(MI: *J)) {
          LLVM_DEBUG(dbgs() << "\t\tDemoting to .old:\t"; J->dump());
          DemoteToDotOld(MI: &*J);
        }
      }
    } else {
      // Single instruction left.
      if (MIsHaveTrueDependency(MIa: OriginalInstruction, MIb: &*OriginalHead) &&
          QII->isDotNewInst(MI: *OriginalHead)) {
        LLVM_DEBUG(dbgs() << "\t\tDemoting to .old op:\t";
                   OriginalHead->dump());
        DemoteToDotOld(MI: &*OriginalHead);
      }
    }
  }

  // Now we need to update liveness to all BBs involved
  // including those we might have "passed" through on the way here.
  LLVM_DEBUG(dbgs() << "\n\tTo BB after bundle update:\n"; HomeBB->dump());
  LLVM_DEBUG(dbgs() << "\n\n\tFrom BB after bundle update:\n";
             OriginBB->dump());

  // Update global liveness.
  LLVM_DEBUG(dbgs() << "\n\tWorkList:\t");
  for (std::list<MachineBasicBlock *>::iterator BBI = WorkList.begin(),
                                                BBIE = WorkList.end();
       BBI != BBIE; BBI++) {
    LLVM_DEBUG(dbgs() << "BB#" << (*BBI)->getNumber() << " ");
  }
  LLVM_DEBUG(dbgs() << "\n");

  // NOTE(review): the do/while assumes WorkList is non-empty, i.e. that
  // backtrack always holds at least one instruction here -- confirm.
  do {
    MachineBasicBlock *BB = WorkList.back();
    WorkList.pop_back();
    CurrentRegion->getLivenessInfoForBB(MBB: BB)->UpdateLiveness(MBB: BB);
  } while (!WorkList.empty());

  // No need to analyze for empty BB or update CFG for same BB pullup.
  if (OriginBB == HomeBB)
    return &*TargetHead;
  // If the instruction moved was a branch we need to update the
  // successor/predecessor of OriginBB and HomeBB accordingly.
  MachineBasicBlock *HomeTBB, *HomeFBB;
  MachineInstr *FTA = NULL, *STA = NULL;
  bool HomeBBAnalyzed = !AnalyzeBBBranches(MBB: HomeBB, TBB&: HomeTBB, FirstTerm&: FTA, FBB&: HomeFBB, SecondTerm&: STA);
  if (MII->isBranch()) {
    if (HomeBBAnalyzed) {
      UpdateCFG(HomeBB, OriginBB, MII: &*MII, HomeTBB, HomeFBB, FTA, STA, MBPI);
    } else {
      llvm_unreachable("Underimplememted AnalyzeBBBranches");
    }
  }
  // If we have exhausted the OriginBB clean it up.
  // Beware that we could have created dual conditional jumps, which
  // ultimately means we can have three way jumps.
  if (IsEmptyBlock(MBB: OriginBB) && !OriginBB->isEHPad() &&
      !OriginBB->hasAddressTaken() && !OriginBB->succ_empty()) {
    // Dead block? Unlikely, but check.
    LLVM_DEBUG(dbgs() << "Empty BB(" << OriginBB->getNumber() << ").\n");
    // Update region map.
    CurrentRegion->RemoveBBFromRegion(MBB: OriginBB);
    // Keep the list of empty basic blocks to be freed later.
    EmptyBBs.push_back(x: OriginBB);
    if (OriginBB->pred_empty() || OriginBB->succ_empty())
      return &*TargetHead;

    if (OriginBB->succ_size() == 1) {
      // Find empty block's successor.
      MachineBasicBlock *CommonFBB = *OriginBB->succ_begin();
      updatePredecessors(MBB&: *OriginBB, MFBB: CommonFBB);
      // Remove the only successor entry for empty BB.
      OriginBB->removeSuccessor(Succ: CommonFBB);
    } else {
      // Three way branching is not yet fully supported.
      assert((OriginBB->succ_size() == 2) && "Underimplemented 3way branch.");
      MachineBasicBlock *OriginTBB, *OriginFBB;
      MachineInstr *FTB = NULL, *STB = NULL;

      LLVM_DEBUG(dbgs() << "\tComplex case.\n");
      if (HomeBBAnalyzed &&
          !AnalyzeBBBranches(MBB: OriginBB, TBB&: OriginTBB, FirstTerm&: FTB, FBB&: OriginFBB, SecondTerm&: STB)) {
        assert(OriginFBB && "Missing Origin FBB");
        if (HomeFBB == OriginBB) {
          // OriginBB is FBB for HomeBB.
          if (HomeTBB == OriginTBB) {
            // Shared TBB target, common FBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginFBB);
          } else if (HomeTBB == OriginFBB) {
            // Shared TBB target, common FBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginTBB);
          } else {
            // Three way branch. Add new successor to HomeBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginFBB);
            // TODO: Update the weight as well.
            // Adding the successor to make updatePredecessor happy.
            HomeBB->addSuccessor(Succ: OriginBB);
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginTBB);
          }
        } else if (HomeTBB == OriginBB) {
          // OriginBB is TBB for HomeBB.
          if (HomeFBB == OriginTBB) {
            // Shared TBB target, common FBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginFBB);
          } else if (HomeFBB == OriginFBB) {
            // Shared TBB target, common FBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginTBB);
          } else {
            // Three way branch. Add new successor to HomeBB.
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginFBB);
            // TODO: Update the weight as well.
            // Adding the successor to make updatePredecessor happy.
            HomeBB->addSuccessor(Succ: OriginBB);
            updatePredecessors(MBB&: *OriginBB, MFBB: OriginTBB);
          }
        } else
          llvm_unreachable("CFG update failed");
        // The empty BB can now be relieved of its successors.
        OriginBB->removeSuccessor(Succ: OriginFBB);
        OriginBB->removeSuccessor(Succ: OriginTBB);
      } else
        llvm_unreachable("Underimplemented analyzeBranch");
    }
    LLVM_DEBUG(dbgs() << "Updated BB(" << HomeBB->getNumber() << ").\n";
               HomeBB->dump());
  }
  return &*TargetHead;
}
3136
// Find where inside a given bundle current instruction should be inserted.
// Instruction will be inserted _before_ this position.
//
// Placement policy, in priority order:
//   1. If MI is the second jump of a dual-jump pair, right after the
//      first jump of the pair.
//   2. Otherwise, right before the bundle's first branch (if any).
//   3. Otherwise, after the last bundled instruction.
// Sets LastInBundle when the returned position is at (or past) the end
// of the bundle body.
MachineBasicBlock::instr_iterator
HexagonGlobalSchedulerImpl::findInsertPositionInBundle(
    MachineBasicBlock::iterator &Bundle, MachineInstr *MI, bool &LastInBundle) {
  MachineBasicBlock::instr_iterator MII = Bundle.getInstrIterator();
  MachineBasicBlock *MBB = MII->getParent();
  MachineBasicBlock::instr_iterator BBEnd = MBB->instr_end();
  // BBEnd doubles as the "not found" sentinel for all three candidates.
  MachineBasicBlock::instr_iterator FirstBranch = BBEnd;
  MachineBasicBlock::instr_iterator LastBundledInstruction = BBEnd;
  MachineBasicBlock::instr_iterator DualJumpFirstCandidate = BBEnd;

  assert(MII->isBundle() && "Missing insert location");
  bool isDualJumpSecondCandidate = IsDualJumpSecondCandidate(MI);
  LastInBundle = false;

  // Scan the bundle body (instructions following the BUNDLE header).
  for (++MII; MII != BBEnd && MII->isInsideBundle() && !MII->isBundle();
       ++MII) {
    // Remember the first branch so non-jump inserts can go before it.
    if (MII->isBranch() && (FirstBranch == BBEnd))
      FirstBranch = MII;
    // If what we insert is a dual jump, we need to find
    // first jump, and insert new instruction after it.
    if (isDualJumpSecondCandidate && IsDualJumpFirstCandidate(MI: &*MII))
      DualJumpFirstCandidate = MII;
    LastBundledInstruction = MII;
  }

  if (DualJumpFirstCandidate != BBEnd) {
    // First respect dual jumps.
    ++DualJumpFirstCandidate;
    if (DualJumpFirstCandidate == BBEnd ||
        DualJumpFirstCandidate == LastBundledInstruction)
      LastInBundle = true;
    return DualJumpFirstCandidate;
  } else if (FirstBranch != BBEnd) {
    // If we have no dual jumps, but do have a single
    // branch in the bundle, add our new instruction
    // right before it.
    return FirstBranch;
  } else if (LastBundledInstruction != BBEnd) {
    LastInBundle = true;
    return ++LastBundledInstruction;
  } else
    llvm_unreachable("Lost in bundle");
  // Unreachable: placates compilers that do not model llvm_unreachable.
  return MBB->instr_begin();
}
3183
3184/// This function for now needs to try to insert new instruction
3185/// in correct serial semantics fashion - i.e. find "correct" insert
3186/// point for instruction as if inserting in serial sequence.
3187MachineBasicBlock::instr_iterator HexagonGlobalSchedulerImpl::insertTempCopy(
3188 MachineBasicBlock *MBB, MachineBasicBlock::iterator &TargetPacket,
3189 MachineInstr *MI, bool DeleteOldCopy) {
3190 MachineBasicBlock::instr_iterator MII;
3191 MachineBasicBlock *CurrentBB = MI->getParent();
3192
3193 assert(CurrentBB && "Corrupt instruction");
3194 // Create a temporary copy of the instruction we are considering.
3195 // LLVM refuses to deal with an instruction which was not inserted
3196 // to any BB. We can visit multiple BBs on the way "up", so we
3197 // create a temp copy of the original instruction and delete it later.
3198 // It is way cheaper than using splice and then
3199 // needing to undo it most of the time.
3200 MachineInstr *NewMI = MI->getParent()->getParent()->CloneMachineInstr(Orig: MI);
3201 // Make sure all bundling flags are cleared.
3202 if (NewMI->isBundledWithPred())
3203 NewMI->unbundleFromPred();
3204 if (NewMI->isBundledWithSucc())
3205 NewMI->unbundleFromSucc();
3206
3207 if (DeleteOldCopy) {
3208 // Remove our temporary instruction.
3209 // MachineBasicBlock::erase method calls unbundleSingleMI()
3210 // prior to deletion, so we do not have to do it here.
3211 MachineBasicBlock::instr_iterator kill_it(MI);
3212 CurrentBB->erase(I: kill_it);
3213 }
3214
3215 // If the original instruction used new value operands,
3216 // it might need to be changed to generic form
3217 // before further processing.
3218 if (QII->isDotNewInst(MI: *NewMI))
3219 DemoteToDotOld(MI: NewMI);
3220
3221 // Insert new temporary instruction.
3222 // If this is the destination packet, insert the tmp after
3223 // its header. Otherwise, as second instr in BB.
3224 if (TargetPacket->getParent() == MBB) {
3225 MII = TargetPacket.getInstrIterator();
3226
3227 if (MII->isBundled()) {
3228 bool LastInBundle = false;
3229 MachineBasicBlock::instr_iterator InsertBefore =
3230 findInsertPositionInBundle(Bundle&: TargetPacket, MI: NewMI, LastInBundle);
3231 MIBundleBuilder Bundle(&*TargetPacket);
3232 Bundle.insert(I: InsertBefore, MI: NewMI);
3233 } else
3234 MBB->insertAfter(I: MII, MI: NewMI);
3235 } else {
3236 MII = MBB->instr_begin();
3237
3238 // Skip debug instructions.
3239 while (MII->isDebugInstr())
3240 MII++;
3241
3242 if (MII->isBundled()) {
3243 MIBundleBuilder Bundle(&*MII);
3244 Bundle.insert(I: ++MII, MI: NewMI);
3245 } else
3246 MBB->insertAfter(I: MII, MI: NewMI);
3247 }
3248 return NewMI->getIterator();
3249}
3250
3251// Check for a conditionally assigned register within the block.
3252bool HexagonGlobalSchedulerImpl::MIsCondAssign(MachineInstr *BMI,
3253 MachineInstr *MI,
3254 SmallVector<unsigned, 4> &Defs) {
3255 if (!QII->isPredicated(MI: *BMI))
3256 return false;
3257 // Its a conditional instruction, now is it the same registers as MI?
3258 SmallVector<unsigned, 4> CondDefs;
3259 SmallVector<unsigned, 8> CondUses;
3260 parseOperands(MI: BMI, Defs&: CondDefs, Uses&: CondUses);
3261
3262 for (SmallVector<unsigned, 4>::iterator ID = Defs.begin(), IDE = Defs.end();
3263 ID != IDE; ++ID) {
3264 for (SmallVector<unsigned, 4>::iterator CID = CondDefs.begin(),
3265 CIDE = CondDefs.end();
3266 CID != CIDE; ++CID) {
3267 if (AliasingRegs(RegA: *CID, RegB: *ID)) {
3268 LLVM_DEBUG(dbgs() << "\tFound conditional def, can't move\n";
3269 BMI->dump());
3270 return true;
3271 }
3272 }
3273 }
3274 return false;
3275}
3276
3277// Returns the Union of all the elements in Set1 and
3278// Union of all the elements in Set2 separately.
3279// Constraints:
3280// Set1 and Set2 should contain an entry for each element in Range.
3281template <typename ElemType, typename IndexType>
3282void Unify(std::vector<ElemType> Range,
3283 std::map<ElemType, std::vector<IndexType>> &Set1,
3284 std::map<ElemType, std::vector<IndexType>> &Set2,
3285 std::pair<std::vector<IndexType>, std::vector<IndexType>> &UnionSet,
3286 unsigned union_size = 100) {
3287 typedef
3288 typename std::map<ElemType, std::vector<IndexType>>::iterator PosIter_t;
3289 typedef typename std::vector<IndexType>::iterator IndexIter_t;
3290 std::vector<IndexType> &Union1 = UnionSet.first;
3291 std::vector<IndexType> &Union2 = UnionSet.second;
3292 Union1.resize(union_size, 0);
3293 Union2.resize(union_size, 0);
3294 LLVM_DEBUG(dbgs() << "\n\t\tElements in the range:\n";);
3295 typename std::vector<ElemType>::iterator iter = Range.begin();
3296 while (iter != Range.end()) {
3297 if ((*iter)->isDebugInstr()) {
3298 ++iter;
3299 continue;
3300 }
3301 LLVM_DEBUG((*iter)->dump());
3302 PosIter_t set1_pos = Set1.find(*iter);
3303 assert(set1_pos != Set1.end() &&
3304 "Set1 should contain an entry for each element in Range.");
3305 IndexIter_t set1idx = set1_pos->second.begin();
3306 while (set1idx != set1_pos->second.end()) {
3307 Union1[*set1idx] = 1;
3308 ++set1idx;
3309 }
3310 PosIter_t set2_pos = Set2.find(*iter);
3311 assert(set2_pos != Set2.end() &&
3312 "Set2 should contain an entry for each element in Range.");
3313 IndexIter_t set2idx = set2_pos->second.begin();
3314 while (set2idx != set2_pos->second.end()) {
3315 Union2[*set2idx] = 1;
3316 ++set2idx;
3317 }
3318 ++iter;
3319 }
3320}
3321
3322static void UpdateBundle(MachineInstr *BundleHead) {
3323 assert(BundleHead->isBundle() && "Not a bundle header");
3324 if (!BundleHead)
3325 return;
3326 unsigned Size = BundleHead->getBundleSize();
3327 if (Size >= 2)
3328 return;
3329 if (Size == 1) {
3330 MachineBasicBlock::instr_iterator MIter = BundleHead->getIterator();
3331 MachineInstr *MI = &*(++MIter);
3332 MI->unbundleFromPred();
3333 }
3334 BundleHead->eraseFromParent();
3335}
3336
/// Gatekeeper for instruction speculation.
/// If all MI defs are dead (not live-in) to any other
/// BB but the one we are moving into, and it could not cause
/// exception by early execution, allow it to be pulled up.
/// \p MI        instruction to be hoisted.
/// \p ToBB      destination block (must be a CFG predecessor of \p FromBB).
/// \p FromBB    block currently containing \p MI.
/// \p backtrack packets on the path used to reach \p MI; their parent blocks
///              define the instruction range checked for conflicting
///              defs/uses.
/// Returns true only when every conservative legality test passes.
bool HexagonGlobalSchedulerImpl::canMIBeSpeculated(
    MachineInstr *MI, MachineBasicBlock *ToBB, MachineBasicBlock *FromBB,
    std::vector<MachineInstr *> &backtrack) {
  // For now disallow memory accesses from speculation.
  // Generally we can check if they potentially may trap/cause an exception.
  if (!EnableSpeculativePullUp || !MI || MI->mayStore())
    return false;

  LLVM_DEBUG(dbgs() << "\t[canMIBeSpeculated] From BB(" << FromBB->getNumber()
                    << "):\t";
             MI->dump());
  LLVM_DEBUG(dbgs() << "\tTo this BB:\n"; ToBB->dump());

  // Speculation is only meaningful across an existing CFG edge ToBB->FromBB.
  if (!ToBB->isSuccessor(MBB: FromBB))
    return false;

  // This is a very tricky topic. Speculating arithmetic instructions with
  // results dead out of a loop more times than required by number of
  // iterations is safe, while speculating loads can cause an exception.
  // Simplest of checks is to not cross loop exit edge, or in our case
  // do not pull-in to a loop exit BB, but there are implications for
  // non-natural loops (not recognized by LLVM as loops) and multi-threaded
  // code.
  if (AllowSpeculateLoads && MI->mayLoad()) {
    // Invariant loads should always be safe.
    if (!MI->isDereferenceableInvariantLoad())
      return false;
    LLVM_DEBUG(dbgs() << "\tSpeculating a Load.\n");
  }

  // Collect the registers MI defines and uses.
  SmallVector<unsigned, 4> Defs;
  SmallVector<unsigned, 8> Uses;
  parseOperands(MI, Defs, Uses);

  // Do not speculate instructions that modify reserved global registers.
  for (unsigned R : Defs)
    if (MRI->isReserved(PhysReg: R) && QRI->isGlobalReg(Reg: R))
      return false;

  // MI's defs must be dead along every side exit of ToBB: any successor
  // other than FromBB that has one of our defs live-in (or an instruction
  // conditionally defining it) would observe the speculated value.
  for (MachineBasicBlock::const_succ_iterator SI = ToBB->succ_begin(),
                                              SE = ToBB->succ_end();
       SI != SE; ++SI) {
    // TODO: Allow an instruction (I) which 'defines' the live-in reg (R)
    // along the path when I is the first instruction to use the R.
    // i.e., I kills R before any other instruction in the BB uses it.
    // TODO: We have already parsed live sets - reuse them.
    if (*SI == FromBB)
      continue;
    LLVM_DEBUG(dbgs() << "\tTarget succesor BB to check:\n"; (*SI)->dump());
    LLVM_DEBUG(
        for (MachineBasicBlock::const_succ_iterator SII = (*SI)->succ_begin(),
                                                    SIE = (*SI)->succ_end();
             SII != SIE; ++SII)(*SII)
            ->dump());
    // Reject if any def (or an aliasing register) is live into this successor.
    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
                                            E = (*SI)->livein_end();
         I != E; ++I)
      for (SmallVector<unsigned, 4>::iterator ID = Defs.begin(),
                                              IDE = Defs.end();
           ID != IDE; ++ID) {
        if (AliasingRegs(RegA: (*I).PhysReg, RegB: *ID))
          return false;
      }

    // Check the successor blocks for conditional define.
    // TODO: We should really test the whole path here.
    for (MachineBasicBlock::instr_iterator BI = (*SI)->instr_begin(),
                                           E = (*SI)->instr_end();
         BI != E; ++BI) {
      if (BI->isBundle() || BI->isDebugInstr())
        continue;
      LLVM_DEBUG(dbgs() << "\t\tcheck against:\t"; BI->dump());
      if (MIsCondAssign(BMI: &*BI, MI, Defs))
        return false;
    }
  }
  // Taking a very conservative approach during speculation.
  // Traverse the path (FromBB, ToBB] and make sure
  // that the def-use set of the instruction to be moved
  // are not modified.
  std::vector<MachineBasicBlock *> PathBB;
  for (unsigned i = 0; i < backtrack.size(); ++i) {
    // Insert unique BB along the path but skip FromBB
    MachineBasicBlock *MBB = backtrack[i]->getParent();
    if ((MBB != FromBB) &&
        (std::find(first: PathBB.begin(), last: PathBB.end(), val: MBB) == PathBB.end()))
      PathBB.push_back(x: MBB);
  }
  bool WaitingForTargetPacket = true;
  MachineBasicBlock::instr_iterator MII;
  std::vector<MachineInstr *> TraversalRange;
  LLVM_DEBUG(dbgs() << "\n\tElements in the range:");
  // TODO: Use just the backtrack to get TraversalRange because it
  // contains the path (only when speculated from a path in region).
  // Note: We check the dependency of instruction-to-move with
  // all the instructions (starting from backtrack[0]) in the parent BBs
  // because a BB might have a branching from in between due to packetization
  // and just checking packets in the backtrack won't be comprehensive.
  for (unsigned i = 0; i < PathBB.size(); ++i) {
    for (MII = PathBB[i]->instr_begin(); MII != PathBB[i]->instr_end(); ++MII) {
      // Skip instructions until the target packet is found.
      // although target packet is already checked for correctness,
      // it is good to check here to validate intermediate pullups.
      if (backtrack[0] == &*MII)
        WaitingForTargetPacket = false;
      if (WaitingForTargetPacket)
        continue;
      if (MII->isBundle())
        continue;
      // TODO: Ideally we should check that there is a `linear' control flow
      // in the TraversalRange in all possible manner. For e.g.,
      // BB0 { packet1: if(p0) indirect_jump BB1;
      //       packet2: jump BB2 }
      // BB1 { i1 }. In this case we should not pull `i1' into packet2.
      if (MII->isCall() || MII->isReturn() ||
          (MII->getOpcode() == Hexagon::J2_jump && !MII->getOperand(i: 0).isMBB()))
        return false;
      if (MI != &*MII) {
        TraversalRange.push_back(x: &*MII);
        LLVM_DEBUG(MII->dump(););
      }
    }
  }
  // Get the union of def/use set of all the instructions along TraversalRange.
  // RangeDefUse.first is the def bitmap, .second the use bitmap, both indexed
  // by physical register number.
  std::pair<std::vector<unsigned>, std::vector<unsigned>> RangeDefUse;
  Unify(Range: TraversalRange, Set1&: MIDefSet, Set2&: MIUseSet, UnionSet&: RangeDefUse, union_size: QRI->getNumRegs());
  // No instruction (along TraversalRange) should 'define' the use set of MI
  for (unsigned j = 0; j < Uses.size(); ++j)
    if (RangeDefUse.first[Uses[j]]) {
      LLVM_DEBUG(dbgs() << "\n\t\tUnresolved dependency along path to HOME for "
                        << printReg(Uses[j], QRI););
      return false;
    }
  // No instruction (along TraversalRange) should 'define' or 'use'
  // the def set of MI
  for (unsigned j = 0; j < Defs.size(); ++j)
    if (RangeDefUse.first[Defs[j]] || RangeDefUse.second[Defs[j]]) {
      LLVM_DEBUG(dbgs() << "\n\t\tUnresolved dependency along path to HOME for "
                        << printReg(Defs[j], QRI););
      return false;
    }
  return true;
}
3484
/// Try to move InstrToMove to TargetPacket using path stored in backtrack.
/// SourceLocation is current iterator point. It must be updated to the new
/// iteration location after all updates.
/// Algorithm:
/// To move an instruction (I) from OriginBB through HomeBB via backtrack.
/// for each packet (i) in backtrack, analyzeBranch
///   case 1 (success)
///     case Pulling from conditional branch:
///       if I is predicable
///         Try to predicate on the branch condition
///       else
///         Try to speculate I to backtrack[i].
///     case Pulling from unconditional branch:
///       Just pullup. (TODO: Speculate here as well)
///   case 2 (fails)
///     Try to speculate I backtrack[i].
///
/// While walking the path, the instruction being moved may be replaced by a
/// temp copy (possibly predicated) inserted into the current block;
/// NeedCleanup/CleanupBB track that copy so it can be erased on any bailout.
/// Returns true iff the instruction was placed into TargetPacket.
bool HexagonGlobalSchedulerImpl::MoveMItoBundle(
    BasicBlockRegion *CurrentRegion,
    MachineBasicBlock::instr_iterator &InstrToMove,
    MachineBasicBlock::iterator &NextMI,
    MachineBasicBlock::iterator &TargetPacket,
    MachineBasicBlock::iterator &SourceLocation,
    std::vector<MachineInstr *> &backtrack, bool MovingDependentOp,
    bool PathInRegion) {
  MachineBasicBlock *HomeBB = TargetPacket->getParent();
  MachineBasicBlock *OriginBB = InstrToMove->getParent();
  MachineBasicBlock *CurrentBB = OriginBB;
  MachineBasicBlock *CleanupBB = OriginBB;
  MachineBasicBlock *PreviousBB = OriginBB;
  MachineInstr *OriginalInstructionToMove = &*InstrToMove;

  assert(HomeBB && "Missing HomeBB");
  assert(OriginBB && "Missing OriginBB");

  LLVM_DEBUG(dbgs() << "\n.........[MoveMItoBundle]..............\n");
  LLVM_DEBUG(dbgs() << "\t\tInstrToMove   :\t"; InstrToMove->dump());
  LLVM_DEBUG(dbgs() << "\t\tTargetPacket  :\t";
             DumpPacket(TargetPacket.getInstrIterator()));
  LLVM_DEBUG(dbgs() << "\t\tSourceLocation:\t";
             DumpPacket(SourceLocation.getInstrIterator()));

  // We do not allow to move instructions in the same BB.
  if (HomeBB == OriginBB) {
    LLVM_DEBUG(dbgs() << "\t\tSame BB pull-up.\n");
    if (!EnableLocalPullUp)
      return false;
  }

  // Optional per-packet resource-mix restrictions (tuning flags).
  if (OneFloatPerPacket && QII->isFloat(MI: *TargetPacket) &&
      QII->isFloat(MI: *InstrToMove))
    return false;

  if (OneComplexPerPacket && QII->isComplex(MI: *TargetPacket) &&
      QII->isComplex(MI: *InstrToMove))
    return false;

  LLVM_DEBUG(dbgs() << "\t\tWay home:\n");
  // Test integrity of the back track.
  for (unsigned i = 0; i < backtrack.size(); ++i) {
    assert(backtrack[i]->getParent() && "Messed back track.");
    LLVM_DEBUG(dbgs() << "\t\t[" << i << "] BB("
                      << backtrack[i]->getParent()->getNumber() << ")\t";
               backtrack[i]->dump());
  }
  LLVM_DEBUG(dbgs() << "\n");

  // State carried along the walk home:
  //   NeedCleanup     - a temp copy exists in CleanupBB and must be erased
  //                     on failure.
  //   NeedToPredicate - the moved copy has to be (or has been) predicated.
  //   MINeedToNewify  - the copy must be converted to .new form on DepReg.
  bool NeedCleanup = false;
  bool NeedToPredicate = false;
  bool MINeedToNewify = false;
  unsigned DepReg = std::numeric_limits<unsigned>::max();
  bool isDualJump = false;
  SmallVector<MachineOperand, 4> Cond;
  SmallVector<MachineOperand, 4> PredCond;
  // Path handed to canMIBeSpeculated: the real backtrack when it lies in the
  // region, otherwise just the endpoints.
  std::vector<MachineInstr *> PullUpPath;
  if (PathInRegion)
    PullUpPath = backtrack;
  else {
    PullUpPath.push_back(x: &*TargetPacket);
    PullUpPath.push_back(x: &*InstrToMove);
  }

  // Now start iterating over all instructions
  // preceding the one we are trying to move,
  // and see if they could be reordered/bypassed.
  for (std::vector<MachineInstr *>::reverse_iterator RI = backtrack.rbegin(),
                                                     RIE = backtrack.rend();
       RI < RIE; ++RI) {
    // Once most of debug will be gone, this will be a real assert.
    // assert((backtrack.front() == ToThisBundle) && "Lost my way home.");
    MachineInstr *MIWH = *RI;
    if (QII->isDotNewInst(MI: *InstrToMove)) {
      LLVM_DEBUG(dbgs() << "Cannot move a dot new instruction:";
                 InstrToMove->dump());
      if (NeedCleanup)
        CleanupBB->erase(I: InstrToMove);
      return false;
    }
    if (canCauseStall(MI: &*InstrToMove, MJ: MIWH)) {
      if (NeedCleanup)
        CleanupBB->erase(I: InstrToMove);
      return false;
    }
    LLVM_DEBUG(dbgs() << "\t> Step home BB(" << MIWH->getParent()->getNumber()
                      << "):\t";
               DumpPacket(MIWH->getIterator()));

    // See if we cross a jump, and possibly change the form of instruction.
    // Passing through BBs with dual jumps in different packets
    // takes extra care.
    bool isBranchMIWH = isBranch(MI: MIWH);
    if (((&*SourceLocation != MIWH) && isBranchMIWH) ||
        (CurrentBB != MIWH->getParent())) {
      LLVM_DEBUG(dbgs() << "\tChange BB from(" << CurrentBB->getNumber()
                        << ") to (" << MIWH->getParent()->getNumber() << ")\n");
      PreviousBB = CurrentBB;
      CurrentBB = MIWH->getParent();

      // See what kind of branch we are dealing with.
      MachineBasicBlock *PredTBB = NULL;
      MachineBasicBlock *PredFBB = NULL;

      if (QII->analyzeBranch(MBB&: *CurrentBB, TBB&: PredTBB, FBB&: PredFBB, Cond, AllowModify: false)) {
        // We currently do not handle NV jumps of this kind:
        // if (cmp.eq(r0.new, #0)) jump:t .LBB12_69
        // TODO: Need to handle them.
        LLVM_DEBUG(dbgs() << "\tCould not analyze branch.\n");

        // This is the main point of lost performance.
        // We could try to speculate here, but for that we need accurate
        // liveness info, and it is not ready yet.
        if (!canMIBeSpeculated(MI: &*InstrToMove, ToBB: CurrentBB, FromBB: PreviousBB,
                               backtrack&: PullUpPath)) {
          if (NeedCleanup)
            CleanupBB->erase(I: InstrToMove);
          return false;
        } else {
          // Save speculated instruction moved.
          SpeculatedIns.insert(
              x: std::make_pair(x&: OriginalInstructionToMove, y&: OriginBB));
          LLVM_DEBUG(dbgs() << "\nSpeculatedInsToMove"; InstrToMove->dump());
        }

        LLVM_DEBUG(dbgs() << "\tSpeculating.\n");
        // If we are speculating, we can come through a predication
        // into an unconditional branch...
        // For now simply bail out.
        // TODO: See if this ever happens.
        if (NeedToPredicate) {
          LLVM_DEBUG(dbgs()
                     << "\tUnderimplemented pred for speculative move.\n");
          if (NeedCleanup)
            CleanupBB->erase(I: InstrToMove);
          return false;
        }
        // Replace the working copy with a fresh temp in this block.
        InstrToMove =
            insertTempCopy(MBB: CurrentBB, TargetPacket, MI: &*InstrToMove, DeleteOldCopy: NeedCleanup);
        NeedCleanup = true;
        NeedToPredicate = false;
        assert(!NeedToPredicate && "Need to handle predication for this case");
        CleanupBB = CurrentBB;
        // No need to recheck for resources - instruction did not change.
        LLVM_DEBUG(dbgs() << "\tUpdated BB:\n"; CurrentBB->dump());
      } else {
        bool LocalNeedPredication = true;
        // We were able to analyze the branch.
        if (!isBranchMIWH && !PredTBB) {
          LLVM_DEBUG(dbgs() << "\tDo not need predicate for this case.\n");
          LocalNeedPredication = false;
        }
        // First see if this is a potential dual jump situation.
        if (IsDualJumpSecondCandidate(MI: &*InstrToMove) &&
            IsDualJumpFirstCandidate(TargetPacket)) {
          LLVM_DEBUG(dbgs() << "\tPerforming unrestricted dual jump.\n");
          isDualJump = true;
        } else if (LocalNeedPredication && (PredFBB != PreviousBB)) {
          // Predicate instruction based on condition feeding it.
          // This is generally a stateful pull-up path.
          // Can this insn be predicated? If so, try to do it.
          if (TII->isPredicable(MI: *InstrToMove)) {
            if (PredTBB) {
              if (PreviousBB != PredTBB) {
                // If we "came" not from TBB, we need to invert condition.
                if (TII->reverseBranchCondition(Cond)) {
                  LLVM_DEBUG(dbgs() << "\tUnable to invert condition.\n");
                  if (NeedCleanup)
                    CleanupBB->erase(I: InstrToMove);
                  return false;
                }
              }
              LLVM_DEBUG(dbgs() << "\tTBB(" << PredTBB->getNumber()
                                << ")InvertCondition("
                                << (PreviousBB != PredTBB) << ")\n");
            }
            // Create a new copy of the instruction we are trying to move.
            // It changes enough (new BB, predicated form) and until we
            // reach home, we do not even know if it is going to work.
            InstrToMove = insertTempCopy(MBB: CurrentBB, TargetPacket, MI: &*InstrToMove,
                                         DeleteOldCopy: NeedCleanup);
            NeedCleanup = true;
            NeedToPredicate = true;
            CleanupBB = CurrentBB;

            if (PredCond.empty() && // If not already predicated.
                TII->PredicateInstruction(MI&: *InstrToMove, Pred: Cond)) {
              LLVM_DEBUG(dbgs() << "\tNew predicated insn:\t";
                         InstrToMove->dump());
              // After predication some instruction could become const extended:
              // L2_loadrigp == "$dst=memw(#$global)"
              // L4_ploadrit_abs == "if ($src1) $dst=memw(##$global)"
              // Resource checking for those is different.
              if ((QII->isExtended(MI: *InstrToMove) ||
                   QII->isConstExtended(MI: *InstrToMove) ||
                   isJumpOutOfRange(MI: &*InstrToMove)) &&
                  !tryAllocateResourcesForConstExt(MI: &*InstrToMove, UpdateState: false)) {
                // If we cannot, do not modify the state.
                LLVM_DEBUG(dbgs()
                           << "\tEI Could not be added to the packet.\n");
                CleanupBB->erase(I: InstrToMove);
                return false;
              }

              if (!ResourceTracker->canReserveResources(MI&: *InstrToMove) ||
                  !shouldAddToPacket(MI: *InstrToMove)) {
                // It will not fit in its new form...
                LLVM_DEBUG(dbgs() << "\tCould not be added in its new form.\n");
                CurrentBB->erase(I: InstrToMove);
                return false;
              }

              // Need also verify that we can newify it if we want to.
              if (NeedToNewify(NewMI: InstrToMove, DepReg: &DepReg, TargetPacket: &*TargetPacket)) {
                if (isNewifiable(MII: InstrToMove, DepReg, TargetPacket: &*TargetPacket)) {
                  MINeedToNewify = true;
                  LLVM_DEBUG(dbgs() << "\t\t\tNeeds to NEWify on Reg("
                                    << printReg(DepReg, QRI) << ").\n");
                } else {
                  LLVM_DEBUG(dbgs() << "\tNon newifiable in this bundle: ";
                             InstrToMove->dump());
                  CleanupBB->erase(I: InstrToMove);
                  return false;
                }
              }

              LLVM_DEBUG(dbgs() << "\tUpdated BB:\n"; CurrentBB->dump());
              PredCond = Cond;
              // Now the instruction uses the pred-reg as well.
              if (!Cond.empty() && (Cond.size() == 2)) {
                MIUseSet[OriginalInstructionToMove].push_back(x: Cond[1].getReg());
              }
              assert(((Cond.size() <= 2) &&
                      !(QII->isNewValueJump(Cond[0].getImm()))) &&
                     "Update MIUseSet for new-value compare jumps");
            } else {
              LLVM_DEBUG(dbgs() << "\tCould not predicate it\n");
              LLVM_DEBUG(dbgs() << "\tTrying to speculate!\t";
                         InstrToMove->dump());
              bool DistantSpeculation = false;
              std::vector<MachineInstr *> NonPredPullUpPath;
              unsigned btidx = 0;
              // Generate a backtrack path for instruction to be speculated.
              // Original backtrack may start from a different (ancestor)
              // target packet.
              while (btidx < backtrack.size()) {
                const MachineBasicBlock *btBB = backtrack[btidx]->getParent();
                if ((btBB == PreviousBB) || (btBB == CurrentBB))
                  NonPredPullUpPath.push_back(x: backtrack[btidx]);
                ++btidx;
              }
              // Speculate only to immediate predecessor.
              if (PreviousBB != CurrentBB) {
                if (*(PreviousBB->pred_begin()) != CurrentBB) {
                  // In a region there are no side entries.
                  DistantSpeculation = true;
                  LLVM_DEBUG(dbgs()
                                 << "\n\tMI not in immediate successor of BB#"
                                 << CurrentBB->getNumber() << ", MI is in BB#"
                                 << PreviousBB->getNumber(););
                }
                assert((PreviousBB->pred_size() < 2) &&
                       "Region with a side entry");
              }
              // TODO: Speculate ins. when pulled from unlikely path.
              if (DistantSpeculation || /*!PathInRegion ||*/
                  InstrToMove->mayLoad() || InstrToMove->mayStore() ||
                  InstrToMove->hasUnmodeledSideEffects() ||
                  !canMIBeSpeculated(MI: &*InstrToMove, ToBB: CurrentBB, FromBB: PreviousBB,
                                     backtrack&: NonPredPullUpPath)) {
                CleanupBB->erase(I: InstrToMove);
                return false;
              } else {
                // Save speculated instruction moved.
                NeedToPredicate = false;
                SpeculatedIns.insert(
                    x: std::make_pair(x&: OriginalInstructionToMove, y&: OriginBB));
                LLVM_DEBUG(dbgs() << "\nPredicable+SpeculatedInsToMove";
                           InstrToMove->dump());
              }
            }
          } else {
            // This is a non-predicable instruction. We still can try to
            // speculate it here.
            LLVM_DEBUG(dbgs() << "\tNon predicable insn!\t";
                       InstrToMove->dump());
            // TODO: Speculate ins. when pulled from unlikely path.
            if (!SpeculateNonPredInsn || !PathInRegion ||
                InstrToMove->mayLoad() || InstrToMove->mayStore() ||
                InstrToMove->hasUnmodeledSideEffects() ||
                !canMIBeSpeculated(MI: &*InstrToMove, ToBB: CurrentBB, FromBB: PreviousBB,
                                   backtrack&: PullUpPath)) {
              if (NeedCleanup)
                CleanupBB->erase(I: InstrToMove);
              return false;
            } else {
              // Save speculated instruction moved.
              SpeculatedIns.insert(
                  x: std::make_pair(x&: OriginalInstructionToMove, y&: OriginBB));
              LLVM_DEBUG(dbgs() << "\nNonPredicable+SpeculatedInsToMove";
                         InstrToMove->dump());
            }

            InstrToMove = insertTempCopy(MBB: CurrentBB, TargetPacket, MI: &*InstrToMove,
                                         DeleteOldCopy: NeedCleanup);
            NeedCleanup = true;
            CleanupBB = CurrentBB;
          }
        } else {
          // No branch. Fall through.
          LLVM_DEBUG(dbgs() << "\tFall through BB.\n"
                            << "\tCurrentBB:" << CurrentBB->getNumber()
                            << "\tPreviousBB:" << PreviousBB->getNumber();
                     if (PredFBB) dbgs()
                     << "\tPredFBB:" << PredFBB->getNumber(););
          // Even though this is a fall though case, we still can
          // have a dual jump situation here with a CALL involved.
          // For now simply avoid it.
          if (IsDualJumpSecondCandidate(MI: &*InstrToMove)) {
            llvm_unreachable("Dual jumps with known?");
            LLVM_DEBUG(dbgs() << "\tUnderimplemented dual jump formation.\n");
            if (NeedCleanup)
              CleanupBB->erase(I: InstrToMove);
            return false;
          }

          if (!CurrentBB->isSuccessor(MBB: PreviousBB)) {
            LLVM_DEBUG(dbgs() << "\tNon-successor fall through.\n");
            if (NeedCleanup)
              CleanupBB->erase(I: InstrToMove);
            return false;
          }
          SpeculatedIns.insert(
              x: std::make_pair(x&: OriginalInstructionToMove, y&: OriginBB));
          LLVM_DEBUG(dbgs() << "\nSpeculatedInsToMove+FallThroughBB";
                     InstrToMove->dump());
          // Create a temp copy.
          InstrToMove = insertTempCopy(MBB: CurrentBB, TargetPacket, MI: &*InstrToMove,
                                       DeleteOldCopy: NeedCleanup);
          NeedCleanup = true;
          NeedToPredicate = false;
          CleanupBB = CurrentBB;
          LLVM_DEBUG(dbgs() << "\tUpdated BB:\n"; CurrentBB->dump());
        }
      }
    }
    // If we have reached Home, great.
    // Original check should have verified that instruction could be added
    // to the target packet, so here we do nothing for deps.
    if (MIWH == backtrack.front()) {
      LLVM_DEBUG(dbgs() << "\tHOME!\n");
      break;
    }

    // Test if we can reorder the two MIs.
    // The exception is when we are forming dual jumps - we can pull up
    // dependent instruction to the last bundle of an immediate predecessor
    // of the current BB if control flow permits it.
    // In this special case we also need to update the bundle we are moving
    // from.
    if (!(MovingDependentOp && (MIWH == &*SourceLocation)) &&
        !canReorderMIs(MIa: MIWH, MIb: &*InstrToMove)) {
      if (NeedCleanup)
        CleanupBB->erase(I: InstrToMove);
      return false;
    }
  }
  // We have previously tested this instruction, but has not updated the state
  // for it. Do it now.
  if (QII->isExtended(MI: *InstrToMove) || QII->isConstExtended(MI: *InstrToMove) ||
      isJumpOutOfRange(MI: &*InstrToMove)) {
    if (!tryAllocateResourcesForConstExt(MI: &*InstrToMove))
      llvm_unreachable("Missed dependency test");
  }

  // Ok. We can safely move this instruction all the way up.
  // We also potentially have a slot for it.
  // During move original instruction could have changed (becoming predicated).
  // Now try to place the final instance of it into the current packet.
  LLVM_DEBUG(dbgs() << "\nWant to move ";
             if (MovingDependentOp) dbgs() << "dependent op"; dbgs() << ": ";
             InstrToMove->dump(); dbgs() << "To BB:\n"; HomeBB->dump();
             dbgs() << "From BB:\n"; OriginBB->dump());

  // Keep these two statistics separately.
  if (!isDualJump)
    HexagonNumPullUps++;
  else
    HexagonNumDualJumps++;

  // This means we have not yet inserted the temp copy of InstrToMove
  // in the target bundle. We are probably inside the same BB.
  if (!NeedCleanup) {
    InstrToMove =
        insertTempCopy(MBB: HomeBB, TargetPacket, MI: &*InstrToMove, DeleteOldCopy: NeedCleanup);
    NeedCleanup = true;
  }

  // No problems detected. Add it.
  // If we were adding InstrToMove to a single, not yet packetized
  // instruction, we need to create bundle header for it before proceeding.
  // Be careful since endPacket also resets the DFA state.
  if (!TargetPacket->isBundle()) {
    LLVM_DEBUG(dbgs() << "\tForm a new bundle.\n");
    finalizeBundle(MBB&: *HomeBB, FirstMI: TargetPacket.getInstrIterator(),
                   LastMI: std::next(x: InstrToMove));
    LLVM_DEBUG(HomeBB->dump());
    // Now we need to adjust pointer to the newly created packet header.
    MachineBasicBlock::instr_iterator MII = TargetPacket.getInstrIterator();
    MII--;

    // Is it also on the way home?
    // finalizeBundle created a new BUNDLE header; re-point any stale
    // references to the old first instruction.
    for (unsigned i = 0; i < backtrack.size(); ++i)
      if (backtrack[i] == &*TargetPacket)
        backtrack[i] = &*MII;

    // Is it where our next MI is pointing?
    if (NextMI == TargetPacket)
      NextMI = MII;
    TargetPacket = MII;
  }

  // Move and Update Liveness info.
  MoveAndUpdateLiveness(CurrentRegion, HomeBB, InstrToMove: &*InstrToMove, NeedToNewify: MINeedToNewify,
                        DepReg, MovingDependentOp, OriginBB,
                        OriginalInstruction: OriginalInstructionToMove, Cond&: PredCond, SourceLocation,
                        TargetPacket, NextMI, backtrack);

  LLVM_DEBUG(dbgs() << "\n______Updated______\n"; HomeBB->dump();
             OriginBB->dump());

  return true;
}
3935
/// Verify that we respect CFG layout during pull-up.
/// Returns true when it is safe to pull instructions up across the branch
/// \p MI with respect to the layout of \p CurrentRegion: the branch must
/// lead to the region's next block (or the block must trivially fall
/// through). Conservatively returns false for anything unanalyzable
/// (endloops, jump tables, malformed blocks).
bool HexagonGlobalSchedulerImpl::isBranchWithinRegion(
    BasicBlockRegion *CurrentRegion, MachineInstr *MI) {
  assert(MI && MI->isBranch() && "Missing call info");

  MachineBasicBlock *MBB = MI->getParent();
  LLVM_DEBUG(dbgs() << "\t[isBranchWithinRegion] BB(" << MBB->getNumber()
                    << ") Branch instr:\t";
             MI->dump());
  // If there is only one successor, it is safe to pull.
  if (MBB->succ_size() <= 1)
    return true;
  // If there are multiple successors (jump table), we should
  // not allow pull up over this instruction.
  if (MBB->succ_size() > 2)
    return false;

  // From here on MBB has exactly two successors.
  MachineBasicBlock *NextRegionBB;
  MachineBasicBlock *TBB, *FBB;
  MachineInstr *FirstTerm = NULL;
  MachineInstr *SecondTerm = NULL;

  if (AnalyzeBBBranches(MBB, TBB, FirstTerm, FBB, SecondTerm)) {
    LLVM_DEBUG(dbgs() << "\t\tAnalyzeBBBranches failed!\n");
    return false;
  }

  // If there is no jump in this BB, it simply falls through.
  if (!FirstTerm) {
    LLVM_DEBUG(dbgs() << "\t\tNo FirstTerm\n");
    return true;
  } else if (QII->isEndLoopN(Opcode: FirstTerm->getOpcode())) {
    // We can easily analyze where endloop would take us
    // but here it would be pointless either way since
    // the region will not cross it.
    LLVM_DEBUG(dbgs() << "\t\tEndloop terminator\n");
    return false;
  }
  // On some occasions we see code like this:
  // BB#142: derived from LLVM BB %init, Align 4 (16 bytes)
  //    Live Ins: %R17 %R18
  //    Predecessors according to CFG: BB#2
  //        EH_LABEL <MCSym=.Ltmp35>
  //        J2_jump <BB#3>, %PC<imp-def>
  //    Successors according to CFG: BB#3(1048575) BB#138(1)
  // It breaks most assumptions about CFG layout, so until we know
  // the source of it, let's have a safeguard.
  if (MBB->succ_size() > 1 && !TII->isPredicated(MI: *FirstTerm) &&
      !QII->isNewValueJump(MI: *FirstTerm)) {
    LLVM_DEBUG(dbgs() << "\t\tBadly formed BB.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "\t\tFirstTerm: "; FirstTerm->dump());
  LLVM_DEBUG(dbgs() << "\t\tSecondTerm: "; if (SecondTerm) SecondTerm->dump();
             else dbgs() << "None\n";);

  // All cases where there is only one branch in BB are OK to proceed.
  if (!SecondTerm)
    return true;

  assert(!QII->isEndLoopN(SecondTerm->getOpcode()) && "Found endloop.");

  // Find next BB in this region - if there is none, we will likely
  // stop pulling in the next check outside of this function.
  // This largely is don't care.
  NextRegionBB = CurrentRegion->findNextMBB(MBB);
  if (!NextRegionBB) {
    LLVM_DEBUG(dbgs() << "\t\tNo next BB in the region...\n");
    return true;
  }
  LLVM_DEBUG(dbgs() << "\t\tNextRegionBB(" << NextRegionBB->getNumber()
                    << ")\n");
  assert(TBB && "Corrupt BB layout");
  // This means we are trying to pull into a packet _before_ the first
  // branch in the MBB.
  if (MI == FirstTerm) {
    LLVM_DEBUG(dbgs() << "\t\tTBB(" << TBB->getNumber()
                      << ") NextBB in the region(" << NextRegionBB->getNumber()
                      << ")\n");
    return (TBB == NextRegionBB);
  }
  assert(FBB && "Corrupt BB layout");
  // This means we are trying to pull into the packet _after_ first branch,
  // and it is OK if we pull from the second branch target.
  // This pull is always speculative.
  if ((MI != SecondTerm)) {
    LLVM_DEBUG(dbgs() << "\t\tDual terminator not matching SecondTerm.\n");
    return false;
  }
  // Analyze the second branch in the BB.
  LLVM_DEBUG(dbgs() << "\t\tFBB(" << FBB->getNumber()
                    << ") NextBB in the region(" << NextRegionBB->getNumber()
                    << ")\n");
  return (FBB == NextRegionBB);
}
4032
4033/// Check if a given instruction is:
4034/// - a jump to a distant target
4035/// - that exceeds its immediate range
4036/// If both conditions are true, it requires constant extension.
4037bool HexagonGlobalSchedulerImpl::isJumpOutOfRange(MachineInstr *UnCond,
4038 MachineInstr *Cond) {
4039 if (!UnCond || !UnCond->isBranch())
4040 return false;
4041
4042 MachineBasicBlock *UnCondBB = UnCond->getParent();
4043 MachineBasicBlock *CondBB = Cond->getParent();
4044 MachineInstr *FirstTerm = &*(CondBB->getFirstInstrTerminator());
4045 // This might be worth an assert.
4046 if (FirstTerm == &*CondBB->instr_end())
4047 return false;
4048
4049 unsigned InstOffset = BlockToInstOffset[UnCondBB];
4050 unsigned Distance = 0;
4051
4052 // To save time, estimate exact position of a branch instruction
4053 // as one at the end of the UnCondBB.
4054 // Number of instructions times typical instruction size.
4055 InstOffset += (QII->nonDbgBBSize(BB: UnCondBB) * HEXAGON_INSTR_SIZE);
4056
4057 MachineBasicBlock *TBB = NULL, *FBB = NULL;
4058 SmallVector<MachineOperand, 4> CondList;
4059
4060 // Find the target of the unconditional branch in UnCondBB, which is returned
4061 // in TBB. Then use the CondBB to extract the FirsTerm. We desire to replace
4062 // the branch target in FirstTerm with the branch location from the UnCondBB,
4063 // provided it is within the distance of the opcode in FirstTerm.
4064 if (QII->analyzeBranch(MBB&: *UnCondBB, TBB, FBB, Cond&: CondList, AllowModify: false))
4065 // Could not analyze it. give up.
4066 return false;
4067
4068 if (TBB && (Cond == FirstTerm)) {
4069 Distance =
4070 (unsigned)std::abs(x: (long long)InstOffset - BlockToInstOffset[TBB]) +
4071 SafetyBuffer;
4072 return !QII->isJumpWithinBranchRange(MI: *FirstTerm, offset: Distance);
4073 }
4074 return false;
4075}
4076
4077// findBundleAndBranch returns the branch instruction and the
4078// bundle which contains it. Null is returned if not found.
4079MachineInstr *HexagonGlobalSchedulerImpl::findBundleAndBranch(
4080 MachineBasicBlock *BB, MachineBasicBlock::iterator &Bundle) {
4081 // Find the conditional branch out of BB.
4082 if (!BB)
4083 return NULL;
4084 MachineInstr *CondBranch = NULL;
4085 Bundle = BB->end();
4086 for (MachineBasicBlock::instr_iterator MII = BB->getFirstInstrTerminator(),
4087 MBBEnd = BB->instr_end();
4088 MII != MBBEnd; ++MII) {
4089 MachineInstr *MI = &*MII;
4090 if (MII->isConditionalBranch()) {
4091 CondBranch = MI;
4092 }
4093 }
4094 if (!CondBranch)
4095 return NULL;
4096 MachineBasicBlock::instr_iterator MII = CondBranch->getIterator();
4097 if (!MII->isBundled())
4098 return NULL;
4099 // Find bundle header.
4100 for (--MII; MII->isBundled(); --MII)
4101 if (MII->isBundle()) {
4102 Bundle = MII;
4103 break;
4104 }
4105 return CondBranch;
4106}
4107
4108// pullUpPeelBBLoop
4109// A single BB loop with a register copy at the beginning in its
4110// own bundle, benefits from eliminating the extra bundle. We do
4111// this by predicating the register copy in the predecessor BB, and
4112// again in the last bundle of the loop.
4113bool HexagonGlobalSchedulerImpl::pullUpPeelBBLoop(MachineBasicBlock *PredBB,
4114 MachineBasicBlock *LoopBB) {
4115 if (!AllowBBPeelPullUp)
4116 return false;
4117 if (!LoopBB || !PredBB)
4118 return false;
4119
4120 // We consider single BB loops only. Check for it here.
4121 if (LoopBB->isEHPad() || LoopBB->hasAddressTaken())
4122 return false;
4123 if (LoopBB->succ_size() != 2)
4124 return false;
4125 if (LoopBB->pred_size() != 2)
4126 return false;
4127 // Make sure one of the successors and one of the predecssors is to self.
4128 if (!(LoopBB->isSuccessor(MBB: LoopBB) && LoopBB->isPredecessor(MBB: LoopBB)))
4129 return false;
4130
4131 // Find the none self successor block. We know we only have 2 successors.
4132 MachineBasicBlock *SuccBB = NULL;
4133 for (MachineBasicBlock::succ_iterator SI = LoopBB->succ_begin(),
4134 SE = LoopBB->succ_end();
4135 SI != SE; ++SI)
4136 if (*SI != LoopBB) {
4137 SuccBB = *SI;
4138 break;
4139 }
4140 if (!SuccBB)
4141 return false;
4142
4143 // Find the conditional branch and its bundle inside PredBB.
4144 MachineBasicBlock::iterator PredBundle;
4145 MachineInstr *PredCondBranch = NULL;
4146 PredCondBranch = findBundleAndBranch(BB: PredBB, Bundle&: PredBundle);
4147 if (!PredCondBranch)
4148 return false;
4149 if (PredBundle == PredBB->end())
4150 return false;
4151 LLVM_DEBUG(dbgs() << "PredBB's Branch: ");
4152 LLVM_DEBUG(dbgs() << *PredCondBranch);
4153
4154 // Look for leading reg copy as single bundle and make sure its live in.
4155 MachineBasicBlock::instr_iterator FMI = LoopBB->instr_begin();
4156 // Skip debug instructions.
4157 while (FMI->isDebugInstr())
4158 FMI++;
4159
4160 MachineInstr *RegMI = &*FMI;
4161 if (RegMI->isBundle())
4162 return false;
4163 int TfrOpcode = RegMI->getOpcode();
4164 if (TfrOpcode != Hexagon::A2_tfr && TfrOpcode != Hexagon::A2_tfr)
4165 return false;
4166 if (!(RegMI->getOperand(i: 0).isReg() && RegMI->getOperand(i: 1).isReg()))
4167 return false;
4168 unsigned InLoopReg = RegMI->getOperand(i: 1).getReg();
4169 if (!LoopBB->isLiveIn(Reg: InLoopReg))
4170 return false;
4171
4172 // Create a region to pass to ResourcesAvailableInBundle.
4173 BasicBlockRegion PUR(BasicBlockRegion(TII, QRI, PredBB));
4174 PUR.addBBtoRegion(MBB: LoopBB);
4175 PUR.addBBtoRegion(MBB: SuccBB);
4176
4177 // Make sure we have space in PredBB's last bundle.
4178 if (!ResourcesAvailableInBundle(CurrentRegion: &PUR, TargetPacket&: PredBundle))
4179 return false;
4180 SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE> PredBundlePkt(
4181 CurrentState.HomeBundle);
4182
4183 // Find condition to use for predicating the reg copy into PredBB.
4184 MachineBasicBlock *TBB = NULL, *FBB = NULL;
4185 SmallVector<MachineOperand, 4> Cond;
4186 if (QII->analyzeBranch(MBB&: *PredBB, TBB, FBB, Cond, AllowModify: false))
4187 return false;
4188 if (Cond.empty())
4189 return false;
4190
4191 // Find condition to use for predicating the reg copy at the end of LoopBB.
4192 MachineBasicBlock *LTBB = NULL, *LFBB = NULL;
4193 SmallVector<MachineOperand, 4> LCond;
4194 if (QII->analyzeBranch(MBB&: *LoopBB, TBB&: LTBB, FBB&: LFBB, Cond&: LCond, AllowModify: false))
4195 return false;
4196 if (LCond.empty())
4197 return false;
4198
4199 // Move predicated reg copy to previous BB's last bundle.
4200 if (!TII->isPredicable(MI: *RegMI))
4201 return false;
4202 MachineInstr *InstrToMove =
4203 &*insertTempCopy(MBB: PredBB, TargetPacket&: PredBundle, MI: RegMI, DeleteOldCopy: false);
4204 if (!canAddMIToThisPacket(MI: InstrToMove, Bundle&: PredBundlePkt)) {
4205 PredBB->erase_instr(I: InstrToMove);
4206 return false;
4207 }
4208
4209 if (!TII->PredicateInstruction(MI&: *InstrToMove, Pred: Cond)) {
4210 // Failed to predicate the copy reg.
4211 PredBB->erase_instr(I: InstrToMove);
4212 return false;
4213 }
4214
4215 // Can we newify this instruction?
4216 unsigned DepReg = 0;
4217 if (NeedToNewify(NewMI: InstrToMove->getIterator(), DepReg: &DepReg, TargetPacket: &*PredBundle) &&
4218 !isNewifiable(MII: InstrToMove->getIterator(), DepReg, TargetPacket: &*PredBundle)) {
4219 PredBB->erase_instr(I: InstrToMove);
4220 return false;
4221 }
4222 // Newify it, and then undo it if we determine we are using a .old.
4223 int NewOpcode = QII->getDotNewPredOp(MI: *InstrToMove, MBPI);
4224 // Undo newify if we have a non .new predicated jump we are matching.
4225 if (!QII->isDotNewInst(MI: *PredCondBranch))
4226 NewOpcode = QII->getDotOldOp(MI: *InstrToMove);
4227 NewOpcode = QII->getInvertedPredicatedOpcode(Opc: NewOpcode);
4228 // Properly set the opcode on the new hoisted reg copy instruction.
4229 InstrToMove->setDesc(QII->get(Opcode: NewOpcode));
4230 if (!incrementalAddToPacket(MI&: *InstrToMove)) {
4231 PredBB->erase_instr(I: InstrToMove);
4232 return false;
4233 }
4234
4235 // Find the conditional branch and its bundle for LoopBB.
4236 MachineBasicBlock::iterator LoopBundle;
4237 MachineInstr *LoopCondBranch = NULL;
4238 LoopCondBranch = findBundleAndBranch(BB: LoopBB, Bundle&: LoopBundle);
4239 if (!LoopCondBranch)
4240 return false;
4241 if (LoopBundle == LoopBB->end())
4242 return false;
4243 LLVM_DEBUG(dbgs() << "LoopBB's Branch: ");
4244 LLVM_DEBUG(dbgs() << *LoopCondBranch);
4245
4246 // Make sure we have space in LoopBB's last bundle.
4247 if (!ResourcesAvailableInBundle(CurrentRegion: &PUR, TargetPacket&: LoopBundle))
4248 return false;
4249 SmallVector<MachineInstr *, HEXAGON_PACKET_SIZE> LoopBundlePkt(
4250 CurrentState.HomeBundle);
4251
4252 // Move predicated reg copy to last bundle of LoopBB.
4253 MachineInstr *InstrToSink =
4254 &*insertTempCopy(MBB: LoopBB, TargetPacket&: LoopBundle, MI: RegMI, DeleteOldCopy: false);
4255 if (!canAddMIToThisPacket(MI: InstrToSink, Bundle&: LoopBundlePkt)) {
4256 // Get rid of previous instruction as well.
4257 PredBB->erase_instr(I: InstrToMove);
4258 LoopBB->erase_instr(I: InstrToSink);
4259 return false;
4260 }
4261
4262 if (!TII->PredicateInstruction(MI&: *InstrToSink, Pred: LCond)) {
4263 // Get rid of previous instruction as well.
4264 PredBB->erase_instr(I: InstrToMove);
4265 LoopBB->erase_instr(I: InstrToSink);
4266 return false;
4267 }
4268 // Can we newify this instruction?
4269 if (NeedToNewify(NewMI: InstrToSink->getIterator(), DepReg: &DepReg, TargetPacket: &*LoopBundle) &&
4270 !isNewifiable(MII: InstrToSink->getIterator(), DepReg, TargetPacket: &*LoopBundle)) {
4271 // Get rid of previous instruction as well.
4272 PredBB->erase_instr(I: InstrToMove);
4273 PredBB->erase_instr(I: InstrToSink);
4274 return false;
4275 }
4276 NewOpcode = QII->getDotNewPredOp(MI: *InstrToSink, MBPI);
4277 // Undo newify if we have a non .new predicated jump we are matching.
4278 if (!QII->isDotNewInst(MI: *LoopCondBranch))
4279 NewOpcode = QII->getDotOldOp(MI: *InstrToSink);
4280 InstrToSink->setDesc(QII->get(Opcode: NewOpcode));
4281 if (!incrementalAddToPacket(MI&: *InstrToSink)) {
4282 // Get rid of previous instruction as well.
4283 PredBB->erase_instr(I: InstrToMove);
4284 LoopBB->erase_instr(I: InstrToSink);
4285 return false;
4286 }
4287
4288 // Remove old instruction.
4289 LoopBB->erase_instr(I: RegMI);
4290 // Set loop alignment to 32.
4291 LoopBB->setAlignment(llvm::Align(32));
4292
4293 LLVM_DEBUG(dbgs() << "Peeled Single BBLoop copy\n");
4294 LLVM_DEBUG(dbgs() << *InstrToMove);
4295 LLVM_DEBUG(dbgs() << *InstrToSink);
4296 LLVM_DEBUG(dbgs() << *PredBB);
4297 LLVM_DEBUG(dbgs() << *LoopBB);
4298 LLVM_DEBUG(dbgs() << *SuccBB);
4299 LLVM_DEBUG(dbgs() << "--- BBLoop ---\n\n");
4300 return true;
4301}
4302
4303bool HexagonGlobalSchedulerImpl::performPullUpCFG(MachineFunction &Fn) {
4304 const Function &F = Fn.getFunction();
4305 // Check for single-block functions and skip them.
4306 if (std::next(x: F.begin()) == F.end())
4307 return false;
4308 bool Changed = false;
4309 LLVM_DEBUG(dbgs() << "****** PullUpCFG **************\n");
4310
4311 // Loop over all basic blocks, asking if 3 consecutive blocks are
4312 // the jump opportunity.
4313 MachineBasicBlock *PrevBlock = NULL;
4314 MachineBasicBlock *JumpBlock = NULL;
4315 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe;
4316 ++MBB) {
4317 MachineBasicBlock *FallBlock = &*MBB;
4318 if (PrevBlock && JumpBlock) {
4319 Changed |= pullUpPeelBBLoop(PredBB: PrevBlock, LoopBB: JumpBlock);
4320 }
4321 PrevBlock = JumpBlock;
4322 JumpBlock = FallBlock;
4323 }
4324 return Changed;
4325}
4326
4327void HexagonGlobalSchedulerImpl::GenUseDefChain(MachineFunction &Fn) {
4328 std::vector<unsigned> Defs;
4329 std::vector<unsigned> Uses;
4330 for (MachineFunction::iterator MBBIter = Fn.begin(); MBBIter != Fn.end();
4331 ++MBBIter) {
4332 for (MachineBasicBlock::instr_iterator MIter = MBBIter->instr_begin();
4333 MIter != MBBIter->instr_end(); ++MIter) {
4334 if (MIter->isBundle() || MIter->isDebugInstr())
4335 continue;
4336 LLVM_DEBUG(dbgs() << "\n\nInserted Ins:"; MIter->dump());
4337 MIUseDefSet(MI: &*MIter, Defs, Uses);
4338 LLVM_DEBUG(dbgs() << "\n\tDefs:";
4339 for (unsigned i = 0; i < Defs.size(); ++i) dbgs()
4340 << printReg(Defs[i], QRI) << ",");
4341 LLVM_DEBUG(dbgs() << "\n\tUses:";
4342 for (unsigned i = 0; i < Uses.size(); ++i) dbgs()
4343 << printReg(Uses[i], QRI) << ",");
4344 MIDefSet[&*MIter] = Defs;
4345 MIUseSet[&*MIter] = Uses;
4346 }
4347 }
4348}
4349
// optimizeBranching -
// 1. A conditional-jump transfers control to a BB with
// jump as the only instruction.
// if(p0) jump t1
// // ...
// t1: jump t2
// 2. When a BB with a single conditional jump, jumps to succ-of-succ and
// falls-through BB with only jump instruction.
// { if(p0) jump t1 }
// { jump t2 }
// t1: { ... }
// Returns the block that became empty (for the caller to erase), or NULL
// when no block was emptied.
MachineBasicBlock *HexagonGlobalSchedulerImpl::optimizeBranches(
    MachineBasicBlock *MBB, MachineBasicBlock *TBB, MachineInstr *FirstTerm,
    MachineBasicBlock *FBB) {
  LLVM_DEBUG(dbgs() << "\n\t\t[optimizeBranching]\n");
  // NOTE(review): the self-loop case is only reported in debug output; the
  // code then falls through to the transformations below — confirm that a
  // `return NULL` was not intended here.
  if ((TBB == MBB) || (FBB == MBB))
    LLVM_DEBUG(dbgs() << "Cannot deal with loops in BB#" << MBB->getNumber(););

  // LLVM_DEBUG(dbgs() << "\n\t\tTBBMIb:"; MII->dump(););
  // { if(p) jump t1; }
  // t1: { jump t2; }
  // --> { if(p) jump t2
  // remove t1: { jump t2; }, if it's address is not taken/not a landing pad.
  if (QII->nonDbgBBSize(BB: TBB) == 1) {
    MachineInstr *TBBMIb = &*TBB->getFirstNonDebugInstr();
    if (TBBMIb->getOpcode() == Hexagon::J2_jump &&
        TBBMIb->getOperand(i: 0).isMBB()) {
      MachineBasicBlock *NewTarget = TBBMIb->getOperand(i: 0).getMBB();
      if (TBB == NewTarget) // Infinite loop.
        return NULL;

      LLVM_DEBUG(dbgs() << "\nSuboptimal branching in TBB");
      // Check if the jump in the last instruction is within range.
      // Offsets come from BlockToInstOffset; the end of MBB is approximated
      // as its start plus 4 bytes per non-debug instruction.
      int64_t InstOffset =
          BlockToInstOffset.find(Val: MBB)->second + QII->nonDbgBBSize(BB: MBB) * 4;
      unsigned Distance = (unsigned)std::abs(
          i: InstOffset - BlockToInstOffset.find(Val: NewTarget)->second);
      if (!QII->isJumpWithinBranchRange(MI: *FirstTerm, offset: Distance)) {
        LLVM_DEBUG(dbgs() << "\nUnconditional jump target:" << Distance
                          << " out of range.");
        return NULL;
      }
      // We need to make sure that the TBB is _not_ also a target for another
      // branch. This is suboptimal since theoretically we can update both
      // branches.
      if (!TBB->hasAddressTaken() && !TBB->isEHPad() && TBB->pred_size() == 1) {
        // Redirect everything that jumped to TBB straight to NewTarget and
        // drop TBB's now-dead jump; the caller erases the emptied TBB.
        updatePredecessors(MBB&: *TBB, MFBB: NewTarget);
        // TBB has only one successor since only one J2_jump instr.
        TBB->removeSuccessor(I: TBB->succ_begin());
        TBBMIb->removeFromParent();
        if (!TBB->empty()) {
          // There are only debug instructions in TBB now. Move them to
          // the beginning of NewTarget.
          NewTarget->splice(Where: NewTarget->getFirstNonPHI(), Other: TBB, From: TBB->begin(),
                            To: TBB->end());
        }
        return TBB;
      } else {
        // TBB must survive (other branches target it); just retarget MBB.
        MBB->ReplaceUsesOfBlockWith(Old: TBB, New: NewTarget);
        return NULL;
      }
    }
  }
  // { if(p) jump t1; } may contain more instructions
  // { jump t2; } --only one instruction
  // t1: {...}
  // TBB is layout successor of FBB, then we can change the branch target
  // for conditional jump and invert the predicate to remove jump t2.
  // { if(!p) jump t2; }
  // t1: {...}
  if (QII->nonDbgBBSize(BB: FBB) == 1) {
    MachineInstr *FBBMIb = &*FBB->getFirstNonDebugInstr();
    if (FBBMIb->getOpcode() == Hexagon::J2_jump &&
        FBBMIb->getOperand(i: 0).isMBB()) {
      MachineBasicBlock *NewTarget = FBBMIb->getOperand(i: 0).getMBB();
      if (FBB->hasAddressTaken() || FBB->isEHPad() ||
          !FBB->isLayoutSuccessor(MBB: TBB) || (FBB == NewTarget /*Infinite loop*/))
        return NULL;

      LLVM_DEBUG(dbgs() << "\nSuboptimal branching in FBB");
      // Check if the jump in the last instruction is within range.
      int64_t InstOffset =
          BlockToInstOffset.find(Val: MBB)->second + QII->nonDbgBBSize(BB: MBB) * 4;
      unsigned Distance = (unsigned)std::abs(
          i: InstOffset - BlockToInstOffset.find(Val: NewTarget)->second);
      if (!QII->isJumpWithinBranchRange(MI: *FirstTerm, offset: Distance)) {
        LLVM_DEBUG(dbgs() << "\nUnconditional jump target:" << Distance
                          << " out of range.");
        return NULL;
      }
      // Flip the predicate sense of the conditional branch and point it at
      // FBB's jump target, making FBB's unconditional jump redundant.
      if (!QII->invertAndChangeJumpTarget(MI&: *FirstTerm, NewTarget))
        return NULL;
      LLVM_DEBUG(dbgs() << "\nNew instruction:"; FirstTerm->dump(););
      updatePredecessors(MBB&: *FBB, MFBB: NewTarget);
      // Only one successor remains for FBB
      FBB->removeSuccessor(I: FBB->succ_begin());
      FBBMIb->removeFromParent();
      return FBB;
    }
  }
  return NULL;
}
4452
4453// performExposedOptimizations -
4454// look for optimization opportunities after pullup.
4455// e.g. jump to adjacent targets
4456bool HexagonGlobalSchedulerImpl::performExposedOptimizations(
4457 MachineFunction &Fn) {
4458 // Check for single-block functions and skip them.
4459 if (std::next(x: Fn.getFunction().begin()) == Fn.getFunction().end())
4460 return true;
4461 LLVM_DEBUG(dbgs() << "\n\t\t[performExposedOptimizations]\n");
4462 // Erasing the empty basic blocks formed during pullup.
4463 std::vector<MachineBasicBlock *>::iterator ebb = EmptyBBs.begin();
4464 while (ebb != EmptyBBs.end()) {
4465 assert(IsEmptyBlock(*ebb) && "Pullup inserted packets into an empty BB");
4466 LLVM_DEBUG(dbgs() << "Removing BB(" << (*ebb)->getNumber()
4467 << ") from parent.\n");
4468 (*ebb)->eraseFromParent();
4469 ++ebb;
4470 }
4471 MachineBasicBlock *TBB = NULL, *FBB = NULL;
4472 MachineInstr *FirstTerm = NULL, *SecondTerm = NULL;
4473
4474 SmallVector<MachineBasicBlock *, 4> Erase;
4475
4476 for (MachineBasicBlock &MBB : Fn) {
4477 if (MBB.succ_size() > 2 ||
4478 AnalyzeBBBranches(MBB: &MBB, TBB, FirstTerm, FBB, SecondTerm)) {
4479 LLVM_DEBUG(dbgs() << "\nAnalyzeBBBranches failed in BB#"
4480 << MBB.getNumber() << "\n";);
4481 continue;
4482 }
4483 if (FirstTerm && QII->isCompoundBranchInstr(MI: *FirstTerm))
4484 continue;
4485 if (TBB && FirstTerm &&
4486 removeRedundantBranches(MBB: &MBB, TBB, FirstTerm, FBB, SecondTerm)) {
4487 LLVM_DEBUG(dbgs() << "\nRemoved redundant branches in BB#"
4488 << MBB.getNumber(););
4489 continue;
4490 }
4491 if (FirstTerm && SecondTerm &&
4492 optimizeDualJumps(MBB: &MBB, TBB, FirstTerm, FBB, SecondTerm)) {
4493 LLVM_DEBUG(dbgs() << "\nRemoved dual jumps in in BB#"
4494 << MBB.getNumber(););
4495 continue;
4496 }
4497 if (TBB && FBB && FirstTerm && !SecondTerm) {
4498 MachineBasicBlock *MBBToErase =
4499 optimizeBranches(MBB: &MBB, TBB, FirstTerm, FBB);
4500 if (MBBToErase) {
4501 assert(IsEmptyBlock(MBBToErase) && "Erasing non-empty BB");
4502 Erase.push_back(Elt: MBBToErase);
4503 LLVM_DEBUG(dbgs() << "\nOptimized jump from BB#" << MBB.getNumber());
4504 }
4505 }
4506 }
4507 for (MachineBasicBlock *MBB : Erase)
4508 MBB->eraseFromParent();
4509
4510 return false;
4511}
4512
4513// 1. Remove jump to the layout successor.
4514// 2. Remove multiple (dual) jump to the same target.
4515bool HexagonGlobalSchedulerImpl::removeRedundantBranches(
4516 MachineBasicBlock *MBB, MachineBasicBlock *TBB, MachineInstr *FirstTerm,
4517 MachineBasicBlock *FBB, MachineInstr *SecondTerm) {
4518 bool Analyzed = false;
4519 LLVM_DEBUG(dbgs() << "\n\t\t[removeRedundantBranches]\n");
4520 MachineInstr *Head = NULL, *ToErase = NULL;
4521 if (!FBB && (FirstTerm->getOpcode() == Hexagon::J2_jump) &&
4522 MBB->isLayoutSuccessor(MBB: TBB)) {
4523 // Jmp layout_succ_basic_block <-- Remove
4524 LLVM_DEBUG(
4525 dbgs() << "\nRemoving Uncond. jump to the layout successor in BB#"
4526 << MBB->getNumber());
4527 ToErase = FirstTerm;
4528 } else if (SecondTerm && (TBB == FBB) &&
4529 (SecondTerm->getOpcode() == Hexagon::J2_jump)) {
4530 // If both branching instructions in same packet or are consecutive.
4531 // Jmp_c t1 <-- Remove
4532 // Jmp t1
4533 // @Note: If they are in different packets or if they are separated
4534 // by packet(s), this opt. cannot be done.
4535 MachineBasicBlock::instr_iterator FirstTermIter = FirstTerm->getIterator();
4536 MachineBasicBlock::instr_iterator SecondTermIter =
4537 SecondTerm->getIterator();
4538 if (++FirstTermIter == SecondTermIter) {
4539 LLVM_DEBUG(dbgs() << "\nRemoving multiple branching to same target in BB#"
4540 << MBB->getNumber());
4541 // TODO: This might make the `p' register assignment instruction dead.
4542 // and can be removed.
4543 ToErase = FirstTerm;
4544 }
4545 } else if (SecondTerm && (SecondTerm->getOpcode() == Hexagon::J2_jump) &&
4546 FBB && MBB->isLayoutSuccessor(MBB: FBB)) {
4547 // Jmp_c t1
4548 // Jmp layout_succ_basic_block <-- Remove
4549 LLVM_DEBUG(dbgs() << "\nRemoving fall through branch in BB#"
4550 << MBB->getNumber());
4551 ToErase = SecondTerm;
4552 } else if (SecondTerm && QII->PredOpcodeHasJMP_c(Opcode: SecondTerm->getOpcode()) &&
4553 MBB->isLayoutSuccessor(MBB: getBranchDestination(MI: SecondTerm))) {
4554 // Jmp_c t1
4555 // Jmp_c layout_succ_basic_block <-- Remove
4556 // In this case AnalyzeBBBranches might assign FBB to some other BB.
4557 // So using the jump target of SecondTerm to check.
4558 LLVM_DEBUG(dbgs() << "\nRemoving Cond. jump to the layout successor in BB#"
4559 << MBB->getNumber());
4560 ToErase = SecondTerm;
4561 }
4562 // Remove the instruction from the BB
4563 if (ToErase) {
4564 if (ToErase->isBundled()) {
4565 Head = &*getBundleStart(I: ToErase->getIterator());
4566 ToErase->eraseFromBundle();
4567 UpdateBundle(BundleHead: Head);
4568 } else
4569 ToErase->eraseFromParent();
4570 Analyzed = true;
4571 }
4572 return Analyzed;
4573}
4574
// ----- convert
// p = <expr>
// if(p) jump layout_succ_basic_block
// jump t
// ----- to
// p = <expr>
// if(!p) jump t
// for now only looking at the dual jump
// Returns true when the unconditional jump was removed.
bool HexagonGlobalSchedulerImpl::optimizeDualJumps(MachineBasicBlock *MBB,
                                                   MachineBasicBlock *TBB,
                                                   MachineInstr *FirstTerm,
                                                   MachineBasicBlock *FBB,
                                                   MachineInstr *SecondTerm) {
  LLVM_DEBUG(dbgs() << "\n******* optimizeDualJumps *******");

  bool Analyzed = false;

  // Only the conditional-jump + unconditional-jump shape is handled.
  if (QII->PredOpcodeHasJMP_c(Opcode: FirstTerm->getOpcode()) &&
      (SecondTerm->getOpcode() == Hexagon::J2_jump)) {

    if (TBB == FBB) {
      LLVM_DEBUG(dbgs() << "\nBoth successors are the same.");
      return Analyzed;
    }

    // Do not optimize for dual jumps if this MBB
    // contains a speculatively pulled-up instruction.
    // A speculated instruction is more likely to be at the end of MBB.
    MachineBasicBlock::reverse_instr_iterator SII = MBB->instr_rbegin();
    while (SII != MBB->instr_rend()) {
      MachineInstr *SI = &*SII;
      std::map<MachineInstr *, MachineBasicBlock *>::iterator MIMoved;
      MIMoved = SpeculatedIns.find(x: SI);
      // An entry whose recorded block differs from the current parent means
      // the instruction was speculated into this block; bail out.
      if ((MIMoved != SpeculatedIns.end()) &&
          (MIMoved->second != SI->getParent())) {
        return Analyzed;
      }
      ++SII;
    }

    LLVM_DEBUG(dbgs() << "\nCandidate for jump optimization in BB("
                      << MBB->getNumber() << ").\n";);

    // Predicated jump to layout successor followed by an unconditional jump.
    if (MBB->isLayoutSuccessor(MBB: TBB)) {

      // Check if the jump in the last instruction is within range.
      // End of MBB is approximated as 4 bytes per non-debug instruction;
      // SafetyBuffer pads the estimate.
      int64_t InstOffset =
          BlockToInstOffset.find(Val: &*MBB)->second + QII->nonDbgBBSize(BB: MBB) * 4;
      unsigned Distance =
          (unsigned)std::abs(i: InstOffset - BlockToInstOffset.find(Val: FBB)->second) +
          SafetyBuffer;
      if (!QII->isJumpWithinBranchRange(MI: *FirstTerm, offset: Distance)) {
        LLVM_DEBUG(dbgs() << "\nUnconditional jump target:" << Distance
                          << " out of range.");
        return Analyzed;
      }

      // modify the second last -predicated- instruction (sense and target)
      LLVM_DEBUG(dbgs() << "\nFirst Instr:" << *FirstTerm;);
      LLVM_DEBUG(dbgs() << "\nSecond Instr:" << *SecondTerm;);
      LLVM_DEBUG(dbgs() << "\nOld Succ BB(" << TBB->getNumber() << ").";);

      QII->invertAndChangeJumpTarget(MI&: *FirstTerm, NewTarget: FBB);

      LLVM_DEBUG(dbgs() << "\nNew First Instruction:" << *FirstTerm;);

      // Remove the now-dead unconditional jump. The bundle membership of
      // the two terminators determines how the erase must be done.
      // unbundle if there is only one instruction left
      MachineInstr *SecondHead, *FirstHead;
      FirstHead = FirstTerm->isBundled()
                      ? &*getBundleStart(I: FirstTerm->getIterator())
                      : nullptr;
      SecondHead = SecondTerm->isBundled()
                       ? &*getBundleStart(I: SecondTerm->getIterator())
                       : nullptr;

      // 1. Both unbundled, 2. FirstTerm inside bundle, second outside.
      if (!SecondHead)
        SecondTerm->eraseFromParent();
      else if (!FirstHead) {
        // 3. FirstHead outside, SecondHead inside.
        SecondTerm->eraseFromBundle();
        UpdateBundle(BundleHead: SecondHead);
      } else if (FirstHead == SecondHead) {
        // 4. Both are in the same bundle
        assert((FirstHead && SecondHead) && "Unbundled Instruction");
        SecondTerm->eraseFromBundle();
        if (SecondHead->getBundleSize() < 2)
          UpdateBundle(BundleHead: SecondHead);
      } else {
        // 5. Both are in different bundles
        SecondTerm->eraseFromBundle();
        UpdateBundle(BundleHead: SecondHead);
      }
      Analyzed = true;
    }
  }
  return Analyzed;
}
4674
/// Are there any resources left in this bundle?
/// Rebuilds the DFA resource-tracker state for the packet at TargetPacket
/// (side effect: initPacketizerState / CurrentState are reset), then asks
/// whether one more instruction (modeled as a Nop) would still fit.
bool HexagonGlobalSchedulerImpl::ResourcesAvailableInBundle(
    BasicBlockRegion *CurrentRegion,
    MachineBasicBlock::iterator &TargetPacket) {
  MachineBasicBlock::instr_iterator MII = TargetPacket.getInstrIterator();

  // If this is a single instruction, form new packet around it.
  if (!TargetPacket->isBundle()) {
    if (ignoreInstruction(MI: &*MII) || isSoloInstruction(MI: *MII))
      return false;

    // Before we begin, we need to make sure that we do not
    // look at an unconditional jump outside the current region.
    if (MII->isBranch() && !isBranchWithinRegion(CurrentRegion, MI: &*MII))
      return false;

    // Build up state for this new packet.
    // Note, we cannot create a bundle header for it,
    // so this "bundle" only exist in DFA state, and not in code.
    initPacketizerState();
    ResourceTracker->clearResources();
    CurrentState.addHomeLocation(WorkPoint: MII);
    return incrementalAddToPacket(MI&: *MII);
  }

  MachineBasicBlock::instr_iterator End = MII->getParent()->instr_end();

  // Build up state for this packet.
  initPacketizerState();
  ResourceTracker->clearResources();
  CurrentState.addHomeLocation(WorkPoint: MII);

  // Replay every real instruction of the bundle into the resource tracker.
  for (++MII; MII != End && MII->isInsideBundle(); ++MII) {
    // Meta instructions consume no packet resources.
    if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
        MII->getOpcode() == TargetOpcode::IMPLICIT_DEF ||
        MII->getOpcode() == TargetOpcode::CFI_INSTRUCTION || MII->isEHLabel())
      continue;

    // Before we begin, we need to make sure that we do not
    // look at an unconditional jump outside the current region.
    // TODO: See if we can profit from handling this kind of cases:
    // B#15: derived from LLVM BB %if.then22
    //     Predecessors according to CFG: BB#13
    //         BUNDLE %PC<imp-def>, %P2<imp-use,kill>
    //           * J2_jumpf %P2<kill,internal>, <BB#17>, %PC<imp-def>; flags:
    //           * J2_jump <BB#18>, %PC<imp-def>; flags:
    //     Successors according to CFG: BB#18(62) BB#17(62)
    // Currently we do not allow them.
    if (MII->isBranch() && !isBranchWithinRegion(CurrentRegion, MI: &*MII))
      return false;

    if (!incrementalAddToPacket(MI&: *MII))
      return false;
  }
  // Room for one more slot iff a Nop could still be reserved.
  return ResourceTracker->canReserveResources(MI&: *Nop);
}
4731
4732/// Symmetrical. See if these two instructions are fit for compound pair.
4733bool HexagonGlobalSchedulerImpl::isCompoundPair(MachineInstr *MIa,
4734 MachineInstr *MIb) {
4735 enum HexagonII::CompoundGroup MIaG = QII->getCompoundCandidateGroup(MI: *MIa),
4736 MIbG = QII->getCompoundCandidateGroup(MI: *MIb);
4737 // We have two candidates - check that this is the same register
4738 // we are talking about.
4739 unsigned Opcb = MIb->getOpcode();
4740 if (MIaG == HexagonII::HCG_C && MIbG == HexagonII::HCG_A &&
4741 (Opcb == Hexagon::A2_tfr || Opcb == Hexagon::A2_tfrsi))
4742 return true;
4743 unsigned Opca = MIa->getOpcode();
4744 if (MIbG == HexagonII::HCG_C && MIaG == HexagonII::HCG_A &&
4745 (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi))
4746 return true;
4747 return (((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) ||
4748 (MIbG == HexagonII::HCG_A && MIaG == HexagonII::HCG_B)) &&
4749 (MIa->getOperand(i: 0).getReg() == MIb->getOperand(i: 0).getReg()));
4750}
4751
4752// This is a weird situation when BB conditionally branches + falls through
4753// to layout successor. \ref bug17792
4754inline bool HexagonGlobalSchedulerImpl::multipleBranchesFromToBB(
4755 MachineBasicBlock *BB) const {
4756 if (BB->succ_size() != 1)
4757 return false;
4758 SmallVector<MachineInstr *, 2> Jumpers = QII->getBranchingInstrs(MBB&: *BB);
4759 return ((Jumpers.size() == 1) && !Jumpers[0]->isUnconditionalBranch());
4760}
4761
/// Gather a worklist of MaxCandidates pull-up candidates.
/// Compute relative cost.
/// Inspects FromHere (either a bundle or a standalone instruction) for
/// instructions that could legally move into the packet at WorkPoint, and
/// records each viable one — with a heuristic CostBenefit score — via
/// CurrentState.addPullUpCandidate. Returns false once enough candidates
/// have been collected, true otherwise.
bool HexagonGlobalSchedulerImpl::findPullUpCandidates(
    MachineBasicBlock::iterator &WorkPoint,
    MachineBasicBlock::iterator &FromHere,
    std::vector<MachineInstr *> &backtrack, unsigned MaxCandidates = 1) {

  const HexagonInstrInfo *QII = (const HexagonInstrInfo *)TII;
  MachineBasicBlock *FromThisBB = FromHere->getParent();
  bool MovingDependentOp = false;
  signed CostBenefit = 0;

  // Do not collect more than that many candidates.
  if (CurrentState.haveCandidates() >= MaxCandidates)
    return false;

  LLVM_DEBUG(dbgs() << "\n\tTry from BB(" << FromThisBB->getNumber() << "):\n";
             DumpPacket(FromHere.getInstrIterator()));

  if (FromHere->isBundle()) {
    // Walk the instructions inside the source bundle.
    MachineBasicBlock::instr_iterator MII = FromHere.getInstrIterator();
    for (++MII; MII != FromThisBB->instr_end() && MII->isInsideBundle();
         ++MII) {
      if (MII->isDebugInstr())
        continue;
      LLVM_DEBUG(dbgs() << "\tCandidate from BB("
                        << MII->getParent()->getNumber() << "): ";
                 MII->dump());

      // See if this instruction could be moved.
      if (!canThisMIBeMoved(MI: &*MII, WorkPoint, MovingDependentOp, Cost&: CostBenefit))
        continue;

      MachineBasicBlock::instr_iterator InstrToMove = MII;
      if (canAddMIToThisPacket(MI: &*InstrToMove, Bundle&: CurrentState.HomeBundle)) {
        // Penalize long backtrack paths (4 points per hop).
        CostBenefit -= (backtrack.size() * 4);
        // Prefer instructions in empty packets.
        CostBenefit += (PacketSize - nonDbgBundleSize(TargetPacket&: FromHere)) * 2;
        // Prefer Compares.
        if (MII->isCompare())
          CostBenefit += 10;
        // Check duplex conditions;
        for (unsigned i = 0; i < CurrentState.HomeBundle.size(); i++) {
          if (QII->isDuplexPair(MIa: *CurrentState.HomeBundle[i], MIb: *MII)) {
            LLVM_DEBUG(dbgs() << "\tGot real Duplex (bundle).\n");
            CostBenefit += 20;
          }
          if (isCompoundPair(MIa: CurrentState.HomeBundle[i], MIb: &*MII)) {
            LLVM_DEBUG(dbgs() << "\tGot compound (bundle).\n");
            CostBenefit += 40;
          }
        }
        // Create a record for this location.
        CurrentState.addPullUpCandidate(MII: InstrToMove, HomeBundle: WorkPoint, backtrack,
                                        DependentOp: MovingDependentOp, Cost: CostBenefit);
      } else
        LLVM_DEBUG(dbgs() << "\tNo resources in the target packet.\n");
    }
  }
  // This is a standalone instruction.
  // First see if this MI can even be moved. Cost model for a single instruction
  // should be rather different from moving something out of a bundle.
  else if (canThisMIBeMoved(MI: &*FromHere, WorkPoint, MovingDependentOp,
                            Cost&: CostBenefit)) {
    MachineBasicBlock::instr_iterator InstrToMove = FromHere.getInstrIterator();
    if (canAddMIToThisPacket(MI: &*InstrToMove, Bundle&: CurrentState.HomeBundle)) {
      CostBenefit -= (backtrack.size() * 4);
      // Prefer Compares.
      if (InstrToMove->isCompare())
        CostBenefit += 10;
      // It is better to pull a single instruction in to a bundle - save
      // a cycle immediately.
      CostBenefit += 10;
      // Search for duplex match.
      for (unsigned i = 0; i < CurrentState.HomeBundle.size(); i++) {
        if (QII->isDuplexPair(MIa: *CurrentState.HomeBundle[i], MIb: *InstrToMove)) {
          LLVM_DEBUG(dbgs() << "\tGot real Duplex (single).\n");
          CostBenefit += 30;
        }
        if (isCompoundPair(MIa: CurrentState.HomeBundle[i], MIb: &*InstrToMove)) {
          LLVM_DEBUG(dbgs() << "\tGot compound (single).\n");
          CostBenefit += 50;
        }
      }
      // Create a record for this location.
      CurrentState.addPullUpCandidate(MII: InstrToMove, HomeBundle: WorkPoint, backtrack,
                                      DependentOp: MovingDependentOp, Cost: CostBenefit);
    } else
      LLVM_DEBUG(dbgs() << "\tNo resources for single in the target packet.\n");
  }
  return true;
}
4854
/// Try to move a candidate MI.
/// The move can destroy all iterator system, so we have to drag them
/// around to keep them up to date.
/// Candidates are attempted in sorted (best-first) order; the first one
/// that MoveMItoBundle accepts wins. On success RetVal is set to the
/// (possibly relocated) target packet and true is returned so the caller
/// retries the same bundle. Returns false when no candidate fit or the
/// source block was drained empty.
bool HexagonGlobalSchedulerImpl::tryMultipleInstructions(
    MachineBasicBlock::iterator &RetVal, /* output parameter */
    std::vector<BasicBlockRegion *>::iterator &CurrentRegion,
    MachineBasicBlock::iterator &NextMI,
    MachineBasicBlock::iterator &ToThisBBEnd,
    MachineBasicBlock::iterator &FromThisBBEnd, bool PathInRegion) {

  MachineBasicBlock::instr_iterator MII;
  MachineBasicBlock::iterator WorkPoint;
  bool MovingDependentOp = false;
  std::vector<MachineInstr *> backtrack;

  LLVM_DEBUG(dbgs() << "\n\tTry Multiple candidates: \n");

  // Best candidates first, per PullUpCandidateSorter's ordering.
  std::sort(first: CurrentState.PullUpCandidates.begin(),
            last: CurrentState.PullUpCandidates.end(), comp: PullUpCandidateSorter());
  LLVM_DEBUG(CurrentState.dump());
  // Iterate through candidates in sorted order.
  for (SmallVector<PullUpCandidate *, 4>::iterator
           I = CurrentState.PullUpCandidates.begin(),
           E = CurrentState.PullUpCandidates.end();
       I != E; ++I) {
    // Unpack this candidate's saved iterators/state into our locals.
    (*I)->populate(MII, WorkPoint, backtrack, dependentOp&: MovingDependentOp);

    MachineBasicBlock *FromThisBB = MII->getParent();
    MachineBasicBlock *ToThisBB = WorkPoint->getParent();

    LLVM_DEBUG(dbgs() << "\n\tCandidate: "; MII->dump());
    LLVM_DEBUG(dbgs() << "\tDependent(" << MovingDependentOp << ") FromBB("
                      << FromThisBB->getNumber() << ") ToBB("
                      << ToThisBB->getNumber() << ") to this packet:\n";
               DumpPacket(WorkPoint.getInstrIterator()));

    // Rewind to the bundle header when the candidate is bundled.
    MachineBasicBlock::instr_iterator FromHereII = MII;
    if (MII->isInsideBundle()) {
      while (!FromHereII->isBundle())
        --FromHereII;
      LLVM_DEBUG(dbgs() << "\tFrom here:\n"; DumpPacket(FromHereII));

      MachineBasicBlock::iterator FromHere(FromHereII);
      // We have instruction that could be moved from its current position.
      if (MoveMItoBundle(CurrentRegion: *CurrentRegion, InstrToMove&: MII, NextMI, TargetPacket&: WorkPoint, SourceLocation&: FromHere,
                         backtrack, MovingDependentOp, PathInRegion)) {
        // If BB from which we pull is now empty, move on.
        if (IsEmptyBlock(MBB: FromThisBB)) {
          LLVM_DEBUG(dbgs() << "\n\tExhosted BB (bundle).\n");
          return false;
        }
        // The move may have invalidated the cached end iterators; refresh.
        FromThisBBEnd = FromThisBB->end();
        ToThisBBEnd = ToThisBB->end();

        LLVM_DEBUG(dbgs() << "\n\tAfter updates(bundle to bundle):\n");
        LLVM_DEBUG(dbgs() << "\t\tWorkPoint: ";
                   DumpPacket(WorkPoint.getInstrIterator()));

        // We should not increment current position,
        // but rather try one more time to pull from the same bundle.
        RetVal = WorkPoint;
        return true;
      } else
        LLVM_DEBUG(dbgs() << "\tCould not move packetized instr.\n");
    } else {
      MachineBasicBlock::iterator FromHere(FromHereII);
      if (MoveMItoBundle(CurrentRegion: *CurrentRegion, InstrToMove&: MII, NextMI, TargetPacket&: WorkPoint, SourceLocation&: FromHere,
                         backtrack, MovingDependentOp, PathInRegion)) {

        // If BB from which we pull is now empty, move on.
        if (IsEmptyBlock(MBB: FromThisBB)) {
          LLVM_DEBUG(dbgs() << "\n\tExhosted BB (single).\n");
          return false;
        }
        FromThisBBEnd = FromThisBB->end();
        ToThisBBEnd = ToThisBB->end();

        LLVM_DEBUG(dbgs() << "\tAfter updates (single to bundle):\n");
        LLVM_DEBUG(dbgs() << "\t\tWorkPoint: ";
                   DumpPacket(WorkPoint.getInstrIterator()));
        // We should not increment current position,
        // but rather try one more time to pull from the same bundle.
        RetVal = WorkPoint;
        return true;
      } else
        LLVM_DEBUG(dbgs() << "\tCould not move single.\n");
    }
  }
  LLVM_DEBUG(dbgs() << "\tNot a single candidate fit.\n");
  return false;
}
4946
/// Main function. Iterate all current regions one at a time,
/// and look for pull-up opportunities.
/// Pseudo sequence:
/// - for all bundles and single instructions in region:
/// - see if resources are available (in the same cycle) - this is HOME.
/// - Starting from next BB in region, find an instruction that could be:
/// - removed from its current location
/// - added to underutilized bundle (including bundles with only one op)
/// - If so, trace path back to HOME and check that candidate could be
/// reordered with all the intermediate instructions.
///
/// Always returns true; callers cannot use the return value to tell
/// whether any pull-up actually took place.
bool HexagonGlobalSchedulerImpl::performPullUp() {
  // Instructions traversed between the Home bundle (WorkPoint) and the
  // current candidate position. A candidate must be reorderable past every
  // instruction recorded here to be legally pulled up.
  std::vector<MachineInstr *> backtrack;
  MachineBasicBlock::iterator FromHere;
  MachineBasicBlock::iterator FromThisBBEnd;

  LLVM_DEBUG(dbgs() << "****** PullUpRegions ***********\n");
  // For all regions...
  for (std::vector<BasicBlockRegion *>::iterator
           CurrentRegion = PullUpRegions.begin(),
           E = PullUpRegions.end();
       CurrentRegion != E; ++CurrentRegion) {

    LLVM_DEBUG(dbgs() << "\n\nRegion with(" << (*CurrentRegion)->size()
                      << ")BBs\n");

    // A single-BB region only has work for us when same-BB (local) pull-up
    // is enabled; otherwise pulling always crosses a BB boundary.
    if (!EnableLocalPullUp && (*CurrentRegion)->size() < 2)
      continue;

    // For all MBB in the region... except the last one.
    // ...except when we want to allow local pull-up.
    for (auto ToThisBB = (*CurrentRegion)->getRootMBB(),
              LastBBInRegion = (*CurrentRegion)->getLastMBB();
         ToThisBB != LastBBInRegion; ++ToThisBB) {
      // If we do not want to allow same BB pull-up, take an early exit.
      if (!EnableLocalPullUp && (std::next(x: ToThisBB) == LastBBInRegion))
        break;
      if (multipleBranchesFromToBB(BB: *ToThisBB))
        break;

      auto FromThisBB = ToThisBB;
      MachineBasicBlock::iterator ToThisBBEnd = (*ToThisBB)->end();
      MachineBasicBlock::iterator MI = (*ToThisBB)->begin();

      LLVM_DEBUG(dbgs() << "\n\tHome iterator moved to new BB("
                        << (*ToThisBB)->getNumber() << ")\n";
                 (*ToThisBB)->dump());

      // For all instructions in the BB.
      while (MI != ToThisBBEnd) {
        MachineBasicBlock::iterator WorkPoint = MI;
        ++MI;

        // Trivial check that there are unused resources
        // in the current location (cycle).
        // Loop (rather than single test) because each successful pull-up
        // retries the same WorkPoint until no more slots can be filled.
        while (ResourcesAvailableInBundle(CurrentRegion: *CurrentRegion, TargetPacket&: WorkPoint)) {
          LLVM_DEBUG(dbgs() << "\nxxxx Next Home in BB("
                            << (*ToThisBB)->getNumber() << "):\n";
                     DumpPacket(WorkPoint.getInstrIterator()));
          // Keep the path to the candidate.
          // It is the traveled path between home and work point.
          // Reset it for the new iteration.
          backtrack.clear();

          // The point of pull-up source (WorkPoint) could begin from the
          // current BB, but only if we allow pull-up in the same BB.
          // At the moment we do not.
          // We also do not process last block in the region,
          // so it is safe to always begin with the next BB in the region.
          // Start from "next" BB in the region.
          if (EnableLocalPullUp) {
            FromThisBB = ToThisBB;
            FromHere = WorkPoint;
            ++FromHere;
            FromThisBBEnd = (*FromThisBB)->end();

            // Initialize backtrack.
            // These are instructions between Home location
            // and the WorkPoint.
            for (MachineBasicBlock::iterator I = WorkPoint, IE = FromHere;
                 I != IE; ++I)
              backtrack.push_back(x: &*I);
          } else {
            FromThisBB = ToThisBB;
            ++FromThisBB;
            FromHere = (*FromThisBB)->begin();
            FromThisBBEnd = (*FromThisBB)->end();

            // Initialize backtrack.
            // These are instructions between Home location
            // and the end of the home BB.
            for (MachineBasicBlock::iterator I = WorkPoint, IE = ToThisBBEnd;
                 I != IE; ++I)
              backtrack.push_back(x: &*I);
          }

          // Search for pull-up candidate.
          while (true) {
            // If this BB is over, move onto the next one
            // in this region.
            if (FromHere == FromThisBBEnd) {
              ++FromThisBB;
              // Refresh LastBBInRegion in case tryMultipleInstructions modified
              // the regions Elements vector, invalidating the iterator.
              LastBBInRegion = (*CurrentRegion)->getLastMBB();
              if (FromThisBB == LastBBInRegion)
                break;
              else {
                LLVM_DEBUG(dbgs() << "\n\tNext BB in this region\n";
                           (*FromThisBB)->dump());
                FromThisBBEnd = (*FromThisBB)->end();
                FromHere = (*FromThisBB)->begin();
                if (FromThisBBEnd == FromHere)
                  break;
              }
            }
            // Debug instructions are not scheduling obstacles; step over
            // them without recording them in the backtrack path.
            if ((*FromHere).isDebugInstr()) {
              ++FromHere;
              continue;
            }
            // This is a step Home.
            backtrack.push_back(x: &*FromHere);
            if (!findPullUpCandidates(WorkPoint, FromHere, backtrack,
                                      MaxCandidates: MainCandidateQueueSize))
              break;
            ++FromHere;
          }
          // Try to pull-up one of the selected candidates.
          // On success WorkPoint is updated and we loop to retry the same
          // bundle; on failure advance to the next Home position.
          if (!tryMultipleInstructions(/*output*/ RetVal&: WorkPoint, CurrentRegion, NextMI&: MI,
                                       ToThisBBEnd, FromThisBBEnd))
            break;
        }
      }
      // Refresh LastBBInRegion after potential CFG modifications.
      LastBBInRegion = (*CurrentRegion)->getLastMBB();
    }
    // AllowUnlikelyPath is on by default,
    // if we wish to disable it, we can do so here.
    if (!AllowUnlikelyPath)
      continue;

    // We have parsed the likely path through the region.
    // Now traverse the other (unlikely) path.
    //
    // Note: BasicBlockRegion uses a vector for MBB storage, so adding BBs to
    // the region while iterating could invalidate iterators. Collect the work
    // items first, then process them.
    std::vector<std::pair<MachineBasicBlock *, MachineBasicBlock *>>
        UnlikelyWork;
    UnlikelyWork.reserve(n: (*CurrentRegion)->size());
    for (auto ToIt = (*CurrentRegion)->getRootMBB(),
              End = (*CurrentRegion)->getLastMBB();
         ToIt != End; ++ToIt) {
      MachineBasicBlock *ToBB = *ToIt;
      // SecondBest is presumably the less-likely successor of ToBB (null
      // when none) — confirm against getNextPURBB's definition.
      MachineBasicBlock *SecondBest = getNextPURBB(MBB: ToBB, SecondBest: true);
      if (SecondBest)
        UnlikelyWork.emplace_back(args&: ToBB, args&: SecondBest);
    }

    for (auto [ToBB, SecondBest] : UnlikelyWork) {
      LLVM_DEBUG(dbgs() << "\tFor BB:\n"; ToBB->dump());
      LLVM_DEBUG(dbgs() << "\tHave SecondBest:\n"; SecondBest->dump());
      // Adding this BB to the region should not be done if we
      // plan to reuse it(the region) again. For now it is OK.
      (*CurrentRegion)->addBBtoRegion(MBB: SecondBest);
      LLVM_DEBUG(dbgs() << "\tHome iterator moved to new BB("
                        << ToBB->getNumber() << ")\n";
                 ToBB->dump());
      MachineBasicBlock::iterator ToThisBBEnd = ToBB->end();
      MachineBasicBlock::iterator MI = ToBB->begin();

      // For all instructions in the BB.
      while (MI != ToThisBBEnd) {
        MachineBasicBlock::iterator WorkPoint = MI;
        ++MI;

        // Trivial check that there are unused resources
        // in the current location (cycle).
        while (ResourcesAvailableInBundle(CurrentRegion: *CurrentRegion, TargetPacket&: WorkPoint)) {
          LLVM_DEBUG(dbgs() << "\nxxxx Second visit Home in BB("
                            << ToBB->getNumber() << "):\n";
                     DumpPacket(WorkPoint.getInstrIterator()));

          FromHere = SecondBest->begin();
          FromThisBBEnd = SecondBest->end();

          // Keep the path to the candidate.
          backtrack.clear();

          // This is Home location.
          for (MachineBasicBlock::iterator I = WorkPoint, IE = ToThisBBEnd;
               I != IE; ++I)
            backtrack.push_back(x: &*I);

          while (true) {
            // If this BB is over, move onto the next one
            // in this region.
            // Unlike the likely-path walk above, the unlikely path only
            // ever visits this single successor block.
            if (FromHere == FromThisBBEnd) {
              LLVM_DEBUG(dbgs()
                         << "\tOnly do one successor for the second try\n");
              break;
            }
            if ((*FromHere).isDebugInstr()) {
              ++FromHere;
              continue;
            }
            // This is a step Home.
            backtrack.push_back(x: &*FromHere);
            if (!findPullUpCandidates(WorkPoint, FromHere, backtrack,
                                      MaxCandidates: SecondaryCandidateQueueSize))
              break;
            ++FromHere;
          }
          // Try to pull-up one of selected candidate.
          if (!tryMultipleInstructions(/*output*/ RetVal&: WorkPoint, CurrentRegion, NextMI&: MI,
                                       ToThisBBEnd, FromThisBBEnd, PathInRegion: false))
            break;
        }
      }
    }
  }
  return true;
}
5169
5170bool HexagonGlobalSchedulerImpl::incrementalAddToPacket(MachineInstr &MI) {
5171
5172 LLVM_DEBUG(dbgs() << "\t[AddToPacket] (" << CurrentPacketMIs.size()
5173 << ") adding:\t";
5174 MI.dump());
5175
5176 if (!ResourceTracker->canReserveResources(MI) || !shouldAddToPacket(MI))
5177 return false;
5178
5179 ResourceTracker->reserveResources(MI);
5180 CurrentPacketMIs.push_back(x: &MI);
5181 CurrentState.HomeBundle.push_back(Elt: &MI);
5182
5183 if (QII->isExtended(MI) || QII->isConstExtended(MI) ||
5184 isJumpOutOfRange(MI: &MI)) {
5185 // If at this point of time we cannot reserve resources,
5186 // this might mean that the packet came into the pull-up
5187 // pass already in danger of overflowing.
5188 // Nevertheless, since this is only a possibility of overflow
5189 // no error should be issued here.
5190 if (ResourceTracker->canReserveResources(MI&: *Ext)) {
5191 ResourceTracker->reserveResources(MI&: *Ext);
5192 LLVM_DEBUG(dbgs() << "\t[AddToPacket] (" << CurrentPacketMIs.size()
5193 << ") adding:\t immext_i\n");
5194 CurrentPacketMIs.push_back(x: Ext);
5195 CurrentState.HomeBundle.push_back(Elt: Ext);
5196 return true;
5197 } else {
5198 LLVM_DEBUG(dbgs() << "\t Previous overflow possible.\n");
5199 return false;
5200 }
5201 }
5202 return true;
5203}
5204
5205void HexagonGlobalSchedulerImpl::checkBundleCounts(MachineFunction &Fn) {
5206 if (DisableCheckBundles)
5207 return;
5208
5209 unsigned BundleLimit = 4;
5210
5211 for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
5212 MBBi != MBBe; ++MBBi) {
5213
5214 for (MachineBasicBlock::iterator MI = MBBi->instr_begin(),
5215 ME = MBBi->instr_end();
5216 MI != ME; ++MI) {
5217 if (MI->isBundle()) {
5218 MachineBasicBlock::instr_iterator MII = MI.getInstrIterator();
5219 MachineBasicBlock::instr_iterator End = MII->getParent()->instr_end();
5220
5221 unsigned InstrCount = 0;
5222
5223 for (++MII; MII != End && MII->isInsideBundle(); ++MII) {
5224 if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
5225 MII->getOpcode() == TargetOpcode::IMPLICIT_DEF ||
5226 MII->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
5227 MII->isEHLabel() || QII->isEndLoopN(Opcode: MII->getOpcode())) {
5228 continue;
5229 } else {
5230 InstrCount++;
5231 }
5232 }
5233 if (InstrCount > BundleLimit) {
5234 if (WarnOnBundleSize) {
5235 LLVM_DEBUG(dbgs() << "Warning bundle size exceeded " << *MI);
5236 } else {
5237 assert(0 && "Bundle size exceeded");
5238 }
5239 }
5240 }
5241 }
5242 }
5243}
5244
5245/// Debugging only. Count compound and duplex opportunities.
5246unsigned HexagonGlobalSchedulerImpl::countCompounds(MachineFunction &Fn) {
5247 unsigned CompoundCount = 0;
5248 [[maybe_unused]] unsigned DuplexCount = 0;
5249 [[maybe_unused]] unsigned InstOffset = 0;
5250
5251 // Loop over all basic blocks.
5252 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe;
5253 ++MBB) {
5254 LLVM_DEBUG(dbgs() << "\n BB#" << MBB->getNumber() << " " << MBB->getName()
5255 << " in_func "
5256 << MBB->getParent()->getFunction().getName() << " \n");
5257 for (MachineBasicBlock::iterator MI = MBB->instr_begin(),
5258 ME = MBB->instr_end();
5259 MI != ME; ++MI) {
5260 if (MI->isDebugInstr())
5261 continue;
5262 if (MI->isBundle()) {
5263 MachineBasicBlock::instr_iterator MII = MI.getInstrIterator();
5264 MachineBasicBlock::instr_iterator MIE = MI->getParent()->instr_end();
5265 MachineInstr *FirstCompound = NULL, *SecondCompound = NULL;
5266 MachineInstr *FirstDuplex = NULL, *SecondDuplex = NULL;
5267 LLVM_DEBUG(dbgs() << "{\n");
5268
5269 for (++MII; MII != MIE && MII->isInsideBundle() && !MII->isBundle();
5270 ++MII) {
5271 if (MII->isDebugInstr())
5272 continue;
5273 LLVM_DEBUG(dbgs() << "(" << InstOffset << ")\t");
5274 InstOffset += QII->getSize(MI: *MII);
5275 if (QII->getCompoundCandidateGroup(MI: *MII)) {
5276 if (!FirstCompound) {
5277 FirstCompound = &*MII;
5278 LLVM_DEBUG(dbgs() << "XX ");
5279 } else {
5280 SecondCompound = &*MII;
5281 LLVM_DEBUG(dbgs() << "YY ");
5282 }
5283 }
5284 if (QII->getDuplexCandidateGroup(MI: *MII)) {
5285 if (!FirstDuplex) {
5286 FirstDuplex = &*MII;
5287 LLVM_DEBUG(dbgs() << "AA ");
5288 } else {
5289 SecondDuplex = &*MII;
5290 LLVM_DEBUG(dbgs() << "VV ");
5291 }
5292 }
5293 LLVM_DEBUG(MII->dump());
5294 }
5295 LLVM_DEBUG(dbgs() << "}\n");
5296 if (SecondCompound) {
5297 if (isCompoundPair(MIa: FirstCompound, MIb: SecondCompound)) {
5298 LLVM_DEBUG(dbgs() << "Compound pair (" << CompoundCount << ")\n");
5299 CompoundCount++;
5300 }
5301 }
5302 if (SecondDuplex) {
5303 if (QII->isDuplexPair(MIa: *FirstDuplex, MIb: *SecondDuplex)) {
5304 LLVM_DEBUG(dbgs() << "Duplex pair (" << DuplexCount << ")\n");
5305 DuplexCount++;
5306 }
5307 }
5308 } else {
5309 LLVM_DEBUG(dbgs() << "(" << InstOffset << ")\t");
5310 if (QII->getCompoundCandidateGroup(MI: *MI))
5311 LLVM_DEBUG(dbgs() << "XX ");
5312 if (QII->getDuplexCandidateGroup(MI: *MI))
5313 LLVM_DEBUG(dbgs() << "AA ");
5314 InstOffset += QII->getSize(MI: *MI);
5315 LLVM_DEBUG(MI->dump());
5316 }
5317 }
5318 }
5319 LLVM_DEBUG(dbgs() << "Total compound(" << CompoundCount << ") duplex("
5320 << DuplexCount << ")\n");
5321 return CompoundCount;
5322}
5323
5324//===----------------------------------------------------------------------===//
5325// Public Constructor Functions
5326//===----------------------------------------------------------------------===//
5327
5328FunctionPass *llvm::createHexagonGlobalScheduler() {
5329 return new HexagonGlobalScheduler();
5330}
5331