//===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that splits the constant pool up into 'islands'
// which are scattered throughout the function. This is required due to the
// limited pc-relative displacements that ARM has.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-cp-islands"

#define ARM_CP_ISLANDS_OPT_NAME \
  "ARM constant island placement and branch shortening pass"
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");

static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
          cl::desc("Adjust basic block layout to better use TB[BH]"));

static cl::opt<unsigned>
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
          cl::desc("The max number of iterations allowed for convergence"));

static cl::opt<bool> SynthesizeThumb1TBB(
    "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
    cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
             "equivalent to the TBB/TBH instructions"));

namespace {

  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
  /// requires constant pool entries to be scattered among the instructions
  /// inside a function. To do this, it completely ignores the normal LLVM
  /// constant pool; instead, it places constants wherever it feels like with
  /// special instructions.
  ///
  /// The terminology used in this pass includes:
  ///   Islands - Clumps of constants placed in the function.
  ///   Water   - Potential places where an island could be formed.
  ///   CPE     - A constant pool entry that has been placed somewhere, which
  ///             tracks a list of users.
  class ARMConstantIslands : public MachineFunctionPass {
    std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;

    /// WaterList - A sorted list of basic blocks where islands could be placed
    /// (i.e. blocks that don't fall through to the following block, due
    /// to a return, unreachable, or unconditional branch).
    std::vector<MachineBasicBlock*> WaterList;

    /// NewWaterList - The subset of WaterList that was created since the
    /// previous iteration by inserting unconditional branches.
    SmallPtrSet<MachineBasicBlock *, 4> NewWaterList;

    using water_iterator = std::vector<MachineBasicBlock *>::iterator;

    /// CPUser - One user of a constant pool, keeping the machine instruction
    /// pointer, the constant pool being referenced, and the max displacement
    /// allowed from the instruction to the CP. The HighWaterMark records the
    /// highest basic block where a new CPEntry can be placed. To ensure this
    /// pass terminates, the CP entries are initially placed at the end of the
    /// function and then move monotonically to lower addresses. The
    /// exception to this rule is when the current CP entry for a particular
    /// CPUser is out of range, but there is another CP entry for the same
    /// constant value in range. We want to use the existing in-range CP
    /// entry, but if it later moves out of range, the search for new water
    /// should resume where it left off. The HighWaterMark is used to record
    /// that point.
    struct CPUser {
      MachineInstr *MI;
      MachineInstr *CPEMI;
      MachineBasicBlock *HighWaterMark;
      unsigned MaxDisp;
      bool NegOk;
      bool IsSoImm;
      bool KnownAlignment = false;

      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
             bool neg, bool soimm)
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
        HighWaterMark = CPEMI->getParent();
      }

      /// getMaxDisp - Returns the maximum displacement supported by MI.
      /// Correct for unknown alignment.
      /// Conservatively subtract 2 bytes to handle weird alignment effects.
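      /// For example, a t2LDRpci user with MaxDisp = 4095 yields 4093 when
      /// the user's alignment is known, and 4091 when it is not.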
      unsigned getMaxDisp() const {
        return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
      }
    };

    /// CPUsers - Keep track of all of the machine instructions that use various
    /// constant pools and their max displacement.
    std::vector<CPUser> CPUsers;

    /// CPEntry - One per constant pool entry, keeping the machine instruction
    /// pointer, the constpool index, and the number of CPUser's which
    /// reference this entry.
    struct CPEntry {
      MachineInstr *CPEMI;
      unsigned CPI;
      unsigned RefCount;

      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
    };

    /// CPEntries - Keep track of all of the constant pool entry machine
    /// instructions. For each original constpool index (i.e. those that existed
    /// upon entry to this pass), it keeps a vector of entries. Original
    /// elements are cloned as we go along; the clones are put in the vector of
    /// the original element, but have distinct CPIs.
    ///
    /// The first half of CPEntries contains generic constants, the second half
    /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
    /// which vector it will be in here.
    std::vector<std::vector<CPEntry>> CPEntries;

    /// Maps a JT index to the offset in CPEntries containing copies of that
    /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
    DenseMap<int, int> JumpTableEntryIndices;

    /// Maps a JT index to the LEA that actually uses the index to calculate its
    /// base address.
    DenseMap<int, int> JumpTableUserIndices;

    /// ImmBranch - One per immediate branch, keeping the machine instruction
    /// pointer, conditional or unconditional, the max displacement,
    /// and (if isCond is true) the corresponding unconditional branch
    /// opcode.
    struct ImmBranch {
      MachineInstr *MI;
      unsigned MaxDisp : 31;
      LLVM_PREFERRED_TYPE(bool)
      unsigned isCond : 1;
      unsigned UncondBr;

      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
    };

    /// ImmBranches - Keep track of all the immediate branch instructions.
    std::vector<ImmBranch> ImmBranches;

    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
    SmallVector<MachineInstr*, 4> PushPopMIs;

    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
    SmallVector<MachineInstr*, 4> T2JumpTables;

    MachineFunction *MF;
    MachineConstantPool *MCP;
    const ARMBaseInstrInfo *TII;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    MachineDominatorTree *DT = nullptr;
    bool isThumb;
    bool isThumb1;
    bool isThumb2;
    bool isPositionIndependentOrROPI;

  public:
    static char ID;

    ARMConstantIslands() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().setNoVRegs();
    }

    StringRef getPassName() const override {
      return ARM_CP_ISLANDS_OPT_NAME;
    }

  private:
    void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
    void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
    bool BBHasFallthrough(MachineBasicBlock *MBB);
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
    Align getCPEAlign(const MachineInstr *CPEMI);
    void scanFunctionJumpTables();
    void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
    void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
    bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
    unsigned getCombinedIndex(const MachineInstr *CPEMI);
    int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
    bool findAvailableWater(CPUser&U, unsigned UserOffset,
                            water_iterator &WaterIter, bool CloserWater);
    void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
                        MachineBasicBlock *&NewMBB);
    bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
    void removeDeadCPEMI(MachineInstr *CPEMI);
    bool removeUnusedCPEntries();
    bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                          MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                          bool DoDump = false);
    bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                        CPUser &U, unsigned &Growth);
    bool fixupImmediateBr(ImmBranch &Br);
    bool fixupConditionalBr(ImmBranch &Br);
    bool fixupUnconditionalBr(ImmBranch &Br);
    bool optimizeThumb2Instructions();
    bool optimizeThumb2Branches();
    bool reorderThumb2JumpTables();
    bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
                              unsigned &DeadSize, bool &CanDeleteLEA,
                              bool &BaseRegKill);
    bool optimizeThumb2JumpTables();
    MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
                                                  MachineBasicBlock *BB,
                                                  MachineBasicBlock *JTBB);

    unsigned getUserOffset(CPUser&) const;
    void dumpBBs();
    void verify();

    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         const CPUser &U) {
      return isOffsetInRange(UserOffset, TrialOffset,
                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
    }
  };

} // end anonymous namespace

char ARMConstantIslands::ID = 0;

/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  assert(is_sorted(*MF, [&BBInfo](const MachineBasicBlock &LHS,
                                  const MachineBasicBlock &RHS) {
    return BBInfo[LHS.getNumber()].postOffset() <
           BBInfo[RHS.getNumber()].postOffset();
  }));
  LLVM_DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
  for (CPUser &U : CPUsers) {
    unsigned UserOffset = getUserOffset(U);
    // Verify offset using the real max displacement without the safety
    // adjustment.
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
                         /* DoDump = */ true)) {
      LLVM_DEBUG(dbgs() << "OK\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Out of range.\n");
    dumpBBs();
    LLVM_DEBUG(MF->dump());
    llvm_unreachable("Constant pool entry out of range!");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
  LLVM_DEBUG({
    BBInfoVector &BBInfo = BBUtils->getBBInfo();
    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
      const BasicBlockInfo &BBI = BBInfo[J];
      dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
             << " kb=" << unsigned(BBI.KnownBits)
             << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign)
             << format(" size=%#x\n", BBInfo[J].Size);
    }
  });
}
#endif

// Align blocks where the previous block does not fall through. This may add
// extra NOPs, but they will not be executed. It uses the PrefLoopAlignment as a
// measure of how much to align, and only runs at CodeGenOptLevel::Aggressive.
static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
  if (MF->getTarget().getOptLevel() != CodeGenOptLevel::Aggressive ||
      MF->getFunction().hasOptSize())
    return false;

  auto *TLI = STI->getTargetLowering();
  const Align Alignment = TLI->getPrefLoopAlignment();
  if (Alignment < 4)
    return false;

  bool Changed = false;
  bool PrevCanFallThrough = true;
  for (auto &MBB : *MF) {
    if (!PrevCanFallThrough) {
      Changed = true;
      MBB.setAlignment(Alignment);
    }

    PrevCanFallThrough = MBB.canFallThrough();

    // For LOBs, the ARMLowOverheadLoops pass may remove the unconditional
    // branch later in the pipeline.
    if (STI->hasLOB()) {
      for (const auto &MI : reverse(MBB.terminators())) {
        if (MI.getOpcode() == ARM::t2B &&
            MI.getOperand(0).getMBB() == MBB.getNextNode())
          continue;
        if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
            MI.getOpcode() == ARM::t2LoopEndDec) {
          PrevCanFallThrough = true;
          break;
        }
        // Any other terminator - nothing to do.
        break;
      }
    }
  }

  return Changed;
}

bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MCP = mf.getConstantPool();
  BBUtils = std::make_unique<ARMBasicBlockUtils>(mf);

  LLVM_DEBUG(dbgs() << "***** ARMConstantIslands: "
                    << MCP->getConstants().size() << " CP entries, aligned to "
                    << MCP->getConstantPoolAlign().value() << " bytes *****\n");

  STI = &MF->getSubtarget<ARMSubtarget>();
  TII = STI->getInstrInfo();
  isPositionIndependentOrROPI =
      STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
  AFI = MF->getInfo<ARMFunctionInfo>();
  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  isThumb = AFI->isThumbFunction();
  isThumb1 = AFI->isThumb1OnlyFunction();
  isThumb2 = AFI->isThumb2Function();

  bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
  // TBB generation code in this constant island pass has not been adapted to
  // deal with speculation barriers.
  if (STI->hardenSlsRetBr())
    GenerateTBB = false;

  // Renumber all of the machine basic blocks in the function, guaranteeing that
  // the numbers agree with the position of the block in the function.
  MF->RenumberBlocks();
  DT->updateBlockNumbers();

  // Try to reorder and otherwise adjust the block layout to make good use
  // of the TB[BH] instructions.
  bool MadeChange = false;
  if (GenerateTBB && AdjustJumpTableBlocks) {
    scanFunctionJumpTables();
    MadeChange |= reorderThumb2JumpTables();
    // Data is out of date, so clear it. It'll be re-computed later.
    T2JumpTables.clear();
    // Blocks may have shifted around. Keep the numbering up to date.
    MF->RenumberBlocks();
    DT->updateBlockNumbers();
  }

  // Align any non-fallthrough blocks.
  MadeChange |= AlignBlocks(MF, STI);

  // Perform the initial placement of the constant pool entries. To start with,
  // we put them all at the end of the function.
  std::vector<MachineInstr*> CPEMIs;
  if (!MCP->isEmpty())
    doInitialConstPlacement(CPEMIs);

  if (MF->getJumpTableInfo())
    doInitialJumpTablePlacement(CPEMIs);

  /// The next UID to take is the first unused one.
  AFI->initPICLabelUId(CPEMIs.size());

  // Do the initial scan of the function, building up information about the
  // sizes of each block, the location of all the water, and finding all of the
  // constant pool users.
  initializeFunctionInfo(CPEMIs);
  CPEMIs.clear();
  LLVM_DEBUG(dumpBBs());

  // Functions with jump tables need an alignment of 4 because they use the ADR
  // instruction, which aligns the PC to 4 bytes before adding an offset.
  if (!T2JumpTables.empty())
    MF->ensureAlignment(Align(4));

  /// Remove dead constant pool entries.
  MadeChange |= removeUnusedCPEntries();

  // Iteratively place constant pool entries and fix up branches until there
  // is no change.
  unsigned NoCPIters = 0, NoBRIters = 0;
  while (true) {
    LLVM_DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
    bool CPChange = false;
    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
      // For most inputs, it converges in no more than 5 iterations.
      // If it hasn't converged after 10, the input may have a huge basic
      // block or many CPEs. In this case, we will try different heuristics.
      CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
    if (CPChange && ++NoCPIters > CPMaxIteration)
      report_fatal_error("Constant Island pass failed to converge!");
    LLVM_DEBUG(dumpBBs());

    // Clear NewWaterList now. If we split a block for branches, it should
    // appear as "new water" for the next iteration of constant pool placement.
    NewWaterList.clear();

    LLVM_DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
      // Note: fixupImmediateBr can append to ImmBranches.
      BRChange |= fixupImmediateBr(ImmBranches[i]);
    }
    if (BRChange && ++NoBRIters > 30)
      report_fatal_error("Branch Fix Up pass failed to converge!");
    LLVM_DEBUG(dumpBBs());

    if (!CPChange && !BRChange)
      break;
    MadeChange = true;
  }

  // Shrink 32-bit Thumb2 load and store instructions.
  if (isThumb2 && !STI->prefers32BitThumb())
    MadeChange |= optimizeThumb2Instructions();

  // Shrink 32-bit branch instructions.
  if (isThumb && STI->hasV8MBaselineOps())
    MadeChange |= optimizeThumb2Branches();

  // Optimize jump tables using TBB / TBH.
  if (GenerateTBB && !STI->genExecuteOnly())
    MadeChange |= optimizeThumb2JumpTables();

  // After a while, this might be made debug-only, but it is not expensive.
  verify();

  // Save the mapping between original and cloned constpool entries.
  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
    for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
      const CPEntry & CPE = CPEntries[i][j];
      if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
        AFI->recordCPEClone(i, CPE.CPI);
    }
  }

  LLVM_DEBUG(dbgs() << '\n'; dumpBBs());

  BBUtils->clear();
  WaterList.clear();
  CPUsers.clear();
  CPEntries.clear();
  JumpTableEntryIndices.clear();
  JumpTableUserIndices.clear();
  ImmBranches.clear();
  PushPopMIs.clear();
  T2JumpTables.clear();

  return MadeChange;
}

/// Perform the initial placement of the regular constant pool entries.
/// To start with, we put them all at the end of the function.
void
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
  // Create the basic block to hold the CPE's.
  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
  MF->push_back(BB);

  // MachineConstantPool measures alignment in bytes.
  const Align MaxAlign = MCP->getConstantPoolAlign();
  const unsigned MaxLogAlign = Log2(MaxAlign);

  // Mark the basic block as required by the const-pool.
  BB->setAlignment(MaxAlign);

  // The function needs to be as aligned as the basic blocks. The linker may
  // move functions around based on their alignment.
  // Special case: halfword literals still need word alignment on the function.
  Align FuncAlign = MaxAlign;
  if (MaxAlign == 2)
    FuncAlign = Align(4);
  MF->ensureAlignment(FuncAlign);

  // Order the entries in BB by descending alignment. That ensures correct
  // alignment of all entries as long as BB is sufficiently aligned. Keep
  // track of the insertion point for each alignment. We are going to bucket
  // sort the entries as they are created.
  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1,
                                                       BB->end());

  // Add all of the constants from the constant pool to the end block, use an
  // identity mapping of CPI's to CPE's.
  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

  const DataLayout &TD = MF->getDataLayout();
  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
    unsigned Size = CPs[i].getSizeInBytes(TD);
    Align Alignment = CPs[i].getAlign();
    // Verify that all constant pool entries are a multiple of their alignment.
    // If not, we would have to pad them out so that instructions stay aligned.
    assert(isAligned(Alignment, Size) && "CP Entry not multiple of 4 bytes!");

    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
    unsigned LogAlign = Log2(Alignment);
    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
    MachineInstr *CPEMI =
      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
        .addImm(i).addConstantPoolIndex(i).addImm(Size);
    CPEMIs.push_back(CPEMI);

    // Ensure that future entries with higher alignment get inserted before
    // CPEMI. This is bucket sort with iterators.
    for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = CPEMI;

    // Add a new CPEntry, but no corresponding CPUser yet.
    CPEntries.emplace_back(1, CPEntry(CPEMI, i));
    ++NumCPEs;
    LLVM_DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
                      << Size << ", align = " << Alignment.value() << '\n');
  }
  LLVM_DEBUG(BB->dump());
}

/// Do initial placement of the jump tables. Because Thumb2's TBB and TBH
/// instructions can be made more efficient if the jump table immediately
/// follows the instruction, it's best to place them immediately next to their
/// jumps to begin with. In almost all cases they'll never be moved from that
/// position.
void ARMConstantIslands::doInitialJumpTablePlacement(
    std::vector<MachineInstr *> &CPEMIs) {
  unsigned i = CPEntries.size();
  auto MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

  // Only inline jump tables are placed in the function.
  if (MJTI->getEntryKind() != MachineJumpTableInfo::EK_Inline)
    return;

  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    auto MI = MBB.getLastNonDebugInstr();
    // Look past potential SpeculationBarriers at end of BB.
    while (MI != MBB.end() &&
           (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) ||
            MI->isDebugInstr()))
      --MI;

    if (MI == MBB.end())
      continue;

    unsigned JTOpcode;
    switch (MI->getOpcode()) {
    default:
      continue;
    case ARM::BR_JTadd:
    case ARM::BR_JTr:
    case ARM::tBR_JTr:
    case ARM::BR_JTm_i12:
    case ARM::BR_JTm_rs:
      // These instructions are emitted only in ARM or Thumb1 modes which do not
      // support PACBTI. Hence we don't add BTI instructions in the destination
      // blocks.
      assert(!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement() &&
             "Branch protection must not be enabled for Arm or Thumb1 modes");
      JTOpcode = ARM::JUMPTABLE_ADDRS;
      break;
    case ARM::t2BR_JT:
      JTOpcode = ARM::JUMPTABLE_INSTS;
      break;
    case ARM::tTBB_JT:
    case ARM::t2TBB_JT:
      JTOpcode = ARM::JUMPTABLE_TBB;
      break;
    case ARM::tTBH_JT:
    case ARM::t2TBH_JT:
      JTOpcode = ARM::JUMPTABLE_TBH;
      break;
    }

    unsigned NumOps = MI->getDesc().getNumOperands();
    MachineOperand JTOp =
      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOp.getIndex();
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
                                  DebugLoc(), TII->get(JTOpcode))
                              .addImm(i++)
                              .addJumpTableIndex(JTI)
                              .addImm(Size);
    CPEMIs.push_back(CPEMI);
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
    if (!LastCorrectlyNumberedBB)
      LastCorrectlyNumberedBB = &MBB;
  }

  // If we did anything then we need to renumber the subsequent blocks.
  if (LastCorrectlyNumberedBB) {
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
    DT->updateBlockNumbers();
  }
}

/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
  // Get the next machine basic block in the function.
  MachineFunction::iterator MBBI = MBB->getIterator();
  // Can't fall off end of function.
  if (std::next(MBBI) == MBB->getParent()->end())
    return false;

  MachineBasicBlock *NextBB = &*std::next(MBBI);
  if (!MBB->isSuccessor(NextBB))
    return false;

  // Try to analyze the end of the block. A potential fallthrough may already
  // have an unconditional branch for whatever reason.
  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 4> Cond;
  bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
  return TooDifficult || FBB == nullptr;
}

/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
/// look up the corresponding CPEntry.
ARMConstantIslands::CPEntry *
ARMConstantIslands::findConstPoolEntry(unsigned CPI,
                                       const MachineInstr *CPEMI) {
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  // Number of entries per constpool index should be small, just do a
  // linear search.
  for (CPEntry &CPE : CPEs)
    if (CPE.CPEMI == CPEMI)
      return &CPE;
  return nullptr;
}

/// getCPEAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI.
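/// Native TBB/TBH tables need only the natural alignment of their byte or
/// halfword entries; the sequences synthesized for Thumb1 appear to address
/// the table relative to a word-aligned base, hence the Align(4) below.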
Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
  switch (CPEMI->getOpcode()) {
  case ARM::CONSTPOOL_ENTRY:
    break;
  case ARM::JUMPTABLE_TBB:
    return isThumb1 ? Align(4) : Align(1);
  case ARM::JUMPTABLE_TBH:
    return isThumb1 ? Align(4) : Align(2);
  case ARM::JUMPTABLE_INSTS:
    return Align(2);
  case ARM::JUMPTABLE_ADDRS:
    return Align(4);
  default:
    llvm_unreachable("unknown constpool entry kind");
  }

  unsigned CPI = getCombinedIndex(CPEMI);
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
  return MCP->getConstants()[CPI].getAlign();
}

/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the locations of all the jump table branches.
void ARMConstantIslands::scanFunctionJumpTables() {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &I : MBB)
      if (I.isBranch() &&
          (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
        T2JumpTables.push_back(&I);
  }
}

/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {

  BBUtils->computeAllBlockSizes();
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  // The known bits of the entry block offset are determined by the function
  // alignment.
  BBInfo.front().KnownBits = Log2(MF->getAlignment());

  // Compute block offsets and known bits.
  BBUtils->adjustBBOffsetsAfter(&MF->front());

  // We only care about jump table instructions when jump tables are inline.
  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  bool InlineJumpTables =
      MJTI && MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline;

  // Now go back through the instructions and build up our data structures.
  for (MachineBasicBlock &MBB : *MF) {
    // If this block doesn't fall through into the next MBB, then this is
    // 'water' where a constant pool island could be placed.
    if (!BBHasFallthrough(&MBB))
      WaterList.push_back(&MBB);

    for (MachineInstr &I : MBB) {
      if (I.isDebugInstr())
        continue;

      unsigned Opc = I.getOpcode();
      if (I.isBranch()) {
        bool isCond = false;
        unsigned Bits = 0;
        unsigned Scale = 1;
        int UOpc = Opc;
        switch (Opc) {
        default:
          continue; // Ignore other JT branches
        case ARM::t2BR_JT:
        case ARM::tBR_JTr:
          if (InlineJumpTables)
            T2JumpTables.push_back(&I);
          continue; // Does not get an entry in ImmBranches
        case ARM::Bcc:
          isCond = true;
          UOpc = ARM::B;
          [[fallthrough]];
        case ARM::B:
          Bits = 24;
          Scale = 4;
          break;
        case ARM::tBcc:
          isCond = true;
          UOpc = ARM::tB;
          Bits = 8;
          Scale = 2;
          break;
        case ARM::tB:
          Bits = 11;
          Scale = 2;
          break;
        case ARM::t2Bcc:
          isCond = true;
          UOpc = ARM::t2B;
          Bits = 20;
          Scale = 2;
          break;
        case ARM::t2B:
          Bits = 24;
          Scale = 2;
          break;
        }

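        // The branch immediate is a signed field of 'Bits' bits scaled by
        // 'Scale'; e.g. tBcc (Bits=8, Scale=2) reaches ((1<<7)-1)*2 = 254
        // bytes in either direction.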
        // Record this immediate branch.
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
        ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc));
      }

      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
        PushPopMIs.push_back(&I);

      if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
          Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
          Opc == ARM::JUMPTABLE_TBH)
        continue;

      // Scan the instructions for constant pool operands.
      for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
        if (I.getOperand(op).isCPI() ||
            (I.getOperand(op).isJTI() && InlineJumpTables)) {
          // We found one. The addressing mode tells us the max displacement
          // from the PC that this instruction permits.

          // Basic size info comes from the TSFlags field.
          unsigned Bits = 0;
          unsigned Scale = 1;
          bool NegOk = false;
          bool IsSoImm = false;

          switch (Opc) {
          default:
            llvm_unreachable("Unknown addressing mode for CP reference!");

          // Taking the address of a CP entry.
          case ARM::LEApcrel:
          case ARM::LEApcrelJT: {
            // This takes a SoImm, which is an 8-bit immediate rotated. We'll
            // pretend the maximum offset is 255 * 4. Since each instruction
            // is 4 bytes wide, this is always correct. We'll check for other
            // displacements that fit in a SoImm as well.
            Bits = 8;
            NegOk = true;
            IsSoImm = true;
            unsigned CPI = I.getOperand(op).getIndex();
            assert(CPI < CPEMIs.size());
            MachineInstr *CPEMI = CPEMIs[CPI];
            const Align CPEAlign = getCPEAlign(CPEMI);
            const unsigned LogCPEAlign = Log2(CPEAlign);
            if (LogCPEAlign >= 2)
              Scale = 4;
            else
              // For constants with less than 4-byte alignment,
              // we'll pretend the maximum offset is 255 * 1.
              Scale = 1;
          }
            break;
          case ARM::t2LEApcrel:
          case ARM::t2LEApcrelJT:
            Bits = 12;
            NegOk = true;
            break;
          case ARM::tLEApcrel:
          case ARM::tLEApcrelJT:
            Bits = 8;
            Scale = 4;
            break;

          case ARM::LDRBi12:
          case ARM::LDRi12:
          case ARM::LDRcp:
          case ARM::t2LDRpci:
          case ARM::t2LDRHpci:
          case ARM::t2LDRSHpci:
          case ARM::t2LDRBpci:
          case ARM::t2LDRSBpci:
            Bits = 12; // +-offset_12
            NegOk = true;
            break;

          case ARM::tLDRpci:
            Bits = 8;
            Scale = 4; // +(offset_8*4)
            break;

          case ARM::VLDRD:
          case ARM::VLDRS:
            Bits = 8;
            Scale = 4; // +-(offset_8*4)
            NegOk = true;
            break;
          case ARM::VLDRH:
            Bits = 8;
            Scale = 2; // +-(offset_8*2)
            NegOk = true;
            break;
          }

          // Remember that this is a user of a CP entry.
          unsigned CPI = I.getOperand(op).getIndex();
          if (I.getOperand(op).isJTI()) {
            JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
            CPI = JumpTableEntryIndices[CPI];
          }

          MachineInstr *CPEMI = CPEMIs[CPI];
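          // Unlike branch offsets, this immediate is unsigned; reachability
          // in the negative direction is tracked separately by NegOk. E.g.
          // tLDRpci (Bits=8, Scale=4) gives MaxOffs = 255*4 = 1020 bytes,
          // forward only.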
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
          CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm));

          // Increment corresponding CPEntry reference count.
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
          assert(CPE && "Cannot find a corresponding CPEntry!");
          CPE->RefCount++;

          // Instructions can only use one CP entry, don't bother scanning the
          // rest of the operands.
          break;
        }
    }
  }
}

/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
                              const MachineBasicBlock *RHS) {
  return LHS->getNumber() < RHS->getNumber();
}

/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
  // Renumber the MBB's to keep them consecutive.
  NewBB->getParent()->RenumberBlocks(NewBB);
  DT->updateBlockNumbers();

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList. Specifically, we need to add NewMBB as having
  // available water after it.
  water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
  WaterList.insert(IP, NewBB);
}

/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();

  // Collect liveness information at MI.
  LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo());
  LRs.addLiveOuts(*OrigBB);
  auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse();
  for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd))
    LRs.stepBackward(LiveMI);

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
      MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
  MF->insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
  if (!isThumb)
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
  else
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
        .addMBB(NewBB)
        .add(predOps(ARMCC::AL));
  ++NumSplit;

  // Update the CFG. All succs of OrigBB are now succs of NewBB.
  NewBB->transferSuccessors(OrigBB);

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update live-in information in the new block.
  MachineRegisterInfo &MRI = MF->getRegInfo();
  for (MCPhysReg L : LRs)
    if (!MRI.isReserved(L))
      NewBB->addLiveIn(L);

  // Update internal data structures to account for the newly inserted MBB.
  // This is almost the same as updateForInsertedWaterBlock, except that
  // the Water goes after OrigBB, not NewBB.
  MF->RenumberBlocks(NewBB);
  DT->updateBlockNumbers();

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList. Specifically, we need to add OrigMBB as having
  // available water after it (but not if it's already there, which happens
  // when splitting before a conditional branch that is followed by an
  // unconditional branch - in that case we want to insert NewBB).
  water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
  MachineBasicBlock* WaterBB = *IP;
  if (WaterBB == OrigBB)
    WaterList.insert(std::next(IP), NewBB);
  else
    WaterList.insert(IP, OrigBB);
  NewWaterList.insert(OrigBB);

  // Figure out how large the OrigBB is. As the first half of the original
  // block, it cannot contain a tablejump. The size includes
  // the new jump we added. (It should be possible to do this without
  // recounting everything, but it's very confusing, and this is rarely
  // executed.)
  BBUtils->computeBlockSize(OrigBB);

  // Figure out how large the NewMBB is. As the second half of the original
  // block, it may contain a tablejump.
  BBUtils->computeBlockSize(NewBB);

  // All BBOffsets following these blocks must be modified.
  BBUtils->adjustBBOffsetsAfter(OrigBB);

  return NewBB;
}

/// getUserOffset - Compute the offset of U.MI as seen by the hardware
/// displacement computation. Update U.KnownAlignment to match its current
/// basic block location.
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
  unsigned UserOffset = BBUtils->getOffsetOf(U.MI);

  SmallVectorImpl<BasicBlockInfo> &BBInfo = BBUtils->getBBInfo();
  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
  unsigned KnownBits = BBI.internalKnownBits();

  // The value read from PC is offset from the actual instruction address.
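  // (In ARM state PC reads as the instruction address plus 8; in Thumb,
  // plus 4.)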
  UserOffset += (isThumb ? 4 : 8);

  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
  // Make sure U.getMaxDisp() returns a constrained range.
  U.KnownAlignment = (KnownBits >= 2);

  // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
  // purposes of the displacement computation; compensate for that here.
  // For unknown alignments, getMaxDisp() constrains the range instead.
  if (isThumb && U.KnownAlignment)
    UserOffset &= ~3u;

  return UserOffset;
}

/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
/// UserOffset is computed by getUserOffset above to include PC adjustments. If
/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
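/// For example, with UserOffset = 0x100, TrialOffset = 0x4F0 and
/// MaxDisp = 1020, the trial location is in range, since
/// 0x4F0 - 0x100 = 0x3F0 (1008) <= 1020.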
bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
                                         unsigned TrialOffset, unsigned MaxDisp,
                                         bool NegativeOK, bool IsSoImm) {
  if (UserOffset <= TrialOffset) {
    // User before the Trial.
    if (TrialOffset - UserOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  } else if (NegativeOK) {
    if (UserOffset - TrialOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  }
  return false;
}

/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
///
/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
                                        MachineBasicBlock* Water, CPUser &U,
                                        unsigned &Growth) {
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  const Align CPEAlign = getCPEAlign(U.CPEMI);
  const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign);
  unsigned NextBlockOffset;
  Align NextBlockAlignment;
  MachineFunction::const_iterator NextBlock = Water->getIterator();
  if (++NextBlock == MF->end()) {
    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
  } else {
    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
    NextBlockAlignment = NextBlock->getAlignment();
  }
  unsigned Size = U.CPEMI->getOperand(2).getImm();
  unsigned CPEEnd = CPEOffset + Size;

  // The CPE may be able to hide in the alignment padding before the next
  // block. It may also cause more padding to be required if it is more aligned
  // than the next block.
  if (CPEEnd > NextBlockOffset) {
    Growth = CPEEnd - NextBlockOffset;
    // Compute the padding that would go at the end of the CPE to align the
    // next block.
    Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);

    // If the CPE is to be inserted before the instruction, that will raise
    // the offset of the instruction. Also account for unknown alignment padding
    // in blocks between CPE and the user.
    if (CPEOffset < UserOffset)
      UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign));
  } else
    // CPE fits in existing padding.
    Growth = 0;

  return isOffsetInRange(UserOffset, CPEOffset, U);
}

/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                                          MachineInstr *CPEMI, unsigned MaxDisp,
                                          bool NegOk, bool DoDump) {
  unsigned CPEOffset = BBUtils->getOffsetOf(CPEMI);

  if (DoDump) {
    LLVM_DEBUG({
      BBInfoVector &BBInfo = BBUtils->getBBInfo();
      unsigned Block = MI->getParent()->getNumber();
      const BasicBlockInfo &BBI = BBInfo[Block];
      dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
             << " max delta=" << MaxDisp
             << format(" insn address=%#x", UserOffset) << " in "
             << printMBBReference(*MI->getParent()) << ": "
             << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
             << format("CPE address=%#x offset=%+d: ", CPEOffset,
                       int(CPEOffset - UserOffset));
    });
  }

  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}

#ifndef NDEBUG
/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
/// unconditionally branches to its only successor.
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  MachineBasicBlock *Succ = *MBB->succ_begin();
  MachineBasicBlock *Pred = *MBB->pred_begin();
  MachineInstr *PredMI = &Pred->back();
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
      || PredMI->getOpcode() == ARM::t2B)
    return PredMI->getOperand(0).getMBB() == Succ;
  return false;
}
#endif // NDEBUG

/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount. If the refcount
/// becomes 0 remove the entry and instruction. Returns true if we removed
/// the entry, false if we didn't.
bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
                                                    MachineInstr *CPEMI) {
  // Find the old entry. Eliminate it if it is no longer used.
  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
  assert(CPE && "Unexpected!");
  if (--CPE->RefCount == 0) {
    removeDeadCPEMI(CPEMI);
    CPE->CPEMI = nullptr;
    --NumCPEs;
    return true;
  }
  return false;
}

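/// getCombinedIndex - Return the index of CPEMI's entry in CPEntries: the CPI
/// itself for a CONSTPOOL_ENTRY (the mapping is the identity), or the mapped
/// index from JumpTableEntryIndices for a jump table.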
unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
  if (CPEMI->getOperand(1).isCPI())
    return CPEMI->getOperand(1).getIndex();

  return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
}

/// findInRangeCPEntry - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE exists, and if so,
/// change the data structures so the user references the clone. Returns:
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) {
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI = U.CPEMI;

  // Check to see if the CPE is already in-range.
  if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
                       true)) {
    LLVM_DEBUG(dbgs() << "In range\n");
    return 1;
  }

  // No. Look for previously created clones of the CPE that are in range.
  unsigned CPI = getCombinedIndex(CPEMI);
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  for (CPEntry &CPE : CPEs) {
    // We already tried this one.
    if (CPE.CPEMI == CPEMI)
      continue;
    // Removing CPEs can leave empty entries, skip.
    if (CPE.CPEMI == nullptr)
      continue;
    if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getMaxDisp(),
                         U.NegOk)) {
      LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI
                        << "\n");
      // Point the CPUser node to the replacement.
      U.CPEMI = CPE.CPEMI;
      // Change the CPI in the instruction operand to refer to the clone.
      for (MachineOperand &MO : UserMI->operands())
        if (MO.isCPI()) {
          MO.setIndex(CPE.CPI);
          break;
        }
      // Adjust the refcount of the clone...
      CPE.RefCount++;
      // ...and the original. If we didn't remove the old entry, none of the
      // addresses changed, so we don't need another pass.
      return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
    }
  }
  return 0;
}

/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
/// the specific unconditional branch instruction.
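/// These limits follow from the encodings: tB has an 11-bit signed immediate
/// scaled by 2, t2B a 24-bit signed immediate scaled by 2, and ARM B (the
/// default case) a 24-bit signed immediate scaled by 4.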
1259static inline unsigned getUnconditionalBrDisp(int Opc) {
1260 switch (Opc) {
1261 case ARM::tB:
1262 return ((1<<10)-1)*2;
1263 case ARM::t2B:
1264 return ((1<<23)-1)*2;
1265 default:
1266 break;
1267 }
1268
1269 return ((1<<23)-1)*4;
1270}
1271
1272/// findAvailableWater - Look for an existing entry in the WaterList in which
1273/// we can place the CPE referenced from U so it's within range of U's MI.
1274/// Returns true if found, false if not. If it returns true, WaterIter
1275/// is set to the WaterList entry. For Thumb, prefer water that will not
1276/// introduce padding to water that will. To ensure that this pass
1277/// terminates, the CPE location for a particular CPUser is only allowed to
1278/// move to a lower address, so search backward from the end of the list and
1279/// prefer the first water that is in range.
1280bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
1281 water_iterator &WaterIter,
1282 bool CloserWater) {
1283 if (WaterList.empty())
1284 return false;
1285
1286 unsigned BestGrowth = ~0u;
1287 // The nearest water without splitting the UserBB is right after it.
1288 // If the distance is still large (we have a big BB), then we need to split it
1289 // if we don't converge after certain iterations. This helps the following
1290 // situation to converge:
1291 // BB0:
1292 // Big BB
1293 // BB1:
1294 // Constant Pool
1295 // When a CP access is out of range, BB0 may be used as water. However,
1296 // inserting islands between BB0 and BB1 makes other accesses out of range.
1297 MachineBasicBlock *UserBB = U.MI->getParent();
1298 BBInfoVector &BBInfo = BBUtils->getBBInfo();
1299 const Align CPEAlign = getCPEAlign(CPEMI: U.CPEMI);
1300 unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(Alignment: CPEAlign);
1301 if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
1302 return false;
1303 for (water_iterator IP = std::prev(x: WaterList.end()), B = WaterList.begin();;
1304 --IP) {
1305 MachineBasicBlock* WaterBB = *IP;
1306 // Check if water is in range and is either at a lower address than the
1307 // current "high water mark" or a new water block that was created since
1308 // the previous iteration by inserting an unconditional branch. In the
1309 // latter case, we want to allow resetting the high water mark back to
1310 // this new water since we haven't seen it before. Inserting branches
1311 // should be relatively uncommon and when it does happen, we want to be
1312 // sure to take advantage of it for all the CPEs near that block, so that
1313 // we don't insert more branches than necessary.
1314 // When CloserWater is true, we try to find the lowest address after (or
1315 // equal to) user MI's BB no matter of padding growth.
1316 unsigned Growth;
1317 if (isWaterInRange(UserOffset, Water: WaterBB, U, Growth) &&
1318 (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
1319 NewWaterList.count(Ptr: WaterBB) || WaterBB == U.MI->getParent()) &&
1320 Growth < BestGrowth) {
1321 // This is the least amount of required padding seen so far.
1322 BestGrowth = Growth;
1323 WaterIter = IP;
1324 LLVM_DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB)
1325 << " Growth=" << Growth << '\n');
1326
1327 if (CloserWater && WaterBB == U.MI->getParent())
1328 return true;
1329 // Keep looking unless it is perfect and we're not looking for the lowest
1330 // possible address.
1331 if (!CloserWater && BestGrowth == 0)
1332 return true;
1333 }
1334 if (IP == B)
1335 break;
1336 }
1337 return BestGrowth != ~0u;
1338}
1339
1340/// createNewWater - No existing WaterList entry will work for
1341/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
1342/// block is used if in range, and the conditional branch munged so control
1343/// flow is correct. Otherwise the block is split to create a hole with an
1344/// unconditional branch around it. In either case NewMBB is set to a
1345/// block following which the new island can be inserted (the WaterList
1346/// is not adjusted).
1347void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
1348 unsigned UserOffset,
1349 MachineBasicBlock *&NewMBB) {
1350 CPUser &U = CPUsers[CPUserIndex];
1351 MachineInstr *UserMI = U.MI;
1352 MachineInstr *CPEMI = U.CPEMI;
1353 const Align CPEAlign = getCPEAlign(CPEMI);
1354 MachineBasicBlock *UserMBB = UserMI->getParent();
1355 BBInfoVector &BBInfo = BBUtils->getBBInfo();
1356 const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
1357
1358 // If the block does not end in an unconditional branch already, and if the
1359 // end of the block is within range, make new water there. (The addition
1360 // below is for the unconditional branch we will be adding: 4 bytes on ARM +
1361 // Thumb2, 2 on Thumb1.
1362 if (BBHasFallthrough(MBB: UserMBB)) {
1363 // Size of branch to insert.
1364 unsigned Delta = isThumb1 ? 2 : 4;
1365 // Compute the offset where the CPE will begin.
1366 unsigned CPEOffset = UserBBI.postOffset(Alignment: CPEAlign) + Delta;
1367
1368 if (isOffsetInRange(UserOffset, TrialOffset: CPEOffset, U)) {
1369 LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
1370 << format(", expected CPE offset %#x\n", CPEOffset));
1371 NewMBB = &*++UserMBB->getIterator();
1372 // Add an unconditional branch from UserMBB to fallthrough block. Record
1373 // it for branch lengthening; this new branch will not get out of range,
1374 // but if the preceding conditional branch is out of range, the targets
1375 // will be exchanged, and the altered branch may be out of range, so the
1376 // machinery has to know about it.
1377 int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
1378 if (!isThumb)
1379 BuildMI(BB: UserMBB, MIMD: DebugLoc(), MCID: TII->get(Opcode: UncondBr)).addMBB(MBB: NewMBB);
1380 else
1381 BuildMI(BB: UserMBB, MIMD: DebugLoc(), MCID: TII->get(Opcode: UncondBr))
1382 .addMBB(MBB: NewMBB)
1383 .add(MOs: predOps(Pred: ARMCC::AL));
1384 unsigned MaxDisp = getUnconditionalBrDisp(Opc: UncondBr);
1385 ImmBranches.push_back(x: ImmBranch(&UserMBB->back(),
1386 MaxDisp, false, UncondBr));
1387 BBUtils->computeBlockSize(MBB: UserMBB);
1388 BBUtils->adjustBBOffsetsAfter(MBB: UserMBB);
1389 return;
1390 }
1391 }
1392
1393 // What a big block. Find a place within the block to split it. This is a
1394 // little tricky on Thumb1 since instructions are 2 bytes and constant pool
1395 // entries are 4 bytes: if instruction I references island CPE, and
1396 // instruction I+1 references CPE', it will not work well to put CPE as far
1397 // forward as possible, since then CPE' cannot immediately follow it (that
1398 // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
1399 // need to create a new island. So, we make a first guess, then walk through
1400 // the instructions between the one currently being looked at and the
1401 // possible insertion point, and make sure any other instructions that
1402 // reference CPEs will be able to use the same island area; if not, we back
1403 // up the insertion point.
1404
1405 // Try to split the block so it's fully aligned. Compute the latest split
1406 // point where we can add a 4-byte branch instruction, and then align to
1407 // Align which is the largest possible alignment in the function.
1408 const Align Align = MF->getAlignment();
1409 assert(Align >= CPEAlign && "Over-aligned constant pool entry");
1410 unsigned KnownBits = UserBBI.internalKnownBits();
1411 unsigned UPad = UnknownPadding(Alignment: Align, KnownBits);
1412 unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
1413 LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
1414 BaseInsertOffset));

  // The 4 in the following is for the unconditional branch we'll be inserting
  // (allows for long branch on Thumb1). Alignment of the island is handled
  // inside isOffsetInRange.
  BaseInsertOffset -= 4;

  LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
                    << " la=" << Log2(Align) << " kb=" << KnownBits
                    << " up=" << UPad << '\n');

  // This could point off the end of the block if we've already got constant
  // pool entries following this block; only the last one is in the water list.
  // Back past any possible branches (allow for a conditional and a maximally
  // long unconditional).
  if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
    // Ensure BaseInsertOffset is larger than the offset of the instruction
    // following UserMI so that the loop which searches for the split point
    // iterates at least once.
    BaseInsertOffset =
        std::max(UserBBI.postOffset() - UPad - 8,
                 UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
    // If the CP reference (i.e., UserOffset) is within the first four
    // instructions after an IT, this recalculated BaseInsertOffset could land
    // in the middle of an IT block. If it does, move BaseInsertOffset to just
    // after the IT block. The CP entry is still in range for the following
    // reasons:
    // 1. The initial BaseInsertOffset computed above is (UserOffset +
    //    U.getMaxDisp() - UPad).
    // 2. An IT block is at most 4 instructions plus the "it" itself (18
    //    bytes).
    // 3. All the relevant instructions support a much larger maximum
    //    displacement.
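    //    (For example, t2LDRpci can reach roughly +/-4095 bytes, dwarfing
    //    the at-most-18-byte slide past the IT block.)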
    MachineBasicBlock::iterator I = UserMI;
    ++I;
    Register PredReg;
    for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
         I->getOpcode() != ARM::t2IT &&
         getITInstrPredicate(*I, PredReg) != ARMCC::AL;
         Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) {
      BaseInsertOffset =
          std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1);
      assert(I != UserMBB->end() && "Fell off end of block");
    }
    LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
  }
  unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
                             CPEMI->getOperand(2).getImm();
  MachineBasicBlock::iterator MI = UserMI;
  ++MI;
  unsigned CPUIndex = CPUserIndex+1;
  unsigned NumCPUsers = CPUsers.size();
  MachineInstr *LastIT = nullptr;
  for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
       Offset < BaseInsertOffset;
       Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
    assert(MI != UserMBB->end() && "Fell off end of block");
    if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
      CPUser &U = CPUsers[CPUIndex];
      if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
        // Shift insertion point by one unit of alignment so it is within
        // reach.
        BaseInsertOffset -= Align.value();
        EndInsertOffset -= Align.value();
      }
      // This is overly conservative, as we don't account for CPEMIs being
      // reused within the block, but it doesn't matter much. Also assume CPEs
      // are added in order with alignment padding. We may eventually be able
      // to pack the aligned CPEs better.
      EndInsertOffset += U.CPEMI->getOperand(2).getImm();
      CPUIndex++;
    }

    // Remember the last IT instruction.
    if (MI->getOpcode() == ARM::t2IT)
      LastIT = &*MI;
  }

  --MI;

  // Avoid splitting an IT block.
  if (LastIT) {
    Register PredReg;
    ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
    if (CC != ARMCC::AL)
      MI = LastIT;
  }

  // Avoid splitting a MOVW+MOVT pair with a relocation on Windows.
  // On Windows, this instruction pair is covered by one single
  // IMAGE_REL_ARM_MOV32T relocation which covers both instructions. If a
  // constant island is injected in between them, the relocation will clobber
  // the instruction and fail to update the MOVT instruction.
  // (These instructions are bundled up until right before the ConstantIslands
  // pass.)
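  // Illustrative pair that must stay contiguous (hypothetical symbol):
  //   movw r0, :lower16:some_global
  //   movt r0, :upper16:some_global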
  if (STI->isTargetWindows() && isThumb && MI->getOpcode() == ARM::t2MOVTi16 &&
      (MI->getOperand(2).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
          ARMII::MO_HI16) {
    --MI;
    assert(MI->getOpcode() == ARM::t2MOVi16 &&
           (MI->getOperand(1).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
               ARMII::MO_LO16);
  }

  // We really must not split an IT block.
#ifndef NDEBUG
  Register PredReg;
  assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
#endif
  NewMBB = splitBlockBeforeInstr(&*MI);
}

/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
                                                bool CloserWater) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI = U.CPEMI;
  unsigned CPI = getCombinedIndex(CPEMI);
  unsigned Size = CPEMI->getOperand(2).getImm();
  // Compute this only once, it's expensive.
  unsigned UserOffset = getUserOffset(U);

  // See if the current entry is within range, or there is a clone of it
  // in range.
  int result = findInRangeCPEntry(U, UserOffset);
  if (result==1) return false;
  else if (result==2) return true;

  // No existing clone of this CPE is within range.
  // We will be generating a new clone. Get a UID for it.
  unsigned ID = AFI->createPICLabelUId();

  // Look for water where we can place this CPE.
  MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
  MachineBasicBlock *NewMBB;
  water_iterator IP;
  if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
    LLVM_DEBUG(dbgs() << "Found water in range\n");
    MachineBasicBlock *WaterBB = *IP;

    // If the original WaterList entry was "new water" on this iteration,
    // propagate that to the new island. This is just keeping NewWaterList
    // updated to match the WaterList, which will be updated below.
    if (NewWaterList.erase(WaterBB))
      NewWaterList.insert(NewIsland);

    // The new CPE goes before the following block (NewMBB).
    NewMBB = &*++WaterBB->getIterator();
  } else {
    // No water found.
    LLVM_DEBUG(dbgs() << "No water found\n");
    createNewWater(CPUserIndex, UserOffset, NewMBB);

    // splitBlockBeforeInstr adds to WaterList, which is important when it is
    // called while handling branches so that the water will be seen on the
    // next iteration for constant pools, but in this context, we don't want
    // it. Check for this so it will be removed from the WaterList.
    // Also remove any entry from NewWaterList.
    MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
    IP = find(WaterList, WaterBB);
    if (IP != WaterList.end())
      NewWaterList.erase(WaterBB);

    // We are adding new water. Update NewWaterList.
    NewWaterList.insert(NewIsland);
  }
  // Always align the new block because CP entries can be smaller than 4
  // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may
  // be an already aligned constant pool block.
  const Align Alignment = isThumb ? Align(2) : Align(4);
  if (NewMBB->getAlignment() < Alignment)
    NewMBB->setAlignment(Alignment);

  // Remove the original WaterList entry; we want subsequent insertions in
  // this vicinity to go after the one we're about to insert. This
  // considerably reduces the number of times we have to move the same CPE
  // more than once and is also important to ensure the algorithm terminates.
  if (IP != WaterList.end())
    WaterList.erase(IP);

  // Okay, we know we can put an island before NewMBB now, do it!
  MF->insert(NewMBB->getIterator(), NewIsland);

  // Update internal data structures to account for the newly inserted MBB.
  updateForInsertedWaterBlock(NewIsland);

  // Now that we have an island to add the CPE to, clone the original CPE and
  // add it to the island.
  U.HighWaterMark = NewIsland;
  U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
                .addImm(ID)
                .add(CPEMI->getOperand(1))
                .addImm(Size);
  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
  ++NumCPEs;

  // Decrement the old entry, and remove it if refcount becomes 0.
  decrementCPEReferenceCount(CPI, CPEMI);

  // Mark the basic block as aligned as required by the const-pool entry.
  NewIsland->setAlignment(getCPEAlign(U.CPEMI));

  // Increase the size of the island block to account for the new entry.
  BBUtils->adjustBBSize(NewIsland, Size);
  BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());

  // Finally, change the CPI in the instruction operand to be ID.
  for (MachineOperand &MO : UserMI->operands())
    if (MO.isCPI()) {
      MO.setIndex(ID);
      break;
    }

  LLVM_DEBUG(
      dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
             << format(" offset=%#x\n",
                       BBUtils->getBBInfo()[NewIsland->getNumber()].Offset));

  return true;
}

/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
/// sizes and offsets of impacted basic blocks.
void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
  MachineBasicBlock *CPEBB = CPEMI->getParent();
  unsigned Size = CPEMI->getOperand(2).getImm();
  CPEMI->eraseFromParent();
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  BBUtils->adjustBBSize(CPEBB, -Size);
  // All succeeding offsets have the current size value added in, fix this.
  if (CPEBB->empty()) {
    BBInfo[CPEBB->getNumber()].Size = 0;

    // This block no longer needs to be aligned.
    CPEBB->setAlignment(Align(1));
  } else {
    // Entries are sorted by descending alignment, so realign from the front.
    CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin()));
  }

  BBUtils->adjustBBOffsetsAfter(CPEBB);
  // An island has only one predecessor BB and one successor BB. Check if
  // this BB's predecessor jumps directly to this BB's successor. This
  // shouldn't happen currently.
  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
  // FIXME: remove the empty blocks after all the work is done?
}

/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
/// are zero.
bool ARMConstantIslands::removeUnusedCPEntries() {
  bool MadeChange = false;
  for (std::vector<CPEntry> &CPEs : CPEntries) {
    for (CPEntry &CPE : CPEs) {
      if (CPE.RefCount == 0 && CPE.CPEMI) {
        removeDeadCPEMI(CPE.CPEMI);
        CPE.CPEMI = nullptr;
        MadeChange = true;
      }
    }
  }
  return MadeChange;
}

/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();

  // Check to see if the DestBB is already in-range.
  if (BBUtils->isBBInRange(MI, DestBB, Br.MaxDisp))
    return false;

  if (!Br.isCond)
    return fixupUnconditionalBr(Br);
  return fixupConditionalBr(Br);
}

/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
/// too far away to fit in its displacement field. If the LR register has been
/// spilled in the epilogue, we can use BL to implement a far jump; otherwise
/// the function size was underestimated and we report a fatal error.
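/// For example (illustrative):
///   tB .LBB_far          ; tB reaches only about +/-2 KiB
/// becomes
///   tBfar .LBB_far       ; emitted as BL, reaching about +/-4 MiB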
bool
ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *MBB = MI->getParent();
  if (!isThumb1)
    llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");

  if (!AFI->isLRSpilled())
    report_fatal_error("underestimated function size");

  // Use BL to implement far jump.
  Br.MaxDisp = (1 << 21) * 2;
  MI->setDesc(TII->get(ARM::tBfar));
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  BBInfo[MBB->getNumber()].Size += 2;
  BBUtils->adjustBBOffsetsAfter(MBB);
  ++NumUBrFixed;

  LLVM_DEBUG(dbgs() << "  Changed B to long jump " << *MI);

  return true;
}

/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  //   blt L1
  // =>
  //   bge L2
  //   b   L1
  // L2:
  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
  CC = ARMCC::getOppositeCondition(CC);
  Register CCReg = MI->getOperand(2).getReg();

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI->getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);

  ++NumCBrFixed;
  if (BMI != MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
        BMI->getOpcode() == Br.UncondBr) {
      // Last MI in the BB is an unconditional branch. Can we simply invert the
      // condition and swap destinations:
      //   beq L1
      //   b   L2
      // =>
      //   bne L2
      //   b   L1
      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
      if (BBUtils->isBBInRange(MI, NewDest, Br.MaxDisp)) {
        LLVM_DEBUG(
            dbgs() << "  Invert Bcc condition and swap its destination with "
                   << *BMI);
        BMI->getOperand(0).setMBB(DestBB);
        MI->getOperand(0).setMBB(NewDest);
        MI->getOperand(1).setImm(CC);
        return true;
      }
    }
  }

  if (NeedSplit) {
    splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BBUtils->adjustBBSize(MBB, -delta);
    MBB->back().eraseFromParent();

    // The conditional successor will be swapped between the BBs after this, so
    // update CFG.
    MBB->addSuccessor(DestBB);
    std::next(MBB->getIterator())->removeSuccessor(DestBB);

    // BBInfo[SplitBB].Offset is wrong temporarily, fixed below.
  }
  MachineBasicBlock *NextBB = &*++MBB->getIterator();

  LLVM_DEBUG(dbgs() << "  Insert B to " << printMBBReference(*DestBB)
                    << " also invert condition and change dest. to "
                    << printMBBReference(*NextBB) << "\n");

  // Insert a new conditional branch and a new unconditional branch.
  // Also update the ImmBranch as well as adding a new entry for the new branch.
  BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
      .addMBB(NextBB).addImm(CC).addReg(CCReg);
  Br.MI = &MBB->back();
  BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
  if (isThumb)
    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
        .addMBB(DestBB)
        .add(predOps(ARMCC::AL));
  else
    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
  BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));

  // Remove the old conditional branch. It may or may not still be in MBB.
  BBUtils->adjustBBSize(MI->getParent(), -TII->getInstSizeInBytes(*MI));
  MI->eraseFromParent();
  BBUtils->adjustBBOffsetsAfter(MBB);
  return true;
}

bool ARMConstantIslands::optimizeThumb2Instructions() {
  bool MadeChange = false;

  // Shrink ADR and LDR from constantpool.
  for (CPUser &U : CPUsers) {
    unsigned Opcode = U.MI->getOpcode();
    unsigned NewOpc = 0;
    unsigned Scale = 1;
    unsigned Bits = 0;
    switch (Opcode) {
    default: break;
    case ARM::t2LEApcrel:
      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
        NewOpc = ARM::tLEApcrel;
        Bits = 8;
        Scale = 4;
      }
      break;
    case ARM::t2LDRpci:
      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
        NewOpc = ARM::tLDRpci;
        Bits = 8;
        Scale = 4;
      }
      break;
    }

    if (!NewOpc)
      continue;

    unsigned UserOffset = getUserOffset(U);
    unsigned MaxOffs = ((1 << Bits) - 1) * Scale;

    // Be conservative with inline asm.
    if (!U.KnownAlignment)
      MaxOffs -= 2;

    // FIXME: Check if offset is multiple of scale if scale is not 4.
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
      LLVM_DEBUG(dbgs() << "Shrink: " << *U.MI);
      U.MI->setDesc(TII->get(NewOpc));
      MachineBasicBlock *MBB = U.MI->getParent();
      BBUtils->adjustBBSize(MBB, -2);
      BBUtils->adjustBBOffsetsAfter(MBB);
      ++NumT2CPShrunk;
      MadeChange = true;
    }
  }

  return MadeChange;
}

bool ARMConstantIslands::optimizeThumb2Branches() {
  auto TryShrinkBranch = [this](ImmBranch &Br) {
    unsigned Opcode = Br.MI->getOpcode();
    unsigned NewOpc = 0;
    unsigned Scale = 1;
    unsigned Bits = 0;
    switch (Opcode) {
    default: break;
    case ARM::t2B:
      NewOpc = ARM::tB;
      Bits = 11;
      Scale = 2;
      break;
    case ARM::t2Bcc:
      NewOpc = ARM::tBcc;
      Bits = 8;
      Scale = 2;
      break;
    }
    if (NewOpc) {
      unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
      MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
      if (BBUtils->isBBInRange(Br.MI, DestBB, MaxOffs)) {
        LLVM_DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
        Br.MI->setDesc(TII->get(NewOpc));
        MachineBasicBlock *MBB = Br.MI->getParent();
        BBUtils->adjustBBSize(MBB, -2);
        BBUtils->adjustBBOffsetsAfter(MBB);
        ++NumT2BrShrunk;
        return true;
      }
    }
    return false;
  };

  struct ImmCompare {
    MachineInstr* MI = nullptr;
    unsigned NewOpc = 0;
  };

  auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
                              MachineBasicBlock *DestBB) {
    ImmCmp.MI = nullptr;
    ImmCmp.NewOpc = 0;

    // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
    // so this transformation is not safe.
    if (!Br.MI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
      return false;

    Register PredReg;
    unsigned NewOpc = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
    if (Pred == ARMCC::EQ)
      NewOpc = ARM::tCBZ;
    else if (Pred == ARMCC::NE)
      NewOpc = ARM::tCBNZ;
    else
      return false;

    // Check if the distance is within 126 bytes. Subtract 2 from the starting
    // offset because the cmp will be eliminated.
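    // (CBZ/CBNZ encode a 6-bit immediate scaled by 2, so they can only reach
    // 0..126 bytes forward of the PC.)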
    unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
    BBInfoVector &BBInfo = BBUtils->getBBInfo();
    unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
    if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
      return false;

    // Search backwards to find a tCMPi8.
    auto *TRI = STI->getRegisterInfo();
    MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
    if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
      return false;

    ImmCmp.MI = CmpMI;
    ImmCmp.NewOpc = NewOpc;
    return true;
  };

  auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
    if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
        STI->hasMinSize())
      return false;

    MachineBasicBlock *MBB = Br.MI->getParent();
    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
    if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
        !BBUtils->isBBInRange(Br.MI, DestBB, 4094))
      return false;

    if (!DT->dominates(DestBB, MBB))
      return false;

    // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
    // target of Br. So now we need to reverse the condition.
    Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;

    MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
                                      TII->get(ARM::t2LE));
    // Swapped a t2Bcc for a t2LE, so no need to update the size of the block.
    MIB.add(Br.MI->getOperand(0));
    Br.MI->eraseFromParent();
    Br.MI = MIB;
    ++NumLEInserted;
    return true;
  };

  bool MadeChange = false;

  // The order in which branches appear in ImmBranches is approximately their
  // order within the function body. By visiting later branches first, we reduce
  // the distance between earlier forward branches and their targets, making it
  // more likely that the cbn?z optimization, which can only apply to forward
  // branches, will succeed.
  for (ImmBranch &Br : reverse(ImmBranches)) {
    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
    MachineBasicBlock *MBB = Br.MI->getParent();
    MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
        MBB->getFallThrough() :
        MBB->back().getOperand(0).getMBB();

    ImmCompare Cmp;
    if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
      DestBB = ExitBB;
      MadeChange = true;
    } else {
      FindCmpForCBZ(Br, Cmp, DestBB);
      MadeChange |= TryShrinkBranch(Br);
    }

    unsigned Opcode = Br.MI->getOpcode();
    if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
      continue;

    Register Reg = Cmp.MI->getOperand(0).getReg();

    // Check for Kill flags on Reg. If they are present remove them and set kill
    // on the new CBZ.
    auto *TRI = STI->getRegisterInfo();
    MachineBasicBlock::iterator KillMI = Br.MI;
    bool RegKilled = false;
    do {
      --KillMI;
      if (KillMI->killsRegister(Reg, TRI)) {
        KillMI->clearRegisterKills(Reg, TRI);
        RegKilled = true;
        break;
      }
    } while (KillMI != Cmp.MI);

    // Create the new CBZ/CBNZ.
    LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
    MachineInstr *NewBR =
        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
            .addReg(Reg, getKillRegState(RegKilled) |
                             getRegState(Cmp.MI->getOperand(0)))
            .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());

    Cmp.MI->eraseFromParent();

    if (Br.MI->getOpcode() == ARM::tBcc) {
      Br.MI->eraseFromParent();
      Br.MI = NewBR;
      BBUtils->adjustBBSize(MBB, -2);
    } else if (MBB->back().getOpcode() != ARM::t2LE) {
      // An LE has been generated, but it's not the terminator; the terminator
      // is an unconditional branch. However, the logic has now been reversed:
      // the CBN?Z is the conditional branch and the LE is the unconditional
      // branch, so we can remove the now-redundant unconditional branch at
      // the end of the block.
      MachineInstr *LastMI = &MBB->back();
      BBUtils->adjustBBSize(MBB, -LastMI->getDesc().getSize());
      LastMI->eraseFromParent();
    }
    BBUtils->adjustBBOffsetsAfter(MBB);
    ++NumCBZ;
    MadeChange = true;
  }

  return MadeChange;
}

static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
                              unsigned BaseReg) {
  if (I.getOpcode() != ARM::t2ADDrs)
    return false;

  if (I.getOperand(0).getReg() != EntryReg)
    return false;

  if (I.getOperand(1).getReg() != BaseReg)
    return false;

  // FIXME: what about CC and IdxReg?
  return true;
}

/// While trying to form a TBB/TBH instruction, we may (if the table
/// doesn't immediately follow the BR_JT) need access to the start of the
/// jump-table. We know one instruction that produces such a register; this
/// function works out whether that definition can be preserved to the BR_JT,
/// possibly by removing an intervening addition (which is usually needed to
/// calculate the actual entry to jump to).
bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
                                              MachineInstr *LEAMI,
                                              unsigned &DeadSize,
                                              bool &CanDeleteLEA,
                                              bool &BaseRegKill) {
  if (JumpMI->getParent() != LEAMI->getParent())
    return false;

  // Now we hope that we have at least these instructions in the basic block:
  //   BaseReg = t2LEA ...
  //   [...]
  //   EntryReg = t2ADDrs BaseReg, ...
  //   [...]
  //   t2BR_JT EntryReg
  //
  // We have to be very conservative about what we recognise here though. The
  // main perturbing factors to watch out for are:
  //   + Spills at any point in the chain: not direct problems but we would
  //     expect a blocking Def of the spilled register so in practice what we
  //     can do is limited.
  //   + EntryReg == BaseReg: this is the one situation we should allow a Def
  //     of BaseReg, but only if the t2ADDrs can be removed.
  //   + Some instruction other than t2ADDrs computing the entry. Not seen in
  //     the wild, but we should be careful.
  Register EntryReg = JumpMI->getOperand(0).getReg();
  Register BaseReg = LEAMI->getOperand(0).getReg();

  CanDeleteLEA = true;
  BaseRegKill = false;
  MachineInstr *RemovableAdd = nullptr;
  MachineBasicBlock::iterator I(LEAMI);
  for (++I; &*I != JumpMI; ++I) {
    if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
      RemovableAdd = &*I;
      break;
    }

    for (const MachineOperand &MO : I->operands()) {
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (MO.isDef() && MO.getReg() == BaseReg)
        return false;
      if (MO.isUse() && MO.getReg() == BaseReg) {
        BaseRegKill = BaseRegKill || MO.isKill();
        CanDeleteLEA = false;
      }
    }
  }

  if (!RemovableAdd)
    return true;

  // Check the add really is removable, and that nothing else in the block
  // clobbers BaseReg.
  for (++I; &*I != JumpMI; ++I) {
    for (const MachineOperand &MO : I->operands()) {
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (MO.isDef() && MO.getReg() == BaseReg)
        return false;
      if (MO.isUse() && MO.getReg() == EntryReg)
        RemovableAdd = nullptr;
    }
  }

  if (RemovableAdd) {
    RemovableAdd->eraseFromParent();
    DeadSize += isThumb2 ? 4 : 2;
  } else if (BaseReg == EntryReg) {
    // The add wasn't removable, but clobbered the base for the TBB. So we
    // can't preserve it.
    return false;
  }

  // We reached the end of the block without seeing another definition of
  // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
  // used in the TBB/TBH if necessary.
  return true;
}

/// Returns whether CPEMI is the first instruction in the block
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
/// we can switch the first register to PC and usually remove the address
/// calculation that preceded it.
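/// Illustrative layout when this holds:
///   tTBB_JT pc, rIdx, ...    <- JTMI, last MI of its block
///   JUMPTABLE_TBB ...        <- CPEMI, first MI of the next block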
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
  MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
  MachineFunction *MF = MBB->getParent();
  ++MBB;

  return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI;
}

static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
                                         MachineInstr *JumpMI,
                                         unsigned &DeadSize) {
  // Remove a dead add between the LEA and JT, which used to compute EntryReg,
  // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
  // and is not clobbered / used.
  MachineInstr *RemovableAdd = nullptr;
  Register EntryReg = JumpMI->getOperand(0).getReg();

  // Find the last ADD to set EntryReg.
  MachineBasicBlock::iterator I(LEAMI);
  for (++I; &*I != JumpMI; ++I) {
    if (I->getOpcode() == ARM::t2ADDrs &&
        I->getOperand(0).getReg() == EntryReg)
      RemovableAdd = &*I;
  }

  if (!RemovableAdd)
    return;

  // Ensure EntryReg is not clobbered or used.
  MachineBasicBlock::iterator J(RemovableAdd);
  for (++J; &*J != JumpMI; ++J) {
    for (const MachineOperand &MO : J->operands()) {
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (MO.isDef() && MO.getReg() == EntryReg)
        return;
      if (MO.isUse() && MO.getReg() == EntryReg)
        return;
    }
  }

  LLVM_DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd);
  RemovableAdd->eraseFromParent();
  DeadSize += 4;
}

/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
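/// For example (illustrative): a TBB stores (DestBB - TableBase)/2 as an
/// unsigned byte per entry, so every destination must lie forward of the
/// table and within 510 bytes of it; TBH widens each entry to a halfword.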
bool ARMConstantIslands::optimizeThumb2JumpTables() {
  bool MadeChange = false;

  // FIXME: After the tables are shrunk, can we get rid of some of the
  // constantpool tables?
  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  if (!MJTI) return false;

  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  for (MachineInstr *MI : T2JumpTables) {
    const MCInstrDesc &MCID = MI->getDesc();
    unsigned NumOps = MCID.getNumOperands();
    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
    MachineOperand JTOP = MI->getOperand(JTOpIdx);
    unsigned JTI = JTOP.getIndex();
    assert(JTI < JT.size());

    bool ByteOk = true;
    bool HalfWordOk = true;
    unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
    BBInfoVector &BBInfo = BBUtils->getBBInfo();
    for (MachineBasicBlock *MBB : JTBBs) {
      unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
      // Negative offset is not ok. FIXME: We should change BB layout to make
      // sure all the branches are forward.
      if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
        ByteOk = false;
      unsigned TBHLimit = ((1<<16)-1)*2;
      if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
        HalfWordOk = false;
      if (!ByteOk && !HalfWordOk)
        break;
    }

    if (!ByteOk && !HalfWordOk)
      continue;

    CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
    MachineBasicBlock *MBB = MI->getParent();
    if (!MI->getOperand(0).isKill()) // FIXME: needed now?
      continue;

    unsigned DeadSize = 0;
    bool CanDeleteLEA = false;
    bool BaseRegKill = false;

    unsigned IdxReg = ~0U;
    bool IdxRegKill = true;
    if (isThumb2) {
      IdxReg = MI->getOperand(1).getReg();
      IdxRegKill = MI->getOperand(1).isKill();

      bool PreservedBaseReg =
          preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA,
                               BaseRegKill);
      if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
        continue;
    } else {
      // We're in thumb-1 mode, so we must have something like:
      //   %idx = tLSLri %idx, 2
      //   %base = tLEApcrelJT
      //   %t = tLDRr %base, %idx
      Register BaseReg = User.MI->getOperand(0).getReg();

      MachineBasicBlock *UserMBB = User.MI->getParent();
      MachineBasicBlock::iterator Shift = User.MI->getIterator();
      if (Shift == UserMBB->begin())
        continue;

      Shift = prev_nodbg(Shift, UserMBB->begin());
      if (Shift->getOpcode() != ARM::tLSLri ||
          Shift->getOperand(3).getImm() != 2 ||
          !Shift->getOperand(2).isKill())
        continue;
      IdxReg = Shift->getOperand(2).getReg();
      Register ShiftedIdxReg = Shift->getOperand(0).getReg();

      // It's important that IdxReg is live until the actual TBB/TBH. Most of
      // the range is checked later, but the LEA might still clobber it and not
      // actually get removed.
      if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
        continue;

      MachineInstr *Load = User.MI->getNextNode();
      if (Load->getOpcode() != ARM::tLDRr)
        continue;
      if (Load->getOperand(1).getReg() != BaseReg ||
          Load->getOperand(2).getReg() != ShiftedIdxReg ||
          !Load->getOperand(2).isKill())
        continue;

      // If we're in PIC mode, there should be another ADD following.
      auto *TRI = STI->getRegisterInfo();

      // %base cannot be redefined after the load as it will appear before
      // TBB/TBH like:
      //   %base =
      //   %base =
      //   tBB %base, %idx
      if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(),
                                 TRI))
        continue;

      if (isPositionIndependentOrROPI) {
        MachineInstr *Add = Load->getNextNode();
        if (Add->getOpcode() != ARM::tADDrr ||
            Add->getOperand(2).getReg() != BaseReg ||
            Add->getOperand(3).getReg() != Load->getOperand(0).getReg() ||
            !Add->getOperand(3).isKill())
          continue;
        if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
          continue;
        if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
          // IdxReg gets redefined in the middle of the sequence.
          continue;
        Add->eraseFromParent();
        DeadSize += 2;
      } else {
        if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
          continue;
        if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
          // IdxReg gets redefined in the middle of the sequence.
          continue;
      }

      // Now safe to delete the load and lsl. The LEA will be removed later.
      CanDeleteLEA = true;
      Shift->eraseFromParent();
      Load->eraseFromParent();
      DeadSize += 4;
    }

    LLVM_DEBUG(dbgs() << "Shrink JT: " << *MI);
    MachineInstr *CPEMI = User.CPEMI;
    unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
    if (!isThumb2)
      Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;

    MachineBasicBlock::iterator MI_JT = MI;
    MachineInstr *NewJTMI =
        BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
            .addReg(User.MI->getOperand(0).getReg(),
                    getKillRegState(BaseRegKill))
            .addReg(IdxReg, getKillRegState(IdxRegKill))
            .addJumpTableIndex(JTI, JTOP.getTargetFlags())
            .addImm(CPEMI->getOperand(0).getImm());
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": " << *NewJTMI);

    unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
    CPEMI->setDesc(TII->get(JTOpc));

    if (jumpTableFollowsTB(MI, User.CPEMI)) {
      NewJTMI->getOperand(0).setReg(ARM::PC);
      NewJTMI->getOperand(0).setIsKill(false);

      if (CanDeleteLEA) {
        if (isThumb2)
          RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);

        User.MI->eraseFromParent();
        DeadSize += isThumb2 ? 4 : 2;

        // The LEA was eliminated, the TBB instruction becomes the only new
        // user of the jump table.
        User.MI = NewJTMI;
        User.MaxDisp = 4;
        User.NegOk = false;
        User.IsSoImm = false;
        User.KnownAlignment = false;
      } else {
        // The LEA couldn't be eliminated, so we must add another CPUser to
        // record the TBB or TBH use.
        int CPEntryIdx = JumpTableEntryIndices[JTI];
        auto &CPEs = CPEntries[CPEntryIdx];
        auto Entry =
            find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; });
        ++Entry->RefCount;
        CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
      }
    }

    unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI);
    unsigned OrigSize = TII->getInstSizeInBytes(*MI);
    MI->eraseFromParent();

    int Delta = OrigSize - NewSize + DeadSize;
    BBInfo[MBB->getNumber()].Size -= Delta;
    BBUtils->adjustBBOffsetsAfter(MBB);

    ++NumTBs;
    MadeChange = true;
  }

  return MadeChange;
}

/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
bool ARMConstantIslands::reorderThumb2JumpTables() {
  bool MadeChange = false;

  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  if (!MJTI) return false;

  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  for (MachineInstr *MI : T2JumpTables) {
    const MCInstrDesc &MCID = MI->getDesc();
    unsigned NumOps = MCID.getNumOperands();
    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
    MachineOperand JTOP = MI->getOperand(JTOpIdx);
    unsigned JTI = JTOP.getIndex();
    assert(JTI < JT.size());

    // We prefer if target blocks for the jump table come after the jump
    // instruction so we can use TB[BH]. Loop through the target blocks
    // and try to adjust them so that this is the case.
    int JTNumber = MI->getParent()->getNumber();
    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
    for (MachineBasicBlock *MBB : JTBBs) {
      int DTNumber = MBB->getNumber();

      if (DTNumber < JTNumber) {
        // The destination precedes the switch. Try to move the block forward
        // so we have a positive offset.
        MachineBasicBlock *NewBB =
            adjustJTTargetBlockForward(JTI, MBB, MI->getParent());
        if (NewBB)
          MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB);
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}

MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
    unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
  // If the destination block is terminated by an unconditional branch,
  // try to move it; otherwise, create a new block following the jump
  // table that branches back to the actual target. This is a very simple
  // heuristic. FIXME: We can definitely improve it.
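  // Illustrative layout change (blocks shown in layout order):
  //   before:  DestBB ... JTBB(tbb)                ; backwards, TB[BH] can't
  //                                                ; encode it
  //   after:   ... JTBB(tbb) DestBB                ; block moved forward, or
  //            ... JTBB(tbb) NewBB(b DestBB) ...   ; trampoline inserted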
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  SmallVector<MachineOperand, 4> CondPrior;
  MachineFunction::iterator BBi = BB->getIterator();
  MachineFunction::iterator OldPrior = std::prev(BBi);
  MachineFunction::iterator OldNext = std::next(BBi);

  // If the block terminator isn't analyzable, don't try to move the block.
  bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);

  // If the block ends in an unconditional branch, move it. The prior block
  // has to have an analyzable terminator for us to move this one. Be paranoid
  // and make sure we're not trying to move the entry block of the function.
  if (!B && Cond.empty() && BB != &MF->front() &&
      !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
    BB->moveAfter(JTBB);
    OldPrior->updateTerminator(BB);
    BB->updateTerminator(OldNext != MF->end() ? &*OldNext : nullptr);
    // Update numbering to account for the block being moved.
    MF->RenumberBlocks();
    DT->updateBlockNumbers();
    ++NumJTMoved;
    return nullptr;
  }

  // Create a new MBB for the code after the jump BB.
  MachineBasicBlock *NewBB =
      MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
  MachineFunction::iterator MBBI = ++JTBB->getIterator();
  MF->insert(MBBI, NewBB);

  // Copy live-in information to new block.
  for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins())
    NewBB->addLiveIn(RegMaskPair);

  // Add an unconditional branch from NewBB to BB.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond directly to anything in the source.
  if (isThumb2)
    BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
        .addMBB(BB)
        .add(predOps(ARMCC::AL));
  else
    BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
        .addMBB(BB)
        .add(predOps(ARMCC::AL));

  // Update internal data structures to account for the newly inserted MBB.
  MF->RenumberBlocks(NewBB);
  DT->updateBlockNumbers();

  // Update the CFG.
  NewBB->addSuccessor(BB);
  JTBB->replaceSuccessor(BB, NewBB);

  ++NumJTInserted;
  return NewBB;
}

/// createARMConstantIslandPass - returns an instance of the constpool
/// island pass.
FunctionPass *llvm::createARMConstantIslandPass() {
  return new ARMConstantIslands();
}

INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
                false, false)