1 | //===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass makes sure that all branches are in range. There are several ways |
10 | // in which this could be done. One aggressive approach is to assume that all |
11 | // branches are in range and successively replace those that turn out not |
12 | // to be in range with a longer form (branch relaxation). A simple |
13 | // implementation is to continually walk through the function relaxing |
14 | // branches until no more changes are needed and a fixed point is reached. |
15 | // However, in the pathological worst case, this implementation is |
16 | // quadratic in the number of blocks; relaxing branch N can make branch N-1 |
17 | // go out of range, which in turn can make branch N-2 go out of range, |
18 | // and so on. |
19 | // |
20 | // An alternative approach is to assume that all branches must be |
21 | // converted to their long forms, then reinstate the short forms of |
22 | // branches that, even under this pessimistic assumption, turn out to be |
23 | // in range (branch shortening). This too can be implemented as a function |
24 | // walk that is repeated until a fixed point is reached. In general, |
25 | // the result of shortening is not as good as that of relaxation, and |
26 | // shortening is also quadratic in the worst case; shortening branch N |
27 | // can bring branch N-1 in range of the short form, which in turn can do |
28 | // the same for branch N-2, and so on. The main advantage of shortening |
29 | // is that each walk through the function produces valid code, so it is |
30 | // possible to stop at any point after the first walk. The quadraticness |
31 | // could therefore be handled with a maximum pass count, although the |
32 | // question then becomes: what maximum count should be used? |
33 | // |
34 | // On SystemZ, long branches are only needed for functions bigger than 64k, |
35 | // which are relatively rare to begin with, and the long branch sequences |
36 | // are actually relatively cheap. It therefore doesn't seem worth spending |
37 | // much compilation time on the problem. Instead, the approach we take is: |
38 | // |
39 | // (1) Work out the address that each block would have if no branches |
40 | // need relaxing. Exit the pass early if all branches are in range |
41 | // according to this assumption. |
42 | // |
43 | // (2) Work out the address that each block would have if all branches |
44 | // need relaxing. |
45 | // |
46 | // (3) Walk through the block calculating the final address of each instruction |
47 | // and relaxing those that need to be relaxed. For backward branches, |
48 | // this check uses the final address of the target block, as calculated |
49 | // earlier in the walk. For forward branches, this check uses the |
50 | // address of the target block that was calculated in (2). Both checks |
51 | // give a conservatively-correct range. |
52 | // |
53 | //===----------------------------------------------------------------------===// |
54 | |
55 | #include "SystemZ.h" |
56 | #include "SystemZInstrInfo.h" |
57 | #include "SystemZTargetMachine.h" |
58 | #include "llvm/ADT/SmallVector.h" |
59 | #include "llvm/ADT/Statistic.h" |
60 | #include "llvm/ADT/StringRef.h" |
61 | #include "llvm/CodeGen/MachineBasicBlock.h" |
62 | #include "llvm/CodeGen/MachineFunction.h" |
63 | #include "llvm/CodeGen/MachineFunctionPass.h" |
64 | #include "llvm/CodeGen/MachineInstr.h" |
65 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
66 | #include "llvm/IR/DebugLoc.h" |
67 | #include "llvm/Support/ErrorHandling.h" |
68 | #include <cassert> |
69 | #include <cstdint> |
70 | |
71 | using namespace llvm; |
72 | |
73 | #define DEBUG_TYPE "systemz-long-branch" |
74 | |
75 | STATISTIC(LongBranches, "Number of long branches." ); |
76 | |
77 | namespace { |
78 | |
79 | // Represents positional information about a basic block. |
80 | struct MBBInfo { |
81 | // The address that we currently assume the block has. |
82 | uint64_t Address = 0; |
83 | |
84 | // The size of the block in bytes, excluding terminators. |
85 | // This value never changes. |
86 | uint64_t Size = 0; |
87 | |
88 | // The minimum alignment of the block. |
89 | // This value never changes. |
90 | Align Alignment; |
91 | |
92 | // The number of terminators in this block. This value never changes. |
93 | unsigned NumTerminators = 0; |
94 | |
95 | MBBInfo() = default; |
96 | }; |
97 | |
98 | // Represents the state of a block terminator. |
99 | struct TerminatorInfo { |
100 | // If this terminator is a relaxable branch, this points to the branch |
101 | // instruction, otherwise it is null. |
102 | MachineInstr *Branch = nullptr; |
103 | |
104 | // The address that we currently assume the terminator has. |
105 | uint64_t Address = 0; |
106 | |
107 | // The current size of the terminator in bytes. |
108 | uint64_t Size = 0; |
109 | |
110 | // If Branch is nonnull, this is the number of the target block, |
111 | // otherwise it is unused. |
112 | unsigned TargetBlock = 0; |
113 | |
114 | // If Branch is nonnull, this is the length of the longest relaxed form, |
115 | // otherwise it is zero. |
116 | unsigned = 0; |
117 | |
118 | TerminatorInfo() = default; |
119 | }; |
120 | |
121 | // Used to keep track of the current position while iterating over the blocks. |
122 | struct BlockPosition { |
123 | // The address that we assume this position has. |
124 | uint64_t Address = 0; |
125 | |
126 | // The number of low bits in Address that are known to be the same |
127 | // as the runtime address. |
128 | unsigned KnownBits; |
129 | |
130 | BlockPosition(unsigned InitialLogAlignment) |
131 | : KnownBits(InitialLogAlignment) {} |
132 | }; |
133 | |
134 | class SystemZLongBranch : public MachineFunctionPass { |
135 | public: |
136 | static char ID; |
137 | |
138 | SystemZLongBranch() : MachineFunctionPass(ID) { |
139 | initializeSystemZLongBranchPass(*PassRegistry::getPassRegistry()); |
140 | } |
141 | |
142 | bool runOnMachineFunction(MachineFunction &F) override; |
143 | |
144 | MachineFunctionProperties getRequiredProperties() const override { |
145 | return MachineFunctionProperties().set( |
146 | MachineFunctionProperties::Property::NoVRegs); |
147 | } |
148 | |
149 | private: |
150 | void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); |
151 | void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, |
152 | bool AssumeRelaxed); |
153 | TerminatorInfo describeTerminator(MachineInstr &MI); |
154 | uint64_t initMBBInfo(); |
155 | bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); |
156 | bool mustRelaxABranch(); |
157 | void setWorstCaseAddresses(); |
158 | void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode); |
159 | void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); |
160 | void relaxBranch(TerminatorInfo &Terminator); |
161 | void relaxBranches(); |
162 | |
163 | const SystemZInstrInfo *TII = nullptr; |
164 | MachineFunction *MF = nullptr; |
165 | SmallVector<MBBInfo, 16> MBBs; |
166 | SmallVector<TerminatorInfo, 16> Terminators; |
167 | }; |
168 | |
169 | char SystemZLongBranch::ID = 0; |
170 | |
171 | const uint64_t MaxBackwardRange = 0x10000; |
172 | const uint64_t MaxForwardRange = 0xfffe; |
173 | |
174 | } // end anonymous namespace |
175 | |
176 | INITIALIZE_PASS(SystemZLongBranch, DEBUG_TYPE, "SystemZ Long Branch" , false, |
177 | false) |
178 | |
179 | // Position describes the state immediately before Block. Update Block |
180 | // accordingly and move Position to the end of the block's non-terminator |
181 | // instructions. |
182 | void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, |
183 | MBBInfo &Block) { |
184 | if (Log2(A: Block.Alignment) > Position.KnownBits) { |
185 | // When calculating the address of Block, we need to conservatively |
186 | // assume that Block had the worst possible misalignment. |
187 | Position.Address += |
188 | (Block.Alignment.value() - (uint64_t(1) << Position.KnownBits)); |
189 | Position.KnownBits = Log2(A: Block.Alignment); |
190 | } |
191 | |
192 | // Align the addresses. |
193 | Position.Address = alignTo(Size: Position.Address, A: Block.Alignment); |
194 | |
195 | // Record the block's position. |
196 | Block.Address = Position.Address; |
197 | |
198 | // Move past the non-terminators in the block. |
199 | Position.Address += Block.Size; |
200 | } |
201 | |
202 | // Position describes the state immediately before Terminator. |
203 | // Update Terminator accordingly and move Position past it. |
204 | // Assume that Terminator will be relaxed if AssumeRelaxed. |
205 | void SystemZLongBranch::skipTerminator(BlockPosition &Position, |
206 | TerminatorInfo &Terminator, |
207 | bool AssumeRelaxed) { |
208 | Terminator.Address = Position.Address; |
209 | Position.Address += Terminator.Size; |
210 | if (AssumeRelaxed) |
211 | Position.Address += Terminator.ExtraRelaxSize; |
212 | } |
213 | |
214 | static unsigned getInstSizeInBytes(const MachineInstr &MI, |
215 | const SystemZInstrInfo *TII) { |
216 | unsigned Size = TII->getInstSizeInBytes(MI); |
217 | assert((Size || |
218 | // These do not have a size: |
219 | MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() || |
220 | MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER || |
221 | // These have a size that may be zero: |
222 | MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP || |
223 | MI.getOpcode() == SystemZ::PATCHPOINT) && |
224 | "Missing size value for instruction." ); |
225 | return Size; |
226 | } |
227 | |
228 | // Return a description of terminator instruction MI. |
229 | TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) { |
230 | TerminatorInfo Terminator; |
231 | Terminator.Size = getInstSizeInBytes(MI, TII); |
232 | if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) { |
233 | switch (MI.getOpcode()) { |
234 | case SystemZ::J: |
235 | // Relaxes to JG, which is 2 bytes longer. |
236 | Terminator.ExtraRelaxSize = 2; |
237 | break; |
238 | case SystemZ::BRC: |
239 | // Relaxes to BRCL, which is 2 bytes longer. |
240 | Terminator.ExtraRelaxSize = 2; |
241 | break; |
242 | case SystemZ::BRCT: |
243 | case SystemZ::BRCTG: |
244 | // Relaxes to A(G)HI and BRCL, which is 6 bytes longer. |
245 | Terminator.ExtraRelaxSize = 6; |
246 | break; |
247 | case SystemZ::BRCTH: |
248 | // Never needs to be relaxed. |
249 | Terminator.ExtraRelaxSize = 0; |
250 | break; |
251 | case SystemZ::CRJ: |
252 | case SystemZ::CLRJ: |
253 | // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer. |
254 | Terminator.ExtraRelaxSize = 2; |
255 | break; |
256 | case SystemZ::CGRJ: |
257 | case SystemZ::CLGRJ: |
258 | // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer. |
259 | Terminator.ExtraRelaxSize = 4; |
260 | break; |
261 | case SystemZ::CIJ: |
262 | case SystemZ::CGIJ: |
263 | // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. |
264 | Terminator.ExtraRelaxSize = 4; |
265 | break; |
266 | case SystemZ::CLIJ: |
267 | case SystemZ::CLGIJ: |
268 | // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer. |
269 | Terminator.ExtraRelaxSize = 6; |
270 | break; |
271 | default: |
272 | llvm_unreachable("Unrecognized branch instruction" ); |
273 | } |
274 | Terminator.Branch = &MI; |
275 | Terminator.TargetBlock = |
276 | TII->getBranchInfo(MI).getMBBTarget()->getNumber(); |
277 | } |
278 | return Terminator; |
279 | } |
280 | |
281 | // Fill MBBs and Terminators, setting the addresses on the assumption |
282 | // that no branches need relaxation. Return the size of the function under |
283 | // this assumption. |
284 | uint64_t SystemZLongBranch::initMBBInfo() { |
285 | MF->RenumberBlocks(); |
286 | unsigned NumBlocks = MF->size(); |
287 | |
288 | MBBs.clear(); |
289 | MBBs.resize(N: NumBlocks); |
290 | |
291 | Terminators.clear(); |
292 | Terminators.reserve(N: NumBlocks); |
293 | |
294 | BlockPosition Position(Log2(A: MF->getAlignment())); |
295 | for (unsigned I = 0; I < NumBlocks; ++I) { |
296 | MachineBasicBlock *MBB = MF->getBlockNumbered(N: I); |
297 | MBBInfo &Block = MBBs[I]; |
298 | |
299 | // Record the alignment, for quick access. |
300 | Block.Alignment = MBB->getAlignment(); |
301 | |
302 | // Calculate the size of the fixed part of the block. |
303 | MachineBasicBlock::iterator MI = MBB->begin(); |
304 | MachineBasicBlock::iterator End = MBB->end(); |
305 | while (MI != End && !MI->isTerminator()) { |
306 | Block.Size += getInstSizeInBytes(MI: *MI, TII); |
307 | ++MI; |
308 | } |
309 | skipNonTerminators(Position, Block); |
310 | |
311 | // Add the terminators. |
312 | while (MI != End) { |
313 | if (!MI->isDebugInstr()) { |
314 | assert(MI->isTerminator() && "Terminator followed by non-terminator" ); |
315 | Terminators.push_back(Elt: describeTerminator(MI&: *MI)); |
316 | skipTerminator(Position, Terminator&: Terminators.back(), AssumeRelaxed: false); |
317 | ++Block.NumTerminators; |
318 | } |
319 | ++MI; |
320 | } |
321 | } |
322 | |
323 | return Position.Address; |
324 | } |
325 | |
326 | // Return true if, under current assumptions, Terminator would need to be |
327 | // relaxed if it were placed at address Address. |
328 | bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, |
329 | uint64_t Address) { |
330 | if (!Terminator.Branch || Terminator.ExtraRelaxSize == 0) |
331 | return false; |
332 | |
333 | const MBBInfo &Target = MBBs[Terminator.TargetBlock]; |
334 | if (Address >= Target.Address) { |
335 | if (Address - Target.Address <= MaxBackwardRange) |
336 | return false; |
337 | } else { |
338 | if (Target.Address - Address <= MaxForwardRange) |
339 | return false; |
340 | } |
341 | |
342 | return true; |
343 | } |
344 | |
345 | // Return true if, under current assumptions, any terminator needs |
346 | // to be relaxed. |
347 | bool SystemZLongBranch::mustRelaxABranch() { |
348 | for (auto &Terminator : Terminators) |
349 | if (mustRelaxBranch(Terminator, Address: Terminator.Address)) |
350 | return true; |
351 | return false; |
352 | } |
353 | |
354 | // Set the address of each block on the assumption that all branches |
355 | // must be long. |
356 | void SystemZLongBranch::setWorstCaseAddresses() { |
357 | SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); |
358 | BlockPosition Position(Log2(A: MF->getAlignment())); |
359 | for (auto &Block : MBBs) { |
360 | skipNonTerminators(Position, Block); |
361 | for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { |
362 | skipTerminator(Position, Terminator&: *TI, AssumeRelaxed: true); |
363 | ++TI; |
364 | } |
365 | } |
366 | } |
367 | |
368 | // Split BRANCH ON COUNT MI into the addition given by AddOpcode followed |
369 | // by a BRCL on the result. |
370 | void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI, |
371 | unsigned AddOpcode) { |
372 | MachineBasicBlock *MBB = MI->getParent(); |
373 | DebugLoc DL = MI->getDebugLoc(); |
374 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: AddOpcode)) |
375 | .add(MO: MI->getOperand(i: 0)) |
376 | .add(MO: MI->getOperand(i: 1)) |
377 | .addImm(Val: -1); |
378 | MachineInstr *BRCL = BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRCL)) |
379 | .addImm(Val: SystemZ::CCMASK_ICMP) |
380 | .addImm(Val: SystemZ::CCMASK_CMP_NE) |
381 | .add(MO: MI->getOperand(i: 2)); |
382 | // The implicit use of CC is a killing use. |
383 | BRCL->addRegisterKilled(IncomingReg: SystemZ::CC, RegInfo: &TII->getRegisterInfo()); |
384 | MI->eraseFromParent(); |
385 | } |
386 | |
387 | // Split MI into the comparison given by CompareOpcode followed |
388 | // a BRCL on the result. |
389 | void SystemZLongBranch::splitCompareBranch(MachineInstr *MI, |
390 | unsigned CompareOpcode) { |
391 | MachineBasicBlock *MBB = MI->getParent(); |
392 | DebugLoc DL = MI->getDebugLoc(); |
393 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: CompareOpcode)) |
394 | .add(MO: MI->getOperand(i: 0)) |
395 | .add(MO: MI->getOperand(i: 1)); |
396 | MachineInstr *BRCL = BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRCL)) |
397 | .addImm(Val: SystemZ::CCMASK_ICMP) |
398 | .add(MO: MI->getOperand(i: 2)) |
399 | .add(MO: MI->getOperand(i: 3)); |
400 | // The implicit use of CC is a killing use. |
401 | BRCL->addRegisterKilled(IncomingReg: SystemZ::CC, RegInfo: &TII->getRegisterInfo()); |
402 | MI->eraseFromParent(); |
403 | } |
404 | |
405 | // Relax the branch described by Terminator. |
406 | void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { |
407 | MachineInstr *Branch = Terminator.Branch; |
408 | switch (Branch->getOpcode()) { |
409 | case SystemZ::J: |
410 | Branch->setDesc(TII->get(Opcode: SystemZ::JG)); |
411 | break; |
412 | case SystemZ::BRC: |
413 | Branch->setDesc(TII->get(Opcode: SystemZ::BRCL)); |
414 | break; |
415 | case SystemZ::BRCT: |
416 | splitBranchOnCount(MI: Branch, AddOpcode: SystemZ::AHI); |
417 | break; |
418 | case SystemZ::BRCTG: |
419 | splitBranchOnCount(MI: Branch, AddOpcode: SystemZ::AGHI); |
420 | break; |
421 | case SystemZ::CRJ: |
422 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CR); |
423 | break; |
424 | case SystemZ::CGRJ: |
425 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CGR); |
426 | break; |
427 | case SystemZ::CIJ: |
428 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CHI); |
429 | break; |
430 | case SystemZ::CGIJ: |
431 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CGHI); |
432 | break; |
433 | case SystemZ::CLRJ: |
434 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CLR); |
435 | break; |
436 | case SystemZ::CLGRJ: |
437 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CLGR); |
438 | break; |
439 | case SystemZ::CLIJ: |
440 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CLFI); |
441 | break; |
442 | case SystemZ::CLGIJ: |
443 | splitCompareBranch(MI: Branch, CompareOpcode: SystemZ::CLGFI); |
444 | break; |
445 | default: |
446 | llvm_unreachable("Unrecognized branch" ); |
447 | } |
448 | |
449 | Terminator.Size += Terminator.ExtraRelaxSize; |
450 | Terminator.ExtraRelaxSize = 0; |
451 | Terminator.Branch = nullptr; |
452 | |
453 | ++LongBranches; |
454 | } |
455 | |
456 | // Run a shortening pass and relax any branches that need to be relaxed. |
457 | void SystemZLongBranch::relaxBranches() { |
458 | SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(); |
459 | BlockPosition Position(Log2(A: MF->getAlignment())); |
460 | for (auto &Block : MBBs) { |
461 | skipNonTerminators(Position, Block); |
462 | for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { |
463 | assert(Position.Address <= TI->Address && |
464 | "Addresses shouldn't go forwards" ); |
465 | if (mustRelaxBranch(Terminator: *TI, Address: Position.Address)) |
466 | relaxBranch(Terminator&: *TI); |
467 | skipTerminator(Position, Terminator&: *TI, AssumeRelaxed: false); |
468 | ++TI; |
469 | } |
470 | } |
471 | } |
472 | |
473 | bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { |
474 | TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); |
475 | MF = &F; |
476 | uint64_t Size = initMBBInfo(); |
477 | if (Size <= MaxForwardRange || !mustRelaxABranch()) |
478 | return false; |
479 | |
480 | setWorstCaseAddresses(); |
481 | relaxBranches(); |
482 | return true; |
483 | } |
484 | |
485 | FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { |
486 | return new SystemZLongBranch(); |
487 | } |
488 | |