1 | //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that collects Linker Optimization Hints (LOHs). |
10 | // This pass should be run at the very end of the compilation flow, just before |
11 | // assembly printer. |
12 | // To be useful for the linker, the LOH must be printed into the assembly file. |
13 | // |
14 | // A LOH describes a sequence of instructions that may be optimized by the |
15 | // linker. |
16 | // This same sequence cannot be optimized by the compiler because some of |
17 | // the information will be known at link time. |
18 | // For instance, consider the following sequence: |
19 | // L1: adrp xA, sym@PAGE |
20 | // L2: add xB, xA, sym@PAGEOFF |
21 | // L3: ldr xC, [xB, #imm] |
22 | // This sequence can be turned into: |
23 | // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: |
24 | // L3: ldr xC, sym+#imm |
25 | // It may also be turned into either of the following more efficient |
26 | // code sequences: |
27 | // - If sym@PAGEOFF + #imm fits the encoding space of L3. |
28 | // L1: adrp xA, sym@PAGE |
29 | // L3: ldr xC, [xB, sym@PAGEOFF + #imm] |
30 | // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: |
31 | // L1: adr xA, sym |
32 | // L3: ldr xC, [xB, #imm] |
33 | // |
34 | // To be valid a LOH must meet all the requirements needed by all the related |
35 | // possible linker transformations. |
36 | // For instance, using the running example, the constraints to emit |
37 | // ".loh AdrpAddLdr" are: |
38 | // - L1, L2, and L3 instructions are of the expected type, i.e., |
39 | // respectively ADRP, ADD (immediate), and LD. |
40 | // - The result of L1 is used only by L2. |
41 | // - The register argument (xA) used in the ADD instruction is defined |
42 | // only by L1. |
43 | // - The result of L2 is used only by L3. |
44 | // - The base address (xB) in L3 is defined only by L2. |
45 | // - The ADRP in L1 and the ADD in L2 must reference the same symbol using |
46 | // @PAGE/@PAGEOFF with no additional constants |
47 | // |
48 | // Currently supported LOHs are: |
49 | // * So called non-ADRP-related: |
50 | // - .loh AdrpAddLdr L1, L2, L3: |
51 | // L1: adrp xA, sym@PAGE |
52 | // L2: add xB, xA, sym@PAGEOFF |
53 | // L3: ldr xC, [xB, #imm] |
54 | // - .loh AdrpLdrGotLdr L1, L2, L3: |
55 | // L1: adrp xA, sym@GOTPAGE |
56 | // L2: ldr xB, [xA, sym@GOTPAGEOFF] |
57 | // L3: ldr xC, [xB, #imm] |
58 | // - .loh AdrpLdr L1, L3: |
59 | // L1: adrp xA, sym@PAGE |
60 | // L3: ldr xC, [xA, sym@PAGEOFF] |
61 | // - .loh AdrpAddStr L1, L2, L3: |
62 | // L1: adrp xA, sym@PAGE |
63 | // L2: add xB, xA, sym@PAGEOFF |
64 | // L3: str xC, [xB, #imm] |
65 | // - .loh AdrpLdrGotStr L1, L2, L3: |
66 | // L1: adrp xA, sym@GOTPAGE |
67 | // L2: ldr xB, [xA, sym@GOTPAGEOFF] |
68 | // L3: str xC, [xB, #imm] |
69 | // - .loh AdrpAdd L1, L2: |
70 | // L1: adrp xA, sym@PAGE |
71 | // L2: add xB, xA, sym@PAGEOFF |
72 | // For all these LOHs, L1, L2, L3 form a simple chain: |
73 | // L1 result is used only by L2 and L2 result by L3. |
74 | // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument |
75 | // by L1. |
76 | // All these LOHs aim at using more efficient load/store patterns by folding |
77 | // some instructions used to compute the address directly into the load/store. |
78 | // |
79 | // * So called ADRP-related: |
80 | // - .loh AdrpAdrp L2, L1: |
81 | // L2: ADRP xA, sym1@PAGE |
82 | // L1: ADRP xA, sym2@PAGE |
83 | // L2 dominates L1 and xA is not redefined between L2 and L1. |
84 | // This LOH aims at getting rid of redundant ADRP instructions. |
85 | // |
86 | // The overall design for emitting the LOHs is: |
87 | // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo. |
88 | // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it: |
89 | // 1. Associates them with a label. |
90 | // 2. Emits them in a MCStreamer (EmitLOHDirective). |
91 | // - The MCMachOStreamer records them into the MCAssembler. |
92 | // - The MCAsmStreamer prints them. |
93 | // - Other MCStreamers ignore them. |
94 | // 3. Closes the MCStreamer: |
95 | // - The MachObjectWriter gets them from the MCAssembler and writes |
96 | // them in the object file. |
97 | // - Other ObjectWriters ignore them. |
98 | //===----------------------------------------------------------------------===// |
99 | |
100 | #include "AArch64.h" |
101 | #include "AArch64InstrInfo.h" |
102 | #include "AArch64MachineFunctionInfo.h" |
103 | #include "llvm/ADT/SmallSet.h" |
104 | #include "llvm/ADT/Statistic.h" |
105 | #include "llvm/CodeGen/MachineBasicBlock.h" |
106 | #include "llvm/CodeGen/MachineFunctionPass.h" |
107 | #include "llvm/CodeGen/MachineInstr.h" |
108 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
109 | #include "llvm/Support/Debug.h" |
110 | #include "llvm/Support/ErrorHandling.h" |
111 | #include "llvm/Support/raw_ostream.h" |
112 | #include "llvm/Target/TargetMachine.h" |
113 | using namespace llvm; |
114 | |
115 | #define DEBUG_TYPE "aarch64-collect-loh" |
116 | |
117 | STATISTIC(NumADRPSimpleCandidate, |
118 | "Number of simplifiable ADRP dominate by another" ); |
119 | STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD" ); |
120 | STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR" ); |
121 | STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD" ); |
122 | STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR" ); |
123 | STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP" ); |
124 | STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD" ); |
125 | |
126 | #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" |
127 | |
128 | namespace { |
129 | |
130 | struct AArch64CollectLOH : public MachineFunctionPass { |
131 | static char ID; |
132 | AArch64CollectLOH() : MachineFunctionPass(ID) {} |
133 | |
134 | bool runOnMachineFunction(MachineFunction &MF) override; |
135 | |
136 | MachineFunctionProperties getRequiredProperties() const override { |
137 | return MachineFunctionProperties().set( |
138 | MachineFunctionProperties::Property::NoVRegs); |
139 | } |
140 | |
141 | StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; } |
142 | |
143 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
144 | MachineFunctionPass::getAnalysisUsage(AU); |
145 | AU.setPreservesAll(); |
146 | } |
147 | }; |
148 | |
149 | char AArch64CollectLOH::ID = 0; |
150 | |
151 | } // end anonymous namespace. |
152 | |
153 | INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh" , |
154 | AARCH64_COLLECT_LOH_NAME, false, false) |
155 | |
156 | static bool canAddBePartOfLOH(const MachineInstr &MI) { |
157 | // Check immediate to see if the immediate is an address. |
158 | switch (MI.getOperand(i: 2).getType()) { |
159 | default: |
160 | return false; |
161 | case MachineOperand::MO_GlobalAddress: |
162 | case MachineOperand::MO_JumpTableIndex: |
163 | case MachineOperand::MO_ConstantPoolIndex: |
164 | case MachineOperand::MO_BlockAddress: |
165 | return true; |
166 | } |
167 | } |
168 | |
169 | /// Answer the following question: Can Def be one of the definition |
170 | /// involved in a part of a LOH? |
171 | static bool canDefBePartOfLOH(const MachineInstr &MI) { |
172 | // Accept ADRP, ADDLow and LOADGot. |
173 | switch (MI.getOpcode()) { |
174 | default: |
175 | return false; |
176 | case AArch64::ADRP: |
177 | return true; |
178 | case AArch64::ADDXri: |
179 | return canAddBePartOfLOH(MI); |
180 | case AArch64::LDRXui: |
181 | case AArch64::LDRWui: |
182 | // Check immediate to see if the immediate is an address. |
183 | switch (MI.getOperand(i: 2).getType()) { |
184 | default: |
185 | return false; |
186 | case MachineOperand::MO_GlobalAddress: |
187 | return MI.getOperand(i: 2).getTargetFlags() & AArch64II::MO_GOT; |
188 | } |
189 | } |
190 | } |
191 | |
192 | /// Check whether the given instruction can the end of a LOH chain involving a |
193 | /// store. |
194 | static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { |
195 | switch (MI.getOpcode()) { |
196 | default: |
197 | return false; |
198 | case AArch64::STRBBui: |
199 | case AArch64::STRHHui: |
200 | case AArch64::STRBui: |
201 | case AArch64::STRHui: |
202 | case AArch64::STRWui: |
203 | case AArch64::STRXui: |
204 | case AArch64::STRSui: |
205 | case AArch64::STRDui: |
206 | case AArch64::STRQui: |
207 | // We can only optimize the index operand. |
208 | // In case we have str xA, [xA, #imm], this is two different uses |
209 | // of xA and we cannot fold, otherwise the xA stored may be wrong, |
210 | // even if #imm == 0. |
211 | return MO.getOperandNo() == 1 && |
212 | MI.getOperand(i: 0).getReg() != MI.getOperand(i: 1).getReg(); |
213 | } |
214 | } |
215 | |
216 | /// Check whether the given instruction can be the end of a LOH chain |
217 | /// involving a load. |
218 | static bool isCandidateLoad(const MachineInstr &MI) { |
219 | switch (MI.getOpcode()) { |
220 | default: |
221 | return false; |
222 | case AArch64::LDRSBWui: |
223 | case AArch64::LDRSBXui: |
224 | case AArch64::LDRSHWui: |
225 | case AArch64::LDRSHXui: |
226 | case AArch64::LDRSWui: |
227 | case AArch64::LDRBui: |
228 | case AArch64::LDRHui: |
229 | case AArch64::LDRWui: |
230 | case AArch64::LDRXui: |
231 | case AArch64::LDRSui: |
232 | case AArch64::LDRDui: |
233 | case AArch64::LDRQui: |
234 | return !(MI.getOperand(i: 2).getTargetFlags() & AArch64II::MO_GOT); |
235 | } |
236 | } |
237 | |
238 | /// Check whether the given instruction can load a litteral. |
239 | static bool supportLoadFromLiteral(const MachineInstr &MI) { |
240 | switch (MI.getOpcode()) { |
241 | default: |
242 | return false; |
243 | case AArch64::LDRSWui: |
244 | case AArch64::LDRWui: |
245 | case AArch64::LDRXui: |
246 | case AArch64::LDRSui: |
247 | case AArch64::LDRDui: |
248 | case AArch64::LDRQui: |
249 | return true; |
250 | } |
251 | } |
252 | |
253 | /// Number of GPR registers traked by mapRegToGPRIndex() |
254 | static const unsigned N_GPR_REGS = 31; |
255 | /// Map register number to index from 0-30. |
256 | static int mapRegToGPRIndex(MCPhysReg Reg) { |
257 | static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs" ); |
258 | static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs" ); |
259 | if (AArch64::X0 <= Reg && Reg <= AArch64::X28) |
260 | return Reg - AArch64::X0; |
261 | if (AArch64::W0 <= Reg && Reg <= AArch64::W30) |
262 | return Reg - AArch64::W0; |
263 | // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to |
264 | // handle them as special cases. |
265 | if (Reg == AArch64::FP) |
266 | return 29; |
267 | if (Reg == AArch64::LR) |
268 | return 30; |
269 | return -1; |
270 | } |
271 | |
272 | /// State tracked per register. |
273 | /// The main algorithm walks backwards over a basic block maintaining this |
274 | /// datastructure for each tracked general purpose register. |
275 | struct LOHInfo { |
276 | MCLOHType Type : 8; ///< "Best" type of LOH possible. |
277 | bool IsCandidate : 1; ///< Possible LOH candidate. |
278 | bool OneUser : 1; ///< Found exactly one user (yet). |
279 | bool MultiUsers : 1; ///< Found multiple users. |
280 | const MachineInstr *MI0; ///< First instruction involved in the LOH. |
281 | const MachineInstr *MI1; ///< Second instruction involved in the LOH |
282 | /// (if any). |
283 | const MachineInstr *LastADRP; ///< Last ADRP in same register. |
284 | }; |
285 | |
286 | /// Update state \p Info given \p MI uses the tracked register. |
287 | static void handleUse(const MachineInstr &MI, const MachineOperand &MO, |
288 | LOHInfo &Info) { |
289 | // We have multiple uses if we already found one before. |
290 | if (Info.MultiUsers || Info.OneUser) { |
291 | Info.IsCandidate = false; |
292 | Info.MultiUsers = true; |
293 | return; |
294 | } |
295 | Info.OneUser = true; |
296 | |
297 | // Start new LOHInfo if applicable. |
298 | if (isCandidateLoad(MI)) { |
299 | Info.Type = MCLOH_AdrpLdr; |
300 | Info.IsCandidate = true; |
301 | Info.MI0 = &MI; |
302 | // Note that even this is AdrpLdr now, we can switch to a Ldr variant |
303 | // later. |
304 | } else if (isCandidateStore(MI, MO)) { |
305 | Info.Type = MCLOH_AdrpAddStr; |
306 | Info.IsCandidate = true; |
307 | Info.MI0 = &MI; |
308 | Info.MI1 = nullptr; |
309 | } else if (MI.getOpcode() == AArch64::ADDXri) { |
310 | Info.Type = MCLOH_AdrpAdd; |
311 | Info.IsCandidate = true; |
312 | Info.MI0 = &MI; |
313 | } else if ((MI.getOpcode() == AArch64::LDRXui || |
314 | MI.getOpcode() == AArch64::LDRWui) && |
315 | MI.getOperand(i: 2).getTargetFlags() & AArch64II::MO_GOT) { |
316 | Info.Type = MCLOH_AdrpLdrGot; |
317 | Info.IsCandidate = true; |
318 | Info.MI0 = &MI; |
319 | } |
320 | } |
321 | |
322 | /// Update state \p Info given the tracked register is clobbered. |
323 | static void handleClobber(LOHInfo &Info) { |
324 | Info.IsCandidate = false; |
325 | Info.OneUser = false; |
326 | Info.MultiUsers = false; |
327 | Info.LastADRP = nullptr; |
328 | } |
329 | |
330 | /// Update state \p Info given that \p MI is possibly the middle instruction |
331 | /// of an LOH involving 3 instructions. |
332 | static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, |
333 | LOHInfo &OpInfo) { |
334 | if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser)) |
335 | return false; |
336 | // Copy LOHInfo for dest register to LOHInfo for source register. |
337 | if (&DefInfo != &OpInfo) { |
338 | OpInfo = DefInfo; |
339 | // Invalidate \p DefInfo because we track it in \p OpInfo now. |
340 | handleClobber(Info&: DefInfo); |
341 | } else |
342 | DefInfo.LastADRP = nullptr; |
343 | |
344 | // Advance state machine. |
345 | assert(OpInfo.IsCandidate && "Expect valid state" ); |
346 | if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) { |
347 | if (OpInfo.Type == MCLOH_AdrpLdr) { |
348 | OpInfo.Type = MCLOH_AdrpAddLdr; |
349 | OpInfo.IsCandidate = true; |
350 | OpInfo.MI1 = &MI; |
351 | return true; |
352 | } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { |
353 | OpInfo.Type = MCLOH_AdrpAddStr; |
354 | OpInfo.IsCandidate = true; |
355 | OpInfo.MI1 = &MI; |
356 | return true; |
357 | } |
358 | } else { |
359 | assert((MI.getOpcode() == AArch64::LDRXui || |
360 | MI.getOpcode() == AArch64::LDRWui) && |
361 | "Expect LDRXui or LDRWui" ); |
362 | assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && |
363 | "Expected GOT relocation" ); |
364 | if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { |
365 | OpInfo.Type = MCLOH_AdrpLdrGotStr; |
366 | OpInfo.IsCandidate = true; |
367 | OpInfo.MI1 = &MI; |
368 | return true; |
369 | } else if (OpInfo.Type == MCLOH_AdrpLdr) { |
370 | OpInfo.Type = MCLOH_AdrpLdrGotLdr; |
371 | OpInfo.IsCandidate = true; |
372 | OpInfo.MI1 = &MI; |
373 | return true; |
374 | } |
375 | } |
376 | return false; |
377 | } |
378 | |
379 | /// Update state when seeing and ADRP instruction. |
380 | static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, |
381 | LOHInfo &Info, LOHInfo *LOHInfos) { |
382 | if (Info.LastADRP != nullptr) { |
383 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" |
384 | << '\t' << MI << '\t' << *Info.LastADRP); |
385 | AFI.addLOHDirective(Kind: MCLOH_AdrpAdrp, Args: {&MI, Info.LastADRP}); |
386 | ++NumADRPSimpleCandidate; |
387 | } |
388 | |
389 | // Produce LOH directive if possible. |
390 | if (Info.IsCandidate) { |
391 | switch (Info.Type) { |
392 | case MCLOH_AdrpAdd: { |
393 | // ADRPs and ADDs for this candidate may be split apart if using |
394 | // GlobalISel instead of pseudo-expanded. If that happens, the |
395 | // def register of the ADD may have a use in between. Adding an LOH in |
396 | // this case can cause the linker to rewrite the ADRP to write to that |
397 | // register, clobbering the use. |
398 | const MachineInstr *AddMI = Info.MI0; |
399 | int DefIdx = mapRegToGPRIndex(Reg: MI.getOperand(i: 0).getReg()); |
400 | int OpIdx = mapRegToGPRIndex(Reg: AddMI->getOperand(i: 0).getReg()); |
401 | LOHInfo DefInfo = LOHInfos[OpIdx]; |
402 | if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers)) |
403 | break; |
404 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" |
405 | << '\t' << MI << '\t' << *Info.MI0); |
406 | AFI.addLOHDirective(Kind: MCLOH_AdrpAdd, Args: {&MI, Info.MI0}); |
407 | ++NumADRSimpleCandidate; |
408 | break; |
409 | } |
410 | case MCLOH_AdrpLdr: |
411 | if (supportLoadFromLiteral(MI: *Info.MI0)) { |
412 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" |
413 | << '\t' << MI << '\t' << *Info.MI0); |
414 | AFI.addLOHDirective(Kind: MCLOH_AdrpLdr, Args: {&MI, Info.MI0}); |
415 | ++NumADRPToLDR; |
416 | } |
417 | break; |
418 | case MCLOH_AdrpAddLdr: { |
419 | // There is a possibility that the linker may try to rewrite: |
420 | // adrp x0, @sym@PAGE |
421 | // add x1, x0, @sym@PAGEOFF |
422 | // [x0 = some other def] |
423 | // ldr x2, [x1] |
424 | // ...into... |
425 | // adrp x0, @sym |
426 | // nop |
427 | // [x0 = some other def] |
428 | // ldr x2, [x0] |
429 | // ...if the offset to the symbol won't fit within a literal load. |
430 | // This causes the load to use the result of the adrp, which in this |
431 | // case has already been clobbered. |
432 | // FIXME: Implement proper liveness tracking for all registers. For now, |
433 | // don't emit the LOH if there are any instructions between the add and |
434 | // the ldr. |
435 | MachineInstr *AddMI = const_cast<MachineInstr *>(Info.MI1); |
436 | const MachineInstr *LdrMI = Info.MI0; |
437 | auto AddIt = MachineBasicBlock::iterator(AddMI); |
438 | auto EndIt = AddMI->getParent()->end(); |
439 | if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(It: AddIt, End: EndIt)) |
440 | break; |
441 | |
442 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" |
443 | << '\t' << MI << '\t' << *Info.MI1 << '\t' |
444 | << *Info.MI0); |
445 | AFI.addLOHDirective(Kind: MCLOH_AdrpAddLdr, Args: {&MI, Info.MI1, Info.MI0}); |
446 | ++NumADDToLDR; |
447 | break; |
448 | } |
449 | case MCLOH_AdrpAddStr: |
450 | if (Info.MI1 != nullptr) { |
451 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" |
452 | << '\t' << MI << '\t' << *Info.MI1 << '\t' |
453 | << *Info.MI0); |
454 | AFI.addLOHDirective(Kind: MCLOH_AdrpAddStr, Args: {&MI, Info.MI1, Info.MI0}); |
455 | ++NumADDToSTR; |
456 | } |
457 | break; |
458 | case MCLOH_AdrpLdrGotLdr: |
459 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" |
460 | << '\t' << MI << '\t' << *Info.MI1 << '\t' |
461 | << *Info.MI0); |
462 | AFI.addLOHDirective(Kind: MCLOH_AdrpLdrGotLdr, Args: {&MI, Info.MI1, Info.MI0}); |
463 | ++NumLDRToLDR; |
464 | break; |
465 | case MCLOH_AdrpLdrGotStr: |
466 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" |
467 | << '\t' << MI << '\t' << *Info.MI1 << '\t' |
468 | << *Info.MI0); |
469 | AFI.addLOHDirective(Kind: MCLOH_AdrpLdrGotStr, Args: {&MI, Info.MI1, Info.MI0}); |
470 | ++NumLDRToSTR; |
471 | break; |
472 | case MCLOH_AdrpLdrGot: |
473 | LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" |
474 | << '\t' << MI << '\t' << *Info.MI0); |
475 | AFI.addLOHDirective(Kind: MCLOH_AdrpLdrGot, Args: {&MI, Info.MI0}); |
476 | break; |
477 | case MCLOH_AdrpAdrp: |
478 | llvm_unreachable("MCLOH_AdrpAdrp not used in state machine" ); |
479 | } |
480 | } |
481 | |
482 | handleClobber(Info); |
483 | Info.LastADRP = &MI; |
484 | } |
485 | |
486 | static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg, |
487 | LOHInfo *LOHInfos) { |
488 | if (!MachineOperand::clobbersPhysReg(RegMask, PhysReg: Reg)) |
489 | return; |
490 | int Idx = mapRegToGPRIndex(Reg); |
491 | if (Idx >= 0) |
492 | handleClobber(Info&: LOHInfos[Idx]); |
493 | } |
494 | |
495 | static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { |
496 | // Handle defs and regmasks. |
497 | for (const MachineOperand &MO : MI.operands()) { |
498 | if (MO.isRegMask()) { |
499 | const uint32_t *RegMask = MO.getRegMask(); |
500 | for (MCPhysReg Reg : AArch64::GPR32RegClass) |
501 | handleRegMaskClobber(RegMask, Reg, LOHInfos); |
502 | for (MCPhysReg Reg : AArch64::GPR64RegClass) |
503 | handleRegMaskClobber(RegMask, Reg, LOHInfos); |
504 | continue; |
505 | } |
506 | if (!MO.isReg() || !MO.isDef()) |
507 | continue; |
508 | int Idx = mapRegToGPRIndex(Reg: MO.getReg()); |
509 | if (Idx < 0) |
510 | continue; |
511 | handleClobber(Info&: LOHInfos[Idx]); |
512 | } |
513 | // Handle uses. |
514 | |
515 | SmallSet<int, 4> UsesSeen; |
516 | for (const MachineOperand &MO : MI.uses()) { |
517 | if (!MO.isReg() || !MO.readsReg()) |
518 | continue; |
519 | int Idx = mapRegToGPRIndex(Reg: MO.getReg()); |
520 | if (Idx < 0) |
521 | continue; |
522 | |
523 | // Multiple uses of the same register within a single instruction don't |
524 | // count as MultiUser or block optimization. This is especially important on |
525 | // arm64_32, where any memory operation is likely to be an explicit use of |
526 | // xN and an implicit use of wN (the base address register). |
527 | if (UsesSeen.insert(V: Idx).second) |
528 | handleUse(MI, MO, Info&: LOHInfos[Idx]); |
529 | } |
530 | } |
531 | |
532 | bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { |
533 | if (skipFunction(F: MF.getFunction())) |
534 | return false; |
535 | |
536 | LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" |
537 | << "Looking in function " << MF.getName() << '\n'); |
538 | |
539 | LOHInfo LOHInfos[N_GPR_REGS]; |
540 | AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); |
541 | for (const MachineBasicBlock &MBB : MF) { |
542 | // Reset register tracking state. |
543 | memset(s: LOHInfos, c: 0, n: sizeof(LOHInfos)); |
544 | // Live-out registers are used. |
545 | for (const MachineBasicBlock *Succ : MBB.successors()) { |
546 | for (const auto &LI : Succ->liveins()) { |
547 | int RegIdx = mapRegToGPRIndex(Reg: LI.PhysReg); |
548 | if (RegIdx >= 0) |
549 | LOHInfos[RegIdx].OneUser = true; |
550 | } |
551 | } |
552 | |
553 | // Walk the basic block backwards and update the per register state machine |
554 | // in the process. |
555 | for (const MachineInstr &MI : |
556 | instructionsWithoutDebug(It: MBB.instr_rbegin(), End: MBB.instr_rend())) { |
557 | unsigned Opcode = MI.getOpcode(); |
558 | switch (Opcode) { |
559 | case AArch64::ADDXri: |
560 | case AArch64::LDRXui: |
561 | case AArch64::LDRWui: |
562 | if (canDefBePartOfLOH(MI)) { |
563 | const MachineOperand &Def = MI.getOperand(i: 0); |
564 | const MachineOperand &Op = MI.getOperand(i: 1); |
565 | assert(Def.isReg() && Def.isDef() && "Expected reg def" ); |
566 | assert(Op.isReg() && Op.isUse() && "Expected reg use" ); |
567 | int DefIdx = mapRegToGPRIndex(Reg: Def.getReg()); |
568 | int OpIdx = mapRegToGPRIndex(Reg: Op.getReg()); |
569 | if (DefIdx >= 0 && OpIdx >= 0 && |
570 | handleMiddleInst(MI, DefInfo&: LOHInfos[DefIdx], OpInfo&: LOHInfos[OpIdx])) |
571 | continue; |
572 | } |
573 | break; |
574 | case AArch64::ADRP: |
575 | const MachineOperand &Op0 = MI.getOperand(i: 0); |
576 | int Idx = mapRegToGPRIndex(Reg: Op0.getReg()); |
577 | if (Idx >= 0) { |
578 | handleADRP(MI, AFI, Info&: LOHInfos[Idx], LOHInfos); |
579 | continue; |
580 | } |
581 | break; |
582 | } |
583 | handleNormalInst(MI, LOHInfos); |
584 | } |
585 | } |
586 | |
587 | // Return "no change": The pass only collects information. |
588 | return false; |
589 | } |
590 | |
591 | FunctionPass *llvm::createAArch64CollectLOHPass() { |
592 | return new AArch64CollectLOH(); |
593 | } |
594 | |