1 | //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that lowers homogeneous prolog/epilog instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "AArch64InstrInfo.h" |
14 | #include "AArch64Subtarget.h" |
15 | #include "MCTargetDesc/AArch64InstPrinter.h" |
16 | #include "llvm/CodeGen/MachineBasicBlock.h" |
17 | #include "llvm/CodeGen/MachineFunction.h" |
18 | #include "llvm/CodeGen/MachineInstr.h" |
19 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
20 | #include "llvm/CodeGen/MachineModuleInfo.h" |
21 | #include "llvm/CodeGen/MachineOperand.h" |
22 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
23 | #include "llvm/IR/DebugLoc.h" |
24 | #include "llvm/IR/IRBuilder.h" |
25 | #include "llvm/IR/Module.h" |
26 | #include "llvm/Pass.h" |
27 | #include <optional> |
28 | #include <sstream> |
29 | |
30 | using namespace llvm; |
31 | |
32 | #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ |
33 | "AArch64 homogeneous prolog/epilog lowering pass" |
34 | |
35 | static cl::opt<int> FrameHelperSizeThreshold( |
36 | "frame-helper-size-threshold" , cl::init(Val: 2), cl::Hidden, |
37 | cl::desc("The minimum number of instructions that are outlined in a frame " |
38 | "helper (default = 2)" )); |
39 | |
40 | namespace { |
41 | |
42 | class AArch64LowerHomogeneousPE { |
43 | public: |
44 | const AArch64InstrInfo *TII; |
45 | |
46 | AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) |
47 | : M(M), MMI(MMI) {} |
48 | |
49 | bool run(); |
50 | bool runOnMachineFunction(MachineFunction &Fn); |
51 | |
52 | private: |
53 | Module *M; |
54 | MachineModuleInfo *MMI; |
55 | |
56 | bool runOnMBB(MachineBasicBlock &MBB); |
57 | bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
58 | MachineBasicBlock::iterator &NextMBBI); |
59 | |
60 | /// Lower a HOM_Prolog pseudo instruction into a helper call |
61 | /// or a sequence of homogeneous stores. |
62 | /// When a fp setup follows, it can be optimized. |
63 | bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
64 | MachineBasicBlock::iterator &NextMBBI); |
65 | /// Lower a HOM_Epilog pseudo instruction into a helper call |
66 | /// or a sequence of homogeneous loads. |
67 | /// When a return follow, it can be optimized. |
68 | bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
69 | MachineBasicBlock::iterator &NextMBBI); |
70 | }; |
71 | |
72 | class AArch64LowerHomogeneousPrologEpilog : public ModulePass { |
73 | public: |
74 | static char ID; |
75 | |
76 | AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {} |
77 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
78 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
79 | AU.addPreserved<MachineModuleInfoWrapperPass>(); |
80 | AU.setPreservesAll(); |
81 | ModulePass::getAnalysisUsage(AU); |
82 | } |
83 | bool runOnModule(Module &M) override; |
84 | |
85 | StringRef getPassName() const override { |
86 | return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; |
87 | } |
88 | }; |
89 | |
90 | } // end anonymous namespace |
91 | |
92 | char AArch64LowerHomogeneousPrologEpilog::ID = 0; |
93 | |
94 | INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, |
95 | "aarch64-lower-homogeneous-prolog-epilog" , |
96 | AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) |
97 | |
98 | bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { |
99 | if (skipModule(M)) |
100 | return false; |
101 | |
102 | MachineModuleInfo *MMI = |
103 | &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
104 | return AArch64LowerHomogeneousPE(&M, MMI).run(); |
105 | } |
106 | |
107 | bool AArch64LowerHomogeneousPE::run() { |
108 | bool Changed = false; |
109 | for (auto &F : *M) { |
110 | if (F.empty()) |
111 | continue; |
112 | |
113 | MachineFunction *MF = MMI->getMachineFunction(F); |
114 | if (!MF) |
115 | continue; |
116 | Changed |= runOnMachineFunction(Fn&: *MF); |
117 | } |
118 | |
119 | return Changed; |
120 | } |
/// Kind of outlined frame helper. The kind selects both the helper's name
/// prefix (see getFrameHelperName) and the code emitted into it (see
/// getOrCreateFrameHelper). Scoped so the enumerator names do not leak into
/// the enclosing namespace; every use in this file is already qualified.
enum class FrameHelperType {
  Prolog,      ///< Stores callee-saved registers and returns.
  PrologFrame, ///< Like Prolog, and additionally sets FP from SP + offset.
  Epilog,      ///< Reloads callee-saved registers and returns through X16.
  EpilogTail   ///< Reloads callee-saved registers and returns through LR.
};
122 | |
123 | /// Return a frame helper name with the given CSRs and the helper type. |
124 | /// For instance, a prolog helper that saves x19 and x20 is named as |
125 | /// OUTLINED_FUNCTION_PROLOG_x19x20. |
126 | static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, |
127 | FrameHelperType Type, unsigned FpOffset) { |
128 | std::ostringstream RegStream; |
129 | switch (Type) { |
130 | case FrameHelperType::Prolog: |
131 | RegStream << "OUTLINED_FUNCTION_PROLOG_" ; |
132 | break; |
133 | case FrameHelperType::PrologFrame: |
134 | RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_" ; |
135 | break; |
136 | case FrameHelperType::Epilog: |
137 | RegStream << "OUTLINED_FUNCTION_EPILOG_" ; |
138 | break; |
139 | case FrameHelperType::EpilogTail: |
140 | RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_" ; |
141 | break; |
142 | } |
143 | |
144 | for (auto Reg : Regs) { |
145 | if (Reg == AArch64::NoRegister) |
146 | continue; |
147 | RegStream << AArch64InstPrinter::getRegisterName(Reg); |
148 | } |
149 | |
150 | return RegStream.str(); |
151 | } |
152 | |
153 | /// Create a Function for the unique frame helper with the given name. |
154 | /// Return a newly created MachineFunction with an empty MachineBasicBlock. |
155 | static MachineFunction &createFrameHelperMachineFunction(Module *M, |
156 | MachineModuleInfo *MMI, |
157 | StringRef Name) { |
158 | LLVMContext &C = M->getContext(); |
159 | Function *F = M->getFunction(Name); |
160 | assert(F == nullptr && "Function has been created before" ); |
161 | F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false), |
162 | Linkage: Function::ExternalLinkage, N: Name, M); |
163 | assert(F && "Function was null!" ); |
164 | |
165 | // Use ODR linkage to avoid duplication. |
166 | F->setLinkage(GlobalValue::LinkOnceODRLinkage); |
167 | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); |
168 | |
169 | // Set minsize, so we don't insert padding between outlined functions. |
170 | F->addFnAttr(Kind: Attribute::NoInline); |
171 | F->addFnAttr(Kind: Attribute::MinSize); |
172 | F->addFnAttr(Kind: Attribute::Naked); |
173 | |
174 | MachineFunction &MF = MMI->getOrCreateMachineFunction(F&: *F); |
175 | // Remove unnecessary register liveness and set NoVRegs. |
176 | MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs(); |
177 | MF.getRegInfo().freezeReservedRegs(); |
178 | |
179 | // Create entry block. |
180 | BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry" , Parent: F); |
181 | IRBuilder<> Builder(EntryBB); |
182 | Builder.CreateRetVoid(); |
183 | |
184 | // Insert the new block into the function. |
185 | MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); |
186 | MF.insert(MBBI: MF.begin(), MBB); |
187 | |
188 | return MF; |
189 | } |
190 | |
191 | /// Emit a store-pair instruction for frame-setup. |
192 | /// If Reg2 is AArch64::NoRegister, emit STR instead. |
193 | static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, |
194 | MachineBasicBlock::iterator Pos, |
195 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
196 | int Offset, bool IsPreDec) { |
197 | assert(Reg1 != AArch64::NoRegister); |
198 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
199 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1); |
200 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
201 | unsigned Opc; |
202 | if (IsPreDec) { |
203 | if (IsFloat) |
204 | Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; |
205 | else |
206 | Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; |
207 | } else { |
208 | if (IsFloat) |
209 | Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; |
210 | else |
211 | Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; |
212 | } |
213 | // The implicit scale for Offset is 8. |
214 | TypeSize Scale(0U, false), Width(0U, false); |
215 | int64_t MinOffset, MaxOffset; |
216 | [[maybe_unused]] bool Success = |
217 | AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset); |
218 | assert(Success && "Invalid Opcode" ); |
219 | Offset *= (8 / (int)Scale); |
220 | |
221 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc)); |
222 | if (IsPreDec) |
223 | MIB.addDef(RegNo: AArch64::SP); |
224 | if (IsPaired) |
225 | MIB.addReg(RegNo: Reg2); |
226 | MIB.addReg(RegNo: Reg1) |
227 | .addReg(RegNo: AArch64::SP) |
228 | .addImm(Val: Offset) |
229 | .setMIFlag(MachineInstr::FrameSetup); |
230 | } |
231 | |
232 | /// Emit a load-pair instruction for frame-destroy. |
233 | /// If Reg2 is AArch64::NoRegister, emit LDR instead. |
234 | static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, |
235 | MachineBasicBlock::iterator Pos, |
236 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
237 | int Offset, bool IsPostDec) { |
238 | assert(Reg1 != AArch64::NoRegister); |
239 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
240 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1); |
241 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
242 | unsigned Opc; |
243 | if (IsPostDec) { |
244 | if (IsFloat) |
245 | Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; |
246 | else |
247 | Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; |
248 | } else { |
249 | if (IsFloat) |
250 | Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; |
251 | else |
252 | Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; |
253 | } |
254 | // The implicit scale for Offset is 8. |
255 | TypeSize Scale(0U, false), Width(0U, false); |
256 | int64_t MinOffset, MaxOffset; |
257 | [[maybe_unused]] bool Success = |
258 | AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset); |
259 | assert(Success && "Invalid Opcode" ); |
260 | Offset *= (8 / (int)Scale); |
261 | |
262 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc)); |
263 | if (IsPostDec) |
264 | MIB.addDef(RegNo: AArch64::SP); |
265 | if (IsPaired) |
266 | MIB.addReg(RegNo: Reg2, flags: getDefRegState(B: true)); |
267 | MIB.addReg(RegNo: Reg1, flags: getDefRegState(B: true)) |
268 | .addReg(RegNo: AArch64::SP) |
269 | .addImm(Val: Offset) |
270 | .setMIFlag(MachineInstr::FrameDestroy); |
271 | } |
272 | |
273 | /// Return a unique function if a helper can be formed with the given Regs |
274 | /// and frame type. |
275 | /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: |
276 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
277 | /// stp x20, x19, [sp, #16] |
278 | /// ret |
279 | /// |
280 | /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: |
281 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
282 | /// stp x20, x19, [sp, #16] |
283 | /// add fp, sp, #32 |
284 | /// ret |
285 | /// |
286 | /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: |
287 | /// mov x16, x30 |
288 | /// ldp x29, x30, [sp, #32] |
289 | /// ldp x20, x19, [sp, #16] |
290 | /// ldp x22, x21, [sp], #48 |
291 | /// ret x16 |
292 | /// |
293 | /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: |
294 | /// ldp x29, x30, [sp, #32] |
295 | /// ldp x20, x19, [sp, #16] |
296 | /// ldp x22, x21, [sp], #48 |
297 | /// ret |
298 | /// @param M module |
299 | /// @param MMI machine module info |
300 | /// @param Regs callee save regs that the helper will handle |
301 | /// @param Type frame helper type |
302 | /// @return a helper function |
303 | static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, |
304 | SmallVectorImpl<unsigned> &Regs, |
305 | FrameHelperType Type, |
306 | unsigned FpOffset = 0) { |
307 | assert(Regs.size() >= 2); |
308 | auto Name = getFrameHelperName(Regs, Type, FpOffset); |
309 | auto *F = M->getFunction(Name); |
310 | if (F) |
311 | return F; |
312 | |
313 | auto &MF = createFrameHelperMachineFunction(M, MMI, Name); |
314 | MachineBasicBlock &MBB = *MF.begin(); |
315 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
316 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
317 | |
318 | int Size = (int)Regs.size(); |
319 | switch (Type) { |
320 | case FrameHelperType::Prolog: |
321 | case FrameHelperType::PrologFrame: { |
322 | // Compute the remaining SP adjust beyond FP/LR. |
323 | auto LRIdx = std::distance(first: Regs.begin(), last: llvm::find(Range&: Regs, Val: AArch64::LR)); |
324 | |
325 | // If the register stored to the lowest address is not LR, we must subtract |
326 | // more from SP here. |
327 | if (LRIdx != Size - 2) { |
328 | assert(Regs[Size - 2] != AArch64::LR); |
329 | emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], |
330 | Offset: LRIdx - Size + 2, IsPreDec: true); |
331 | } |
332 | |
333 | // Store CSRs in the reverse order. |
334 | for (int I = Size - 3; I >= 0; I -= 2) { |
335 | // FP/LR has been stored at call-site. |
336 | if (Regs[I - 1] == AArch64::LR) |
337 | continue; |
338 | emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, |
339 | IsPreDec: false); |
340 | } |
341 | if (Type == FrameHelperType::PrologFrame) |
342 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri)) |
343 | .addDef(RegNo: AArch64::FP) |
344 | .addUse(RegNo: AArch64::SP) |
345 | .addImm(Val: FpOffset) |
346 | .addImm(Val: 0) |
347 | .setMIFlag(MachineInstr::FrameSetup); |
348 | |
349 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET)) |
350 | .addReg(RegNo: AArch64::LR); |
351 | break; |
352 | } |
353 | case FrameHelperType::Epilog: |
354 | case FrameHelperType::EpilogTail: |
355 | if (Type == FrameHelperType::Epilog) |
356 | // Stash LR to X16 |
357 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ORRXrs)) |
358 | .addDef(RegNo: AArch64::X16) |
359 | .addReg(RegNo: AArch64::XZR) |
360 | .addUse(RegNo: AArch64::LR) |
361 | .addImm(Val: 0); |
362 | |
363 | for (int I = 0; I < Size - 2; I += 2) |
364 | emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, |
365 | IsPostDec: false); |
366 | // Restore the last CSR with post-increment of SP. |
367 | emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, |
368 | IsPostDec: true); |
369 | |
370 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET)) |
371 | .addReg(RegNo: Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); |
372 | break; |
373 | } |
374 | |
375 | return M->getFunction(Name); |
376 | } |
377 | |
378 | /// This function checks if a frame helper should be used for |
379 | /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. |
380 | /// @param MBB machine basic block |
381 | /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog |
382 | /// @param Regs callee save registers that are saved or restored. |
383 | /// @param Type frame helper type |
384 | /// @return True if a use of helper is qualified. |
385 | static bool shouldUseFrameHelper(MachineBasicBlock &MBB, |
386 | MachineBasicBlock::iterator &NextMBBI, |
387 | SmallVectorImpl<unsigned> &Regs, |
388 | FrameHelperType Type) { |
389 | const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
390 | auto RegCount = Regs.size(); |
391 | assert(RegCount > 0 && (RegCount % 2 == 0)); |
392 | // # of instructions that will be outlined. |
393 | int InstCount = RegCount / 2; |
394 | |
395 | // Do not use a helper call when not saving LR. |
396 | if (!llvm::is_contained(Range&: Regs, Element: AArch64::LR)) |
397 | return false; |
398 | |
399 | switch (Type) { |
400 | case FrameHelperType::Prolog: |
401 | // Prolog helper cannot save FP/LR. |
402 | InstCount--; |
403 | break; |
404 | case FrameHelperType::PrologFrame: { |
405 | // Effectively no change in InstCount since FpAdjustment is included. |
406 | break; |
407 | } |
408 | case FrameHelperType::Epilog: |
409 | // Bail-out if X16 is live across the epilog helper because it is used in |
410 | // the helper to handle X30. |
411 | for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { |
412 | if (NextMI->readsRegister(Reg: AArch64::W16, TRI)) |
413 | return false; |
414 | } |
415 | // Epilog may not be in the last block. Check the liveness in successors. |
416 | for (const MachineBasicBlock *SuccMBB : MBB.successors()) { |
417 | if (SuccMBB->isLiveIn(Reg: AArch64::W16) || SuccMBB->isLiveIn(Reg: AArch64::X16)) |
418 | return false; |
419 | } |
420 | // No change in InstCount for the regular epilog case. |
421 | break; |
422 | case FrameHelperType::EpilogTail: { |
423 | // EpilogTail helper includes the caller's return. |
424 | if (NextMBBI == MBB.end()) |
425 | return false; |
426 | if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) |
427 | return false; |
428 | InstCount++; |
429 | break; |
430 | } |
431 | } |
432 | |
433 | return InstCount >= FrameHelperSizeThreshold; |
434 | } |
435 | |
436 | /// Lower a HOM_Epilog pseudo instruction into a helper call while |
437 | /// creating the helper on demand. Or emit a sequence of loads in place when not |
438 | /// using a helper call. |
439 | /// |
440 | /// 1. With a helper including ret |
441 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI |
442 | /// ret ; NextMBBI |
443 | /// => |
444 | /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 |
445 | /// ... ; NextMBBI |
446 | /// |
447 | /// 2. With a helper |
448 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
449 | /// => |
450 | /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 |
451 | /// |
452 | /// 3. Without a helper |
453 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
454 | /// => |
455 | /// ldp x29, x30, [sp, #32] |
456 | /// ldp x20, x19, [sp, #16] |
457 | /// ldp x22, x21, [sp], #48 |
458 | bool AArch64LowerHomogeneousPE::lowerEpilog( |
459 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
460 | MachineBasicBlock::iterator &NextMBBI) { |
461 | auto &MF = *MBB.getParent(); |
462 | MachineInstr &MI = *MBBI; |
463 | |
464 | DebugLoc DL = MI.getDebugLoc(); |
465 | SmallVector<unsigned, 8> Regs; |
466 | bool HasUnpairedReg = false; |
467 | for (auto &MO : MI.operands()) |
468 | if (MO.isReg()) { |
469 | if (!MO.getReg().isValid()) { |
470 | // For now we are only expecting unpaired GP registers which should |
471 | // occur exactly once. |
472 | assert(!HasUnpairedReg); |
473 | HasUnpairedReg = true; |
474 | } |
475 | Regs.push_back(Elt: MO.getReg()); |
476 | } |
477 | (void)HasUnpairedReg; |
478 | int Size = (int)Regs.size(); |
479 | if (Size == 0) |
480 | return false; |
481 | // Registers are in pair. |
482 | assert(Size % 2 == 0); |
483 | assert(MI.getOpcode() == AArch64::HOM_Epilog); |
484 | |
485 | auto Return = NextMBBI; |
486 | if (shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::EpilogTail)) { |
487 | // When MBB ends with a return, emit a tail-call to the epilog helper |
488 | auto *EpilogTailHelper = |
489 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::EpilogTail); |
490 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::TCRETURNdi)) |
491 | .addGlobalAddress(GV: EpilogTailHelper) |
492 | .addImm(Val: 0) |
493 | .setMIFlag(MachineInstr::FrameDestroy) |
494 | .copyImplicitOps(OtherMI: MI) |
495 | .copyImplicitOps(OtherMI: *Return); |
496 | NextMBBI = std::next(x: Return); |
497 | Return->removeFromParent(); |
498 | } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, |
499 | Type: FrameHelperType::Epilog)) { |
500 | // The default epilog helper case. |
501 | auto *EpilogHelper = |
502 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Epilog); |
503 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL)) |
504 | .addGlobalAddress(GV: EpilogHelper) |
505 | .setMIFlag(MachineInstr::FrameDestroy) |
506 | .copyImplicitOps(OtherMI: MI); |
507 | } else { |
508 | // Fall back to no-helper. |
509 | for (int I = 0; I < Size - 2; I += 2) |
510 | emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, IsPostDec: false); |
511 | // Restore the last CSR with post-increment of SP. |
512 | emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, IsPostDec: true); |
513 | } |
514 | |
515 | MBBI->removeFromParent(); |
516 | return true; |
517 | } |
518 | |
519 | /// Lower a HOM_Prolog pseudo instruction into a helper call while |
520 | /// creating the helper on demand. Or emit a sequence of stores in place when |
521 | /// not using a helper call. |
522 | /// |
523 | /// 1. With a helper including frame-setup |
524 | /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 |
525 | /// => |
526 | /// stp x29, x30, [sp, #-16]! |
527 | /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 |
528 | /// |
529 | /// 2. With a helper |
530 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
531 | /// => |
532 | /// stp x29, x30, [sp, #-16]! |
533 | /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 |
534 | /// |
535 | /// 3. Without a helper |
536 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
537 | /// => |
538 | /// stp x22, x21, [sp, #-48]! |
539 | /// stp x20, x19, [sp, #16] |
540 | /// stp x29, x30, [sp, #32] |
541 | bool AArch64LowerHomogeneousPE::lowerProlog( |
542 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
543 | MachineBasicBlock::iterator &NextMBBI) { |
544 | auto &MF = *MBB.getParent(); |
545 | MachineInstr &MI = *MBBI; |
546 | |
547 | DebugLoc DL = MI.getDebugLoc(); |
548 | SmallVector<unsigned, 8> Regs; |
549 | bool HasUnpairedReg = false; |
550 | int LRIdx = 0; |
551 | std::optional<int> FpOffset; |
552 | for (auto &MO : MI.operands()) { |
553 | if (MO.isReg()) { |
554 | if (MO.getReg().isValid()) { |
555 | if (MO.getReg() == AArch64::LR) |
556 | LRIdx = Regs.size(); |
557 | } else { |
558 | // For now we are only expecting unpaired GP registers which should |
559 | // occur exactly once. |
560 | assert(!HasUnpairedReg); |
561 | HasUnpairedReg = true; |
562 | } |
563 | Regs.push_back(Elt: MO.getReg()); |
564 | } else if (MO.isImm()) { |
565 | FpOffset = MO.getImm(); |
566 | } |
567 | } |
568 | (void)HasUnpairedReg; |
569 | int Size = (int)Regs.size(); |
570 | if (Size == 0) |
571 | return false; |
572 | // Allow compact unwind case only for oww. |
573 | assert(Size % 2 == 0); |
574 | assert(MI.getOpcode() == AArch64::HOM_Prolog); |
575 | |
576 | if (FpOffset && |
577 | shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::PrologFrame)) { |
578 | // FP/LR is stored at the top of stack before the prolog helper call. |
579 | emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true); |
580 | auto *PrologFrameHelper = getOrCreateFrameHelper( |
581 | M, MMI, Regs, Type: FrameHelperType::PrologFrame, FpOffset: *FpOffset); |
582 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL)) |
583 | .addGlobalAddress(GV: PrologFrameHelper) |
584 | .setMIFlag(MachineInstr::FrameSetup) |
585 | .copyImplicitOps(OtherMI: MI) |
586 | .addReg(RegNo: AArch64::FP, flags: RegState::Implicit | RegState::Define) |
587 | .addReg(RegNo: AArch64::SP, flags: RegState::Implicit); |
588 | } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, |
589 | Type: FrameHelperType::Prolog)) { |
590 | // FP/LR is stored at the top of stack before the prolog helper call. |
591 | emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true); |
592 | auto *PrologHelper = |
593 | getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Prolog); |
594 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL)) |
595 | .addGlobalAddress(GV: PrologHelper) |
596 | .setMIFlag(MachineInstr::FrameSetup) |
597 | .copyImplicitOps(OtherMI: MI); |
598 | } else { |
599 | // Fall back to no-helper. |
600 | emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: -Size, IsPreDec: true); |
601 | for (int I = Size - 3; I >= 0; I -= 2) |
602 | emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, IsPreDec: false); |
603 | if (FpOffset) { |
604 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri)) |
605 | .addDef(RegNo: AArch64::FP) |
606 | .addUse(RegNo: AArch64::SP) |
607 | .addImm(Val: *FpOffset) |
608 | .addImm(Val: 0) |
609 | .setMIFlag(MachineInstr::FrameSetup); |
610 | } |
611 | } |
612 | |
613 | MBBI->removeFromParent(); |
614 | return true; |
615 | } |
616 | |
617 | /// Process each machine instruction |
618 | /// @param MBB machine basic block |
619 | /// @param MBBI current instruction iterator |
620 | /// @param NextMBBI next instruction iterator which can be updated |
621 | /// @return True when IR is changed. |
622 | bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, |
623 | MachineBasicBlock::iterator MBBI, |
624 | MachineBasicBlock::iterator &NextMBBI) { |
625 | MachineInstr &MI = *MBBI; |
626 | unsigned Opcode = MI.getOpcode(); |
627 | switch (Opcode) { |
628 | default: |
629 | break; |
630 | case AArch64::HOM_Prolog: |
631 | return lowerProlog(MBB, MBBI, NextMBBI); |
632 | case AArch64::HOM_Epilog: |
633 | return lowerEpilog(MBB, MBBI, NextMBBI); |
634 | } |
635 | return false; |
636 | } |
637 | |
638 | bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { |
639 | bool Modified = false; |
640 | |
641 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
642 | while (MBBI != E) { |
643 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
644 | Modified |= runOnMI(MBB, MBBI, NextMBBI&: NMBBI); |
645 | MBBI = NMBBI; |
646 | } |
647 | |
648 | return Modified; |
649 | } |
650 | |
651 | bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { |
652 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
653 | |
654 | bool Modified = false; |
655 | for (auto &MBB : MF) |
656 | Modified |= runOnMBB(MBB); |
657 | return Modified; |
658 | } |
659 | |
660 | ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { |
661 | return new AArch64LowerHomogeneousPrologEpilog(); |
662 | } |
663 | |