1//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that lowers homogeneous prolog/epilog instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64.h"
14#include "AArch64InstrInfo.h"
15#include "AArch64Subtarget.h"
16#include "MCTargetDesc/AArch64InstPrinter.h"
17#include "llvm/CodeGen/MachineBasicBlock.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineFunctionAnalysis.h"
20#include "llvm/CodeGen/MachineInstr.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineModuleInfo.h"
23#include "llvm/CodeGen/MachineOperand.h"
24#include "llvm/CodeGen/TargetSubtargetInfo.h"
25#include "llvm/IR/DebugLoc.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Module.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
30#include <optional>
31#include <sstream>
32
33using namespace llvm;
34
35#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
36 "AArch64 homogeneous prolog/epilog lowering pass"
37
38static cl::opt<int> FrameHelperSizeThreshold(
39 "frame-helper-size-threshold", cl::init(Val: 2), cl::Hidden,
40 cl::desc("The minimum number of instructions that are outlined in a frame "
41 "helper (default = 2)"));
42
43namespace {
44
45class AArch64LowerHomogeneousPrologEpilogImpl {
46public:
47 const AArch64InstrInfo *TII;
48
49 AArch64LowerHomogeneousPrologEpilogImpl(Module *M, MachineModuleInfo *MMI)
50 : M(M), MMI(MMI) {}
51
52 bool run();
53 bool runOnMachineFunction(MachineFunction &Fn);
54
55private:
56 Module *M;
57 MachineModuleInfo *MMI;
58
59 bool runOnMBB(MachineBasicBlock &MBB);
60 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
61 MachineBasicBlock::iterator &NextMBBI);
62
63 /// Lower a HOM_Prolog pseudo instruction into a helper call
64 /// or a sequence of homogeneous stores.
65 /// When a fp setup follows, it can be optimized.
66 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
67 MachineBasicBlock::iterator &NextMBBI);
68 /// Lower a HOM_Epilog pseudo instruction into a helper call
69 /// or a sequence of homogeneous loads.
70 /// When a return follow, it can be optimized.
71 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
72 MachineBasicBlock::iterator &NextMBBI);
73};
74
75class AArch64LowerHomogeneousPrologEpilogLegacy : public ModulePass {
76public:
77 static char ID;
78
79 AArch64LowerHomogeneousPrologEpilogLegacy() : ModulePass(ID) {}
80 void getAnalysisUsage(AnalysisUsage &AU) const override {
81 AU.addRequired<MachineModuleInfoWrapperPass>();
82 AU.addPreserved<MachineModuleInfoWrapperPass>();
83 AU.setPreservesAll();
84 ModulePass::getAnalysisUsage(AU);
85 }
86 bool runOnModule(Module &M) override;
87
88 StringRef getPassName() const override {
89 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
90 }
91};
92
93} // end anonymous namespace
94
95char AArch64LowerHomogeneousPrologEpilogLegacy::ID = 0;
96
97INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilogLegacy,
98 "aarch64-lower-homogeneous-prolog-epilog",
99 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
100
101bool AArch64LowerHomogeneousPrologEpilogLegacy::runOnModule(Module &M) {
102 if (skipModule(M))
103 return false;
104
105 MachineModuleInfo *MMI =
106 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
107 return AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run();
108}
109
110PreservedAnalyses
111AArch64LowerHomogeneousPrologEpilogPass::run(Module &M,
112 ModuleAnalysisManager &MAM) {
113 MachineModuleInfo *MMI = &MAM.getResult<MachineModuleAnalysis>(IR&: M).getMMI();
114 bool Changed = AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run();
115 if (!Changed)
116 return PreservedAnalyses::all();
117 PreservedAnalyses PA;
118 PA.preserve<MachineModuleAnalysis>();
119 return PA;
120}
121
122bool AArch64LowerHomogeneousPrologEpilogImpl::run() {
123 bool Changed = false;
124 for (auto &F : *M) {
125 if (F.empty())
126 continue;
127
128 MachineFunction *MF = MMI->getMachineFunction(F);
129 if (!MF)
130 continue;
131 Changed |= runOnMachineFunction(Fn&: *MF);
132 }
133
134 return Changed;
135}
/// Kinds of outlined frame helpers.
enum FrameHelperType {
  /// Stores callee-saved registers.
  Prolog,
  /// Like Prolog, and additionally sets up the frame pointer.
  PrologFrame,
  /// Restores callee-saved registers and returns to the caller.
  Epilog,
  /// Restores callee-saved registers and performs the caller's return.
  EpilogTail
};
137
138/// Return a frame helper name with the given CSRs and the helper type.
139/// For instance, a prolog helper that saves x19 and x20 is named as
140/// OUTLINED_FUNCTION_PROLOG_x19x20.
141static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
142 FrameHelperType Type, unsigned FpOffset) {
143 std::ostringstream RegStream;
144 switch (Type) {
145 case FrameHelperType::Prolog:
146 RegStream << "OUTLINED_FUNCTION_PROLOG_";
147 break;
148 case FrameHelperType::PrologFrame:
149 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
150 break;
151 case FrameHelperType::Epilog:
152 RegStream << "OUTLINED_FUNCTION_EPILOG_";
153 break;
154 case FrameHelperType::EpilogTail:
155 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
156 break;
157 }
158
159 for (auto Reg : Regs) {
160 if (Reg == AArch64::NoRegister)
161 continue;
162 RegStream << AArch64InstPrinter::getRegisterName(Reg);
163 }
164
165 return RegStream.str();
166}
167
168/// Create a Function for the unique frame helper with the given name.
169/// Return a newly created MachineFunction with an empty MachineBasicBlock.
170static MachineFunction &createFrameHelperMachineFunction(Module *M,
171 MachineModuleInfo *MMI,
172 StringRef Name) {
173 LLVMContext &C = M->getContext();
174 Function *F = M->getFunction(Name);
175 assert(F == nullptr && "Function has been created before");
176 F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
177 Linkage: Function::ExternalLinkage, N: Name, M);
178 assert(F && "Function was null!");
179
180 // Use ODR linkage to avoid duplication.
181 F->setLinkage(GlobalValue::LinkOnceODRLinkage);
182 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
183
184 // Set minsize, so we don't insert padding between outlined functions.
185 F->addFnAttr(Kind: Attribute::NoInline);
186 F->addFnAttr(Kind: Attribute::MinSize);
187 F->addFnAttr(Kind: Attribute::Naked);
188
189 MachineFunction &MF = MMI->getOrCreateMachineFunction(F&: *F);
190 // Remove unnecessary register liveness and set NoVRegs.
191 MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs();
192 MF.getRegInfo().freezeReservedRegs();
193
194 // Create entry block.
195 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
196 IRBuilder<> Builder(EntryBB);
197 Builder.CreateRetVoid();
198
199 // Insert the new block into the function.
200 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
201 MF.insert(MBBI: MF.begin(), MBB);
202
203 return MF;
204}
205
206/// Emit a store-pair instruction for frame-setup.
207/// If Reg2 is AArch64::NoRegister, emit STR instead.
208static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
209 MachineBasicBlock::iterator Pos,
210 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
211 int Offset, bool IsPreDec) {
212 assert(Reg1 != AArch64::NoRegister);
213 const bool IsPaired = Reg2 != AArch64::NoRegister;
214 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
215 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
216 unsigned Opc;
217 if (IsPreDec) {
218 if (IsFloat)
219 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
220 else
221 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
222 } else {
223 if (IsFloat)
224 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
225 else
226 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
227 }
228 // The implicit scale for Offset is 8.
229 TypeSize Scale(0U, false), Width(0U, false);
230 int64_t MinOffset, MaxOffset;
231 [[maybe_unused]] bool Success =
232 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
233 assert(Success && "Invalid Opcode");
234 Offset *= (8 / (int)Scale);
235
236 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
237 if (IsPreDec)
238 MIB.addDef(RegNo: AArch64::SP);
239 if (IsPaired)
240 MIB.addReg(RegNo: Reg2);
241 MIB.addReg(RegNo: Reg1)
242 .addReg(RegNo: AArch64::SP)
243 .addImm(Val: Offset)
244 .setMIFlag(MachineInstr::FrameSetup);
245}
246
247/// Emit a load-pair instruction for frame-destroy.
248/// If Reg2 is AArch64::NoRegister, emit LDR instead.
249static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
250 MachineBasicBlock::iterator Pos,
251 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
252 int Offset, bool IsPostDec) {
253 assert(Reg1 != AArch64::NoRegister);
254 const bool IsPaired = Reg2 != AArch64::NoRegister;
255 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
256 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
257 unsigned Opc;
258 if (IsPostDec) {
259 if (IsFloat)
260 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
261 else
262 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
263 } else {
264 if (IsFloat)
265 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
266 else
267 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
268 }
269 // The implicit scale for Offset is 8.
270 TypeSize Scale(0U, false), Width(0U, false);
271 int64_t MinOffset, MaxOffset;
272 [[maybe_unused]] bool Success =
273 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
274 assert(Success && "Invalid Opcode");
275 Offset *= (8 / (int)Scale);
276
277 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
278 if (IsPostDec)
279 MIB.addDef(RegNo: AArch64::SP);
280 if (IsPaired)
281 MIB.addReg(RegNo: Reg2, Flags: getDefRegState(B: true));
282 MIB.addReg(RegNo: Reg1, Flags: getDefRegState(B: true))
283 .addReg(RegNo: AArch64::SP)
284 .addImm(Val: Offset)
285 .setMIFlag(MachineInstr::FrameDestroy);
286}
287
288/// Return a unique function if a helper can be formed with the given Regs
289/// and frame type.
290/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
291/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
292/// stp x20, x19, [sp, #16]
293/// ret
294///
295/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
296/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
297/// stp x20, x19, [sp, #16]
298/// add fp, sp, #32
299/// ret
300///
301/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
302/// mov x16, x30
303/// ldp x29, x30, [sp, #32]
304/// ldp x20, x19, [sp, #16]
305/// ldp x22, x21, [sp], #48
306/// ret x16
307///
308/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
309/// ldp x29, x30, [sp, #32]
310/// ldp x20, x19, [sp, #16]
311/// ldp x22, x21, [sp], #48
312/// ret
313/// @param M module
314/// @param MMI machine module info
315/// @param Regs callee save regs that the helper will handle
316/// @param Type frame helper type
317/// @return a helper function
318static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
319 SmallVectorImpl<unsigned> &Regs,
320 FrameHelperType Type,
321 unsigned FpOffset = 0) {
322 assert(Regs.size() >= 2);
323 auto Name = getFrameHelperName(Regs, Type, FpOffset);
324 auto *F = M->getFunction(Name);
325 if (F)
326 return F;
327
328 auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
329 MachineBasicBlock &MBB = *MF.begin();
330 const TargetSubtargetInfo &STI = MF.getSubtarget();
331 const TargetInstrInfo &TII = *STI.getInstrInfo();
332
333 int Size = (int)Regs.size();
334 switch (Type) {
335 case FrameHelperType::Prolog:
336 case FrameHelperType::PrologFrame: {
337 // Compute the remaining SP adjust beyond FP/LR.
338 auto LRIdx = std::distance(first: Regs.begin(), last: llvm::find(Range&: Regs, Val: AArch64::LR));
339
340 // If the register stored to the lowest address is not LR, we must subtract
341 // more from SP here.
342 if (LRIdx != Size - 2) {
343 assert(Regs[Size - 2] != AArch64::LR);
344 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1],
345 Offset: LRIdx - Size + 2, IsPreDec: true);
346 }
347
348 // Store CSRs in the reverse order.
349 for (int I = Size - 3; I >= 0; I -= 2) {
350 // FP/LR has been stored at call-site.
351 if (Regs[I - 1] == AArch64::LR)
352 continue;
353 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1,
354 IsPreDec: false);
355 }
356 if (Type == FrameHelperType::PrologFrame)
357 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
358 .addDef(RegNo: AArch64::FP)
359 .addUse(RegNo: AArch64::SP)
360 .addImm(Val: FpOffset)
361 .addImm(Val: 0)
362 .setMIFlag(MachineInstr::FrameSetup);
363
364 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
365 .addReg(RegNo: AArch64::LR);
366 break;
367 }
368 case FrameHelperType::Epilog:
369 case FrameHelperType::EpilogTail:
370 if (Type == FrameHelperType::Epilog)
371 // Stash LR to X16
372 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ORRXrs))
373 .addDef(RegNo: AArch64::X16)
374 .addReg(RegNo: AArch64::XZR)
375 .addUse(RegNo: AArch64::LR)
376 .addImm(Val: 0);
377
378 for (int I = 0; I < Size - 2; I += 2)
379 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2,
380 IsPostDec: false);
381 // Restore the last CSR with post-increment of SP.
382 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size,
383 IsPostDec: true);
384
385 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
386 .addReg(RegNo: Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
387 break;
388 }
389
390 return M->getFunction(Name);
391}
392
393/// This function checks if a frame helper should be used for
394/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
395/// @param MBB machine basic block
396/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
397/// @param Regs callee save registers that are saved or restored.
398/// @param Type frame helper type
399/// @return True if a use of helper is qualified.
400static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
401 MachineBasicBlock::iterator &NextMBBI,
402 SmallVectorImpl<unsigned> &Regs,
403 FrameHelperType Type) {
404 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
405 auto RegCount = Regs.size();
406 assert(RegCount > 0 && (RegCount % 2 == 0));
407 // # of instructions that will be outlined.
408 int InstCount = RegCount / 2;
409
410 // Do not use a helper call when not saving LR.
411 if (!llvm::is_contained(Range&: Regs, Element: AArch64::LR))
412 return false;
413
414 switch (Type) {
415 case FrameHelperType::Prolog:
416 // Prolog helper cannot save FP/LR.
417 InstCount--;
418 break;
419 case FrameHelperType::PrologFrame: {
420 // Effectively no change in InstCount since FpAdjustment is included.
421 break;
422 }
423 case FrameHelperType::Epilog:
424 // Bail-out if X16 is live across the epilog helper because it is used in
425 // the helper to handle X30.
426 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
427 if (NextMI->readsRegister(Reg: AArch64::W16, TRI))
428 return false;
429 }
430 // Epilog may not be in the last block. Check the liveness in successors.
431 for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
432 if (SuccMBB->isLiveIn(Reg: AArch64::W16) || SuccMBB->isLiveIn(Reg: AArch64::X16))
433 return false;
434 }
435 // No change in InstCount for the regular epilog case.
436 break;
437 case FrameHelperType::EpilogTail: {
438 // EpilogTail helper includes the caller's return.
439 if (NextMBBI == MBB.end())
440 return false;
441 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
442 return false;
443 InstCount++;
444 break;
445 }
446 }
447
448 return InstCount >= FrameHelperSizeThreshold;
449}
450
451/// Lower a HOM_Epilog pseudo instruction into a helper call while
452/// creating the helper on demand. Or emit a sequence of loads in place when not
453/// using a helper call.
454///
455/// 1. With a helper including ret
456/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
457/// ret ; NextMBBI
458/// =>
459/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
460/// ... ; NextMBBI
461///
462/// 2. With a helper
463/// HOM_Epilog x30, x29, x19, x20, x21, x22
464/// =>
465/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
466///
467/// 3. Without a helper
468/// HOM_Epilog x30, x29, x19, x20, x21, x22
469/// =>
470/// ldp x29, x30, [sp, #32]
471/// ldp x20, x19, [sp, #16]
472/// ldp x22, x21, [sp], #48
473bool AArch64LowerHomogeneousPrologEpilogImpl::lowerEpilog(
474 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
475 MachineBasicBlock::iterator &NextMBBI) {
476 auto &MF = *MBB.getParent();
477 MachineInstr &MI = *MBBI;
478
479 DebugLoc DL = MI.getDebugLoc();
480 SmallVector<unsigned, 8> Regs;
481 bool HasUnpairedReg = false;
482 for (auto &MO : MI.operands())
483 if (MO.isReg()) {
484 if (!MO.getReg().isValid()) {
485 // For now we are only expecting unpaired GP registers which should
486 // occur exactly once.
487 assert(!HasUnpairedReg);
488 HasUnpairedReg = true;
489 }
490 Regs.push_back(Elt: MO.getReg());
491 }
492 (void)HasUnpairedReg;
493 int Size = (int)Regs.size();
494 if (Size == 0)
495 return false;
496 // Registers are in pair.
497 assert(Size % 2 == 0);
498 assert(MI.getOpcode() == AArch64::HOM_Epilog);
499
500 auto Return = NextMBBI;
501 MachineInstr *HelperCall = nullptr;
502 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::EpilogTail)) {
503 // When MBB ends with a return, emit a tail-call to the epilog helper
504 auto *EpilogTailHelper =
505 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::EpilogTail);
506 HelperCall = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::TCRETURNdi))
507 .addGlobalAddress(GV: EpilogTailHelper)
508 .addImm(Val: 0)
509 .setMIFlag(MachineInstr::FrameDestroy)
510 .copyImplicitOps(OtherMI: MI)
511 .copyImplicitOps(OtherMI: *Return);
512 NextMBBI = std::next(x: Return);
513 Return->removeFromParent();
514 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
515 Type: FrameHelperType::Epilog)) {
516 // The default epilog helper case.
517 auto *EpilogHelper =
518 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Epilog);
519 HelperCall = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
520 .addGlobalAddress(GV: EpilogHelper)
521 .setMIFlag(MachineInstr::FrameDestroy)
522 .copyImplicitOps(OtherMI: MI);
523 } else {
524 // Fall back to no-helper.
525 for (int I = 0; I < Size - 2; I += 2)
526 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, IsPostDec: false);
527 // Restore the last CSR with post-increment of SP.
528 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, IsPostDec: true);
529 }
530
531 // Make sure all explicit definitions are preserved in the helper call;
532 // implicit ones are already handled by copyImplicitOps.
533 if (HelperCall)
534 for (auto &Def : MBBI->defs())
535 HelperCall->addRegisterDefined(Reg: Def.getReg(),
536 RegInfo: MF.getRegInfo().getTargetRegisterInfo());
537 MBBI->removeFromParent();
538 return true;
539}
540
541/// Lower a HOM_Prolog pseudo instruction into a helper call while
542/// creating the helper on demand. Or emit a sequence of stores in place when
543/// not using a helper call.
544///
545/// 1. With a helper including frame-setup
546/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
547/// =>
548/// stp x29, x30, [sp, #-16]!
549/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
550///
551/// 2. With a helper
552/// HOM_Prolog x30, x29, x19, x20, x21, x22
553/// =>
554/// stp x29, x30, [sp, #-16]!
555/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
556///
557/// 3. Without a helper
558/// HOM_Prolog x30, x29, x19, x20, x21, x22
559/// =>
560/// stp x22, x21, [sp, #-48]!
561/// stp x20, x19, [sp, #16]
562/// stp x29, x30, [sp, #32]
563bool AArch64LowerHomogeneousPrologEpilogImpl::lowerProlog(
564 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
565 MachineBasicBlock::iterator &NextMBBI) {
566 auto &MF = *MBB.getParent();
567 MachineInstr &MI = *MBBI;
568
569 DebugLoc DL = MI.getDebugLoc();
570 SmallVector<unsigned, 8> Regs;
571 bool HasUnpairedReg = false;
572 int LRIdx = 0;
573 std::optional<int> FpOffset;
574 for (auto &MO : MI.operands()) {
575 if (MO.isReg()) {
576 if (MO.getReg().isValid()) {
577 if (MO.getReg() == AArch64::LR)
578 LRIdx = Regs.size();
579 } else {
580 // For now we are only expecting unpaired GP registers which should
581 // occur exactly once.
582 assert(!HasUnpairedReg);
583 HasUnpairedReg = true;
584 }
585 Regs.push_back(Elt: MO.getReg());
586 } else if (MO.isImm()) {
587 FpOffset = MO.getImm();
588 }
589 }
590 (void)HasUnpairedReg;
591 int Size = (int)Regs.size();
592 if (Size == 0)
593 return false;
594 // Allow compact unwind case only for oww.
595 assert(Size % 2 == 0);
596 assert(MI.getOpcode() == AArch64::HOM_Prolog);
597
598 if (FpOffset &&
599 shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::PrologFrame)) {
600 // FP/LR is stored at the top of stack before the prolog helper call.
601 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
602 auto *PrologFrameHelper = getOrCreateFrameHelper(
603 M, MMI, Regs, Type: FrameHelperType::PrologFrame, FpOffset: *FpOffset);
604 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
605 .addGlobalAddress(GV: PrologFrameHelper)
606 .setMIFlag(MachineInstr::FrameSetup)
607 .copyImplicitOps(OtherMI: MI)
608 .addReg(RegNo: AArch64::FP, Flags: RegState::Implicit | RegState::Define)
609 .addReg(RegNo: AArch64::SP, Flags: RegState::Implicit);
610 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
611 Type: FrameHelperType::Prolog)) {
612 // FP/LR is stored at the top of stack before the prolog helper call.
613 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
614 auto *PrologHelper =
615 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Prolog);
616 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
617 .addGlobalAddress(GV: PrologHelper)
618 .setMIFlag(MachineInstr::FrameSetup)
619 .copyImplicitOps(OtherMI: MI);
620 } else {
621 // Fall back to no-helper.
622 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: -Size, IsPreDec: true);
623 for (int I = Size - 3; I >= 0; I -= 2)
624 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, IsPreDec: false);
625 if (FpOffset) {
626 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri))
627 .addDef(RegNo: AArch64::FP)
628 .addUse(RegNo: AArch64::SP)
629 .addImm(Val: *FpOffset)
630 .addImm(Val: 0)
631 .setMIFlag(MachineInstr::FrameSetup);
632 }
633 }
634
635 MBBI->removeFromParent();
636 return true;
637}
638
639/// Process each machine instruction
640/// @param MBB machine basic block
641/// @param MBBI current instruction iterator
642/// @param NextMBBI next instruction iterator which can be updated
643/// @return True when IR is changed.
644bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMI(
645 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
646 MachineBasicBlock::iterator &NextMBBI) {
647 MachineInstr &MI = *MBBI;
648 unsigned Opcode = MI.getOpcode();
649 switch (Opcode) {
650 default:
651 break;
652 case AArch64::HOM_Prolog:
653 return lowerProlog(MBB, MBBI, NextMBBI);
654 case AArch64::HOM_Epilog:
655 return lowerEpilog(MBB, MBBI, NextMBBI);
656 }
657 return false;
658}
659
660bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMBB(MachineBasicBlock &MBB) {
661 bool Modified = false;
662
663 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
664 while (MBBI != E) {
665 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
666 Modified |= runOnMI(MBB, MBBI, NextMBBI&: NMBBI);
667 MBBI = NMBBI;
668 }
669
670 return Modified;
671}
672
673bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMachineFunction(
674 MachineFunction &MF) {
675 TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
676
677 bool Modified = false;
678 for (auto &MBB : MF)
679 Modified |= runOnMBB(MBB);
680 return Modified;
681}
682
683ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
684 return new AArch64LowerHomogeneousPrologEpilogLegacy();
685}
686