1//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that lowers homogeneous prolog/epilog instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64Subtarget.h"
15#include "MCTargetDesc/AArch64InstPrinter.h"
16#include "llvm/CodeGen/MachineBasicBlock.h"
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/MachineInstr.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineModuleInfo.h"
21#include "llvm/CodeGen/MachineOperand.h"
22#include "llvm/CodeGen/TargetSubtargetInfo.h"
23#include "llvm/IR/DebugLoc.h"
24#include "llvm/IR/IRBuilder.h"
25#include "llvm/IR/Module.h"
26#include "llvm/Pass.h"
27#include <optional>
28#include <sstream>
29
30using namespace llvm;
31
// Human-readable name of this pass; used both when registering the pass and
// when reporting its name.
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
  "AArch64 homogeneous prolog/epilog lowering pass"
34
35static cl::opt<int> FrameHelperSizeThreshold(
36 "frame-helper-size-threshold", cl::init(Val: 2), cl::Hidden,
37 cl::desc("The minimum number of instructions that are outlined in a frame "
38 "helper (default = 2)"));
39
40namespace {
41
42class AArch64LowerHomogeneousPE {
43public:
44 const AArch64InstrInfo *TII;
45
46 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
47 : M(M), MMI(MMI) {}
48
49 bool run();
50 bool runOnMachineFunction(MachineFunction &Fn);
51
52private:
53 Module *M;
54 MachineModuleInfo *MMI;
55
56 bool runOnMBB(MachineBasicBlock &MBB);
57 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
58 MachineBasicBlock::iterator &NextMBBI);
59
60 /// Lower a HOM_Prolog pseudo instruction into a helper call
61 /// or a sequence of homogeneous stores.
62 /// When a fp setup follows, it can be optimized.
63 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
64 MachineBasicBlock::iterator &NextMBBI);
65 /// Lower a HOM_Epilog pseudo instruction into a helper call
66 /// or a sequence of homogeneous loads.
67 /// When a return follow, it can be optimized.
68 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
69 MachineBasicBlock::iterator &NextMBBI);
70};
71
72class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
73public:
74 static char ID;
75
76 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {}
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.addRequired<MachineModuleInfoWrapperPass>();
79 AU.addPreserved<MachineModuleInfoWrapperPass>();
80 AU.setPreservesAll();
81 ModulePass::getAnalysisUsage(AU);
82 }
83 bool runOnModule(Module &M) override;
84
85 StringRef getPassName() const override {
86 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
87 }
88};
89
90} // end anonymous namespace
91
92char AArch64LowerHomogeneousPrologEpilog::ID = 0;
93
94INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
95 "aarch64-lower-homogeneous-prolog-epilog",
96 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
97
98bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
99 if (skipModule(M))
100 return false;
101
102 MachineModuleInfo *MMI =
103 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
104 return AArch64LowerHomogeneousPE(&M, MMI).run();
105}
106
107bool AArch64LowerHomogeneousPE::run() {
108 bool Changed = false;
109 for (auto &F : *M) {
110 if (F.empty())
111 continue;
112
113 MachineFunction *MF = MMI->getMachineFunction(F);
114 if (!MF)
115 continue;
116 Changed |= runOnMachineFunction(Fn&: *MF);
117 }
118
119 return Changed;
120}
/// The flavours of outlined frame helper this pass can create.
enum FrameHelperType {
  Prolog,      ///< Stores callee-saved registers.
  PrologFrame, ///< Like Prolog, and additionally sets up FP.
  Epilog,      ///< Restores callee-saved registers, returns via X16.
  EpilogTail   ///< Restores callee-saved registers and returns via LR.
};
122
123/// Return a frame helper name with the given CSRs and the helper type.
124/// For instance, a prolog helper that saves x19 and x20 is named as
125/// OUTLINED_FUNCTION_PROLOG_x19x20.
126static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
127 FrameHelperType Type, unsigned FpOffset) {
128 std::ostringstream RegStream;
129 switch (Type) {
130 case FrameHelperType::Prolog:
131 RegStream << "OUTLINED_FUNCTION_PROLOG_";
132 break;
133 case FrameHelperType::PrologFrame:
134 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
135 break;
136 case FrameHelperType::Epilog:
137 RegStream << "OUTLINED_FUNCTION_EPILOG_";
138 break;
139 case FrameHelperType::EpilogTail:
140 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
141 break;
142 }
143
144 for (auto Reg : Regs) {
145 if (Reg == AArch64::NoRegister)
146 continue;
147 RegStream << AArch64InstPrinter::getRegisterName(Reg);
148 }
149
150 return RegStream.str();
151}
152
153/// Create a Function for the unique frame helper with the given name.
154/// Return a newly created MachineFunction with an empty MachineBasicBlock.
155static MachineFunction &createFrameHelperMachineFunction(Module *M,
156 MachineModuleInfo *MMI,
157 StringRef Name) {
158 LLVMContext &C = M->getContext();
159 Function *F = M->getFunction(Name);
160 assert(F == nullptr && "Function has been created before");
161 F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
162 Linkage: Function::ExternalLinkage, N: Name, M);
163 assert(F && "Function was null!");
164
165 // Use ODR linkage to avoid duplication.
166 F->setLinkage(GlobalValue::LinkOnceODRLinkage);
167 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
168
169 // Set minsize, so we don't insert padding between outlined functions.
170 F->addFnAttr(Kind: Attribute::NoInline);
171 F->addFnAttr(Kind: Attribute::MinSize);
172 F->addFnAttr(Kind: Attribute::Naked);
173
174 MachineFunction &MF = MMI->getOrCreateMachineFunction(F&: *F);
175 // Remove unnecessary register liveness and set NoVRegs.
176 MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs();
177 MF.getRegInfo().freezeReservedRegs();
178
179 // Create entry block.
180 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
181 IRBuilder<> Builder(EntryBB);
182 Builder.CreateRetVoid();
183
184 // Insert the new block into the function.
185 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
186 MF.insert(MBBI: MF.begin(), MBB);
187
188 return MF;
189}
190
191/// Emit a store-pair instruction for frame-setup.
192/// If Reg2 is AArch64::NoRegister, emit STR instead.
193static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
194 MachineBasicBlock::iterator Pos,
195 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
196 int Offset, bool IsPreDec) {
197 assert(Reg1 != AArch64::NoRegister);
198 const bool IsPaired = Reg2 != AArch64::NoRegister;
199 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
200 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
201 unsigned Opc;
202 if (IsPreDec) {
203 if (IsFloat)
204 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
205 else
206 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
207 } else {
208 if (IsFloat)
209 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
210 else
211 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
212 }
213 // The implicit scale for Offset is 8.
214 TypeSize Scale(0U, false), Width(0U, false);
215 int64_t MinOffset, MaxOffset;
216 [[maybe_unused]] bool Success =
217 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
218 assert(Success && "Invalid Opcode");
219 Offset *= (8 / (int)Scale);
220
221 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
222 if (IsPreDec)
223 MIB.addDef(RegNo: AArch64::SP);
224 if (IsPaired)
225 MIB.addReg(RegNo: Reg2);
226 MIB.addReg(RegNo: Reg1)
227 .addReg(RegNo: AArch64::SP)
228 .addImm(Val: Offset)
229 .setMIFlag(MachineInstr::FrameSetup);
230}
231
232/// Emit a load-pair instruction for frame-destroy.
233/// If Reg2 is AArch64::NoRegister, emit LDR instead.
234static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
235 MachineBasicBlock::iterator Pos,
236 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
237 int Offset, bool IsPostDec) {
238 assert(Reg1 != AArch64::NoRegister);
239 const bool IsPaired = Reg2 != AArch64::NoRegister;
240 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
241 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
242 unsigned Opc;
243 if (IsPostDec) {
244 if (IsFloat)
245 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
246 else
247 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
248 } else {
249 if (IsFloat)
250 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
251 else
252 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
253 }
254 // The implicit scale for Offset is 8.
255 TypeSize Scale(0U, false), Width(0U, false);
256 int64_t MinOffset, MaxOffset;
257 [[maybe_unused]] bool Success =
258 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
259 assert(Success && "Invalid Opcode");
260 Offset *= (8 / (int)Scale);
261
262 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
263 if (IsPostDec)
264 MIB.addDef(RegNo: AArch64::SP);
265 if (IsPaired)
266 MIB.addReg(RegNo: Reg2, Flags: getDefRegState(B: true));
267 MIB.addReg(RegNo: Reg1, Flags: getDefRegState(B: true))
268 .addReg(RegNo: AArch64::SP)
269 .addImm(Val: Offset)
270 .setMIFlag(MachineInstr::FrameDestroy);
271}
272
273/// Return a unique function if a helper can be formed with the given Regs
274/// and frame type.
275/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
276/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
277/// stp x20, x19, [sp, #16]
278/// ret
279///
280/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
281/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
282/// stp x20, x19, [sp, #16]
283/// add fp, sp, #32
284/// ret
285///
286/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
287/// mov x16, x30
288/// ldp x29, x30, [sp, #32]
289/// ldp x20, x19, [sp, #16]
290/// ldp x22, x21, [sp], #48
291/// ret x16
292///
293/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
294/// ldp x29, x30, [sp, #32]
295/// ldp x20, x19, [sp, #16]
296/// ldp x22, x21, [sp], #48
297/// ret
298/// @param M module
299/// @param MMI machine module info
300/// @param Regs callee save regs that the helper will handle
301/// @param Type frame helper type
302/// @return a helper function
303static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
304 SmallVectorImpl<unsigned> &Regs,
305 FrameHelperType Type,
306 unsigned FpOffset = 0) {
307 assert(Regs.size() >= 2);
308 auto Name = getFrameHelperName(Regs, Type, FpOffset);
309 auto *F = M->getFunction(Name);
310 if (F)
311 return F;
312
313 auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
314 MachineBasicBlock &MBB = *MF.begin();
315 const TargetSubtargetInfo &STI = MF.getSubtarget();
316 const TargetInstrInfo &TII = *STI.getInstrInfo();
317
318 int Size = (int)Regs.size();
319 switch (Type) {
320 case FrameHelperType::Prolog:
321 case FrameHelperType::PrologFrame: {
322 // Compute the remaining SP adjust beyond FP/LR.
323 auto LRIdx = std::distance(first: Regs.begin(), last: llvm::find(Range&: Regs, Val: AArch64::LR));
324
325 // If the register stored to the lowest address is not LR, we must subtract
326 // more from SP here.
327 if (LRIdx != Size - 2) {
328 assert(Regs[Size - 2] != AArch64::LR);
329 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1],
330 Offset: LRIdx - Size + 2, IsPreDec: true);
331 }
332
333 // Store CSRs in the reverse order.
334 for (int I = Size - 3; I >= 0; I -= 2) {
335 // FP/LR has been stored at call-site.
336 if (Regs[I - 1] == AArch64::LR)
337 continue;
338 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1,
339 IsPreDec: false);
340 }
341 if (Type == FrameHelperType::PrologFrame)
342 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
343 .addDef(RegNo: AArch64::FP)
344 .addUse(RegNo: AArch64::SP)
345 .addImm(Val: FpOffset)
346 .addImm(Val: 0)
347 .setMIFlag(MachineInstr::FrameSetup);
348
349 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
350 .addReg(RegNo: AArch64::LR);
351 break;
352 }
353 case FrameHelperType::Epilog:
354 case FrameHelperType::EpilogTail:
355 if (Type == FrameHelperType::Epilog)
356 // Stash LR to X16
357 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ORRXrs))
358 .addDef(RegNo: AArch64::X16)
359 .addReg(RegNo: AArch64::XZR)
360 .addUse(RegNo: AArch64::LR)
361 .addImm(Val: 0);
362
363 for (int I = 0; I < Size - 2; I += 2)
364 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2,
365 IsPostDec: false);
366 // Restore the last CSR with post-increment of SP.
367 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size,
368 IsPostDec: true);
369
370 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
371 .addReg(RegNo: Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
372 break;
373 }
374
375 return M->getFunction(Name);
376}
377
378/// This function checks if a frame helper should be used for
379/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
380/// @param MBB machine basic block
381/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
382/// @param Regs callee save registers that are saved or restored.
383/// @param Type frame helper type
384/// @return True if a use of helper is qualified.
385static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
386 MachineBasicBlock::iterator &NextMBBI,
387 SmallVectorImpl<unsigned> &Regs,
388 FrameHelperType Type) {
389 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
390 auto RegCount = Regs.size();
391 assert(RegCount > 0 && (RegCount % 2 == 0));
392 // # of instructions that will be outlined.
393 int InstCount = RegCount / 2;
394
395 // Do not use a helper call when not saving LR.
396 if (!llvm::is_contained(Range&: Regs, Element: AArch64::LR))
397 return false;
398
399 switch (Type) {
400 case FrameHelperType::Prolog:
401 // Prolog helper cannot save FP/LR.
402 InstCount--;
403 break;
404 case FrameHelperType::PrologFrame: {
405 // Effectively no change in InstCount since FpAdjustment is included.
406 break;
407 }
408 case FrameHelperType::Epilog:
409 // Bail-out if X16 is live across the epilog helper because it is used in
410 // the helper to handle X30.
411 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
412 if (NextMI->readsRegister(Reg: AArch64::W16, TRI))
413 return false;
414 }
415 // Epilog may not be in the last block. Check the liveness in successors.
416 for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
417 if (SuccMBB->isLiveIn(Reg: AArch64::W16) || SuccMBB->isLiveIn(Reg: AArch64::X16))
418 return false;
419 }
420 // No change in InstCount for the regular epilog case.
421 break;
422 case FrameHelperType::EpilogTail: {
423 // EpilogTail helper includes the caller's return.
424 if (NextMBBI == MBB.end())
425 return false;
426 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
427 return false;
428 InstCount++;
429 break;
430 }
431 }
432
433 return InstCount >= FrameHelperSizeThreshold;
434}
435
436/// Lower a HOM_Epilog pseudo instruction into a helper call while
437/// creating the helper on demand. Or emit a sequence of loads in place when not
438/// using a helper call.
439///
440/// 1. With a helper including ret
441/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
442/// ret ; NextMBBI
443/// =>
444/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
445/// ... ; NextMBBI
446///
447/// 2. With a helper
448/// HOM_Epilog x30, x29, x19, x20, x21, x22
449/// =>
450/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
451///
452/// 3. Without a helper
453/// HOM_Epilog x30, x29, x19, x20, x21, x22
454/// =>
455/// ldp x29, x30, [sp, #32]
456/// ldp x20, x19, [sp, #16]
457/// ldp x22, x21, [sp], #48
458bool AArch64LowerHomogeneousPE::lowerEpilog(
459 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
460 MachineBasicBlock::iterator &NextMBBI) {
461 auto &MF = *MBB.getParent();
462 MachineInstr &MI = *MBBI;
463
464 DebugLoc DL = MI.getDebugLoc();
465 SmallVector<unsigned, 8> Regs;
466 bool HasUnpairedReg = false;
467 for (auto &MO : MI.operands())
468 if (MO.isReg()) {
469 if (!MO.getReg().isValid()) {
470 // For now we are only expecting unpaired GP registers which should
471 // occur exactly once.
472 assert(!HasUnpairedReg);
473 HasUnpairedReg = true;
474 }
475 Regs.push_back(Elt: MO.getReg());
476 }
477 (void)HasUnpairedReg;
478 int Size = (int)Regs.size();
479 if (Size == 0)
480 return false;
481 // Registers are in pair.
482 assert(Size % 2 == 0);
483 assert(MI.getOpcode() == AArch64::HOM_Epilog);
484
485 auto Return = NextMBBI;
486 MachineInstr *HelperCall = nullptr;
487 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::EpilogTail)) {
488 // When MBB ends with a return, emit a tail-call to the epilog helper
489 auto *EpilogTailHelper =
490 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::EpilogTail);
491 HelperCall = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::TCRETURNdi))
492 .addGlobalAddress(GV: EpilogTailHelper)
493 .addImm(Val: 0)
494 .setMIFlag(MachineInstr::FrameDestroy)
495 .copyImplicitOps(OtherMI: MI)
496 .copyImplicitOps(OtherMI: *Return);
497 NextMBBI = std::next(x: Return);
498 Return->removeFromParent();
499 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
500 Type: FrameHelperType::Epilog)) {
501 // The default epilog helper case.
502 auto *EpilogHelper =
503 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Epilog);
504 HelperCall = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
505 .addGlobalAddress(GV: EpilogHelper)
506 .setMIFlag(MachineInstr::FrameDestroy)
507 .copyImplicitOps(OtherMI: MI);
508 } else {
509 // Fall back to no-helper.
510 for (int I = 0; I < Size - 2; I += 2)
511 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, IsPostDec: false);
512 // Restore the last CSR with post-increment of SP.
513 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, IsPostDec: true);
514 }
515
516 // Make sure all explicit definitions are preserved in the helper call;
517 // implicit ones are already handled by copyImplicitOps.
518 if (HelperCall)
519 for (auto &Def : MBBI->defs())
520 HelperCall->addRegisterDefined(Reg: Def.getReg(),
521 RegInfo: MF.getRegInfo().getTargetRegisterInfo());
522 MBBI->removeFromParent();
523 return true;
524}
525
526/// Lower a HOM_Prolog pseudo instruction into a helper call while
527/// creating the helper on demand. Or emit a sequence of stores in place when
528/// not using a helper call.
529///
530/// 1. With a helper including frame-setup
531/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
532/// =>
533/// stp x29, x30, [sp, #-16]!
534/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
535///
536/// 2. With a helper
537/// HOM_Prolog x30, x29, x19, x20, x21, x22
538/// =>
539/// stp x29, x30, [sp, #-16]!
540/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
541///
542/// 3. Without a helper
543/// HOM_Prolog x30, x29, x19, x20, x21, x22
544/// =>
545/// stp x22, x21, [sp, #-48]!
546/// stp x20, x19, [sp, #16]
547/// stp x29, x30, [sp, #32]
548bool AArch64LowerHomogeneousPE::lowerProlog(
549 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
550 MachineBasicBlock::iterator &NextMBBI) {
551 auto &MF = *MBB.getParent();
552 MachineInstr &MI = *MBBI;
553
554 DebugLoc DL = MI.getDebugLoc();
555 SmallVector<unsigned, 8> Regs;
556 bool HasUnpairedReg = false;
557 int LRIdx = 0;
558 std::optional<int> FpOffset;
559 for (auto &MO : MI.operands()) {
560 if (MO.isReg()) {
561 if (MO.getReg().isValid()) {
562 if (MO.getReg() == AArch64::LR)
563 LRIdx = Regs.size();
564 } else {
565 // For now we are only expecting unpaired GP registers which should
566 // occur exactly once.
567 assert(!HasUnpairedReg);
568 HasUnpairedReg = true;
569 }
570 Regs.push_back(Elt: MO.getReg());
571 } else if (MO.isImm()) {
572 FpOffset = MO.getImm();
573 }
574 }
575 (void)HasUnpairedReg;
576 int Size = (int)Regs.size();
577 if (Size == 0)
578 return false;
579 // Allow compact unwind case only for oww.
580 assert(Size % 2 == 0);
581 assert(MI.getOpcode() == AArch64::HOM_Prolog);
582
583 if (FpOffset &&
584 shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::PrologFrame)) {
585 // FP/LR is stored at the top of stack before the prolog helper call.
586 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
587 auto *PrologFrameHelper = getOrCreateFrameHelper(
588 M, MMI, Regs, Type: FrameHelperType::PrologFrame, FpOffset: *FpOffset);
589 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
590 .addGlobalAddress(GV: PrologFrameHelper)
591 .setMIFlag(MachineInstr::FrameSetup)
592 .copyImplicitOps(OtherMI: MI)
593 .addReg(RegNo: AArch64::FP, Flags: RegState::Implicit | RegState::Define)
594 .addReg(RegNo: AArch64::SP, Flags: RegState::Implicit);
595 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
596 Type: FrameHelperType::Prolog)) {
597 // FP/LR is stored at the top of stack before the prolog helper call.
598 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
599 auto *PrologHelper =
600 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Prolog);
601 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
602 .addGlobalAddress(GV: PrologHelper)
603 .setMIFlag(MachineInstr::FrameSetup)
604 .copyImplicitOps(OtherMI: MI);
605 } else {
606 // Fall back to no-helper.
607 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: -Size, IsPreDec: true);
608 for (int I = Size - 3; I >= 0; I -= 2)
609 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, IsPreDec: false);
610 if (FpOffset) {
611 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri))
612 .addDef(RegNo: AArch64::FP)
613 .addUse(RegNo: AArch64::SP)
614 .addImm(Val: *FpOffset)
615 .addImm(Val: 0)
616 .setMIFlag(MachineInstr::FrameSetup);
617 }
618 }
619
620 MBBI->removeFromParent();
621 return true;
622}
623
624/// Process each machine instruction
625/// @param MBB machine basic block
626/// @param MBBI current instruction iterator
627/// @param NextMBBI next instruction iterator which can be updated
628/// @return True when IR is changed.
629bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
630 MachineBasicBlock::iterator MBBI,
631 MachineBasicBlock::iterator &NextMBBI) {
632 MachineInstr &MI = *MBBI;
633 unsigned Opcode = MI.getOpcode();
634 switch (Opcode) {
635 default:
636 break;
637 case AArch64::HOM_Prolog:
638 return lowerProlog(MBB, MBBI, NextMBBI);
639 case AArch64::HOM_Epilog:
640 return lowerEpilog(MBB, MBBI, NextMBBI);
641 }
642 return false;
643}
644
645bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
646 bool Modified = false;
647
648 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
649 while (MBBI != E) {
650 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
651 Modified |= runOnMI(MBB, MBBI, NextMBBI&: NMBBI);
652 MBBI = NMBBI;
653 }
654
655 return Modified;
656}
657
658bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
659 TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
660
661 bool Modified = false;
662 for (auto &MBB : MF)
663 Modified |= runOnMBB(MBB);
664 return Modified;
665}
666
667ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
668 return new AArch64LowerHomogeneousPrologEpilog();
669}
670