1//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that lowers homogeneous prolog/epilog instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64Subtarget.h"
15#include "MCTargetDesc/AArch64InstPrinter.h"
16#include "Utils/AArch64BaseInfo.h"
17#include "llvm/CodeGen/MachineBasicBlock.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineFunctionPass.h"
20#include "llvm/CodeGen/MachineInstr.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineModuleInfo.h"
23#include "llvm/CodeGen/MachineOperand.h"
24#include "llvm/CodeGen/TargetSubtargetInfo.h"
25#include "llvm/IR/DebugLoc.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Module.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/raw_ostream.h"
30#include <optional>
31#include <sstream>
32
33using namespace llvm;
34
// Human-readable name of the pass; used both when the pass is registered and
// as the result of getPassName().
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
  "AArch64 homogeneous prolog/epilog lowering pass"
37
38cl::opt<int> FrameHelperSizeThreshold(
39 "frame-helper-size-threshold", cl::init(Val: 2), cl::Hidden,
40 cl::desc("The minimum number of instructions that are outlined in a frame "
41 "helper (default = 2)"));
42
43namespace {
44
45class AArch64LowerHomogeneousPE {
46public:
47 const AArch64InstrInfo *TII;
48
49 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
50 : M(M), MMI(MMI) {}
51
52 bool run();
53 bool runOnMachineFunction(MachineFunction &Fn);
54
55private:
56 Module *M;
57 MachineModuleInfo *MMI;
58
59 bool runOnMBB(MachineBasicBlock &MBB);
60 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
61 MachineBasicBlock::iterator &NextMBBI);
62
63 /// Lower a HOM_Prolog pseudo instruction into a helper call
64 /// or a sequence of homogeneous stores.
65 /// When a fp setup follows, it can be optimized.
66 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
67 MachineBasicBlock::iterator &NextMBBI);
68 /// Lower a HOM_Epilog pseudo instruction into a helper call
69 /// or a sequence of homogeneous loads.
70 /// When a return follow, it can be optimized.
71 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
72 MachineBasicBlock::iterator &NextMBBI);
73};
74
75class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
76public:
77 static char ID;
78
79 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
80 initializeAArch64LowerHomogeneousPrologEpilogPass(
81 *PassRegistry::getPassRegistry());
82 }
83 void getAnalysisUsage(AnalysisUsage &AU) const override {
84 AU.addRequired<MachineModuleInfoWrapperPass>();
85 AU.addPreserved<MachineModuleInfoWrapperPass>();
86 AU.setPreservesAll();
87 ModulePass::getAnalysisUsage(AU);
88 }
89 bool runOnModule(Module &M) override;
90
91 StringRef getPassName() const override {
92 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
93 }
94};
95
96} // end anonymous namespace
97
98char AArch64LowerHomogeneousPrologEpilog::ID = 0;
99
100INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
101 "aarch64-lower-homogeneous-prolog-epilog",
102 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
103
104bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
105 if (skipModule(M))
106 return false;
107
108 MachineModuleInfo *MMI =
109 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
110 return AArch64LowerHomogeneousPE(&M, MMI).run();
111}
112
113bool AArch64LowerHomogeneousPE::run() {
114 bool Changed = false;
115 for (auto &F : *M) {
116 if (F.empty())
117 continue;
118
119 MachineFunction *MF = MMI->getMachineFunction(F);
120 if (!MF)
121 continue;
122 Changed |= runOnMachineFunction(Fn&: *MF);
123 }
124
125 return Changed;
126}
/// Kind of outlined frame helper. Scoped so that the enumerators do not leak
/// into the enclosing namespace or convert implicitly to integers.
enum class FrameHelperType {
  Prolog,      ///< Stores callee-saved registers.
  PrologFrame, ///< Like Prolog, and additionally sets up FP from SP.
  Epilog,      ///< Reloads callee-saved registers and returns through X16.
  EpilogTail   ///< Reloads callee-saved registers and returns through LR.
};
128
129/// Return a frame helper name with the given CSRs and the helper type.
130/// For instance, a prolog helper that saves x19 and x20 is named as
131/// OUTLINED_FUNCTION_PROLOG_x19x20.
132static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
133 FrameHelperType Type, unsigned FpOffset) {
134 std::ostringstream RegStream;
135 switch (Type) {
136 case FrameHelperType::Prolog:
137 RegStream << "OUTLINED_FUNCTION_PROLOG_";
138 break;
139 case FrameHelperType::PrologFrame:
140 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
141 break;
142 case FrameHelperType::Epilog:
143 RegStream << "OUTLINED_FUNCTION_EPILOG_";
144 break;
145 case FrameHelperType::EpilogTail:
146 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
147 break;
148 }
149
150 for (auto Reg : Regs) {
151 if (Reg == AArch64::NoRegister)
152 continue;
153 RegStream << AArch64InstPrinter::getRegisterName(Reg);
154 }
155
156 return RegStream.str();
157}
158
159/// Create a Function for the unique frame helper with the given name.
160/// Return a newly created MachineFunction with an empty MachineBasicBlock.
161static MachineFunction &createFrameHelperMachineFunction(Module *M,
162 MachineModuleInfo *MMI,
163 StringRef Name) {
164 LLVMContext &C = M->getContext();
165 Function *F = M->getFunction(Name);
166 assert(F == nullptr && "Function has been created before");
167 F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
168 Linkage: Function::ExternalLinkage, N: Name, M);
169 assert(F && "Function was null!");
170
171 // Use ODR linkage to avoid duplication.
172 F->setLinkage(GlobalValue::LinkOnceODRLinkage);
173 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
174
175 // Set no-opt/minsize, so we don't insert padding between outlined
176 // functions.
177 F->addFnAttr(Kind: Attribute::OptimizeNone);
178 F->addFnAttr(Kind: Attribute::NoInline);
179 F->addFnAttr(Kind: Attribute::MinSize);
180 F->addFnAttr(Kind: Attribute::Naked);
181
182 MachineFunction &MF = MMI->getOrCreateMachineFunction(F&: *F);
183 // Remove unnecessary register liveness and set NoVRegs.
184 MF.getProperties().reset(P: MachineFunctionProperties::Property::TracksLiveness);
185 MF.getProperties().reset(P: MachineFunctionProperties::Property::IsSSA);
186 MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
187 MF.getRegInfo().freezeReservedRegs();
188
189 // Create entry block.
190 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
191 IRBuilder<> Builder(EntryBB);
192 Builder.CreateRetVoid();
193
194 // Insert the new block into the function.
195 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
196 MF.insert(MBBI: MF.begin(), MBB);
197
198 return MF;
199}
200
201/// Emit a store-pair instruction for frame-setup.
202/// If Reg2 is AArch64::NoRegister, emit STR instead.
203static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
204 MachineBasicBlock::iterator Pos,
205 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
206 int Offset, bool IsPreDec) {
207 assert(Reg1 != AArch64::NoRegister);
208 const bool IsPaired = Reg2 != AArch64::NoRegister;
209 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
210 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
211 unsigned Opc;
212 if (IsPreDec) {
213 if (IsFloat)
214 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
215 else
216 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
217 } else {
218 if (IsFloat)
219 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
220 else
221 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
222 }
223 // The implicit scale for Offset is 8.
224 TypeSize Scale(0U, false), Width(0U, false);
225 int64_t MinOffset, MaxOffset;
226 [[maybe_unused]] bool Success =
227 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
228 assert(Success && "Invalid Opcode");
229 Offset *= (8 / (int)Scale);
230
231 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
232 if (IsPreDec)
233 MIB.addDef(RegNo: AArch64::SP);
234 if (IsPaired)
235 MIB.addReg(RegNo: Reg2);
236 MIB.addReg(RegNo: Reg1)
237 .addReg(RegNo: AArch64::SP)
238 .addImm(Val: Offset)
239 .setMIFlag(MachineInstr::FrameSetup);
240}
241
242/// Emit a load-pair instruction for frame-destroy.
243/// If Reg2 is AArch64::NoRegister, emit LDR instead.
244static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
245 MachineBasicBlock::iterator Pos,
246 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
247 int Offset, bool IsPostDec) {
248 assert(Reg1 != AArch64::NoRegister);
249 const bool IsPaired = Reg2 != AArch64::NoRegister;
250 bool IsFloat = AArch64::FPR64RegClass.contains(Reg: Reg1);
251 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
252 unsigned Opc;
253 if (IsPostDec) {
254 if (IsFloat)
255 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
256 else
257 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
258 } else {
259 if (IsFloat)
260 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
261 else
262 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
263 }
264 // The implicit scale for Offset is 8.
265 TypeSize Scale(0U, false), Width(0U, false);
266 int64_t MinOffset, MaxOffset;
267 [[maybe_unused]] bool Success =
268 AArch64InstrInfo::getMemOpInfo(Opcode: Opc, Scale, Width, MinOffset, MaxOffset);
269 assert(Success && "Invalid Opcode");
270 Offset *= (8 / (int)Scale);
271
272 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Pos, MIMD: DebugLoc(), MCID: TII.get(Opcode: Opc));
273 if (IsPostDec)
274 MIB.addDef(RegNo: AArch64::SP);
275 if (IsPaired)
276 MIB.addReg(RegNo: Reg2, flags: getDefRegState(B: true));
277 MIB.addReg(RegNo: Reg1, flags: getDefRegState(B: true))
278 .addReg(RegNo: AArch64::SP)
279 .addImm(Val: Offset)
280 .setMIFlag(MachineInstr::FrameDestroy);
281}
282
283/// Return a unique function if a helper can be formed with the given Regs
284/// and frame type.
285/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
286/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
287/// stp x20, x19, [sp, #16]
288/// ret
289///
290/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
291/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
292/// stp x20, x19, [sp, #16]
293/// add fp, sp, #32
294/// ret
295///
296/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
297/// mov x16, x30
298/// ldp x29, x30, [sp, #32]
299/// ldp x20, x19, [sp, #16]
300/// ldp x22, x21, [sp], #48
301/// ret x16
302///
303/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
304/// ldp x29, x30, [sp, #32]
305/// ldp x20, x19, [sp, #16]
306/// ldp x22, x21, [sp], #48
307/// ret
308/// @param M module
309/// @param MMI machine module info
310/// @param Regs callee save regs that the helper will handle
311/// @param Type frame helper type
312/// @return a helper function
313static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
314 SmallVectorImpl<unsigned> &Regs,
315 FrameHelperType Type,
316 unsigned FpOffset = 0) {
317 assert(Regs.size() >= 2);
318 auto Name = getFrameHelperName(Regs, Type, FpOffset);
319 auto *F = M->getFunction(Name);
320 if (F)
321 return F;
322
323 auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
324 MachineBasicBlock &MBB = *MF.begin();
325 const TargetSubtargetInfo &STI = MF.getSubtarget();
326 const TargetInstrInfo &TII = *STI.getInstrInfo();
327
328 int Size = (int)Regs.size();
329 switch (Type) {
330 case FrameHelperType::Prolog:
331 case FrameHelperType::PrologFrame: {
332 // Compute the remaining SP adjust beyond FP/LR.
333 auto LRIdx = std::distance(first: Regs.begin(), last: llvm::find(Range&: Regs, Val: AArch64::LR));
334
335 // If the register stored to the lowest address is not LR, we must subtract
336 // more from SP here.
337 if (LRIdx != Size - 2) {
338 assert(Regs[Size - 2] != AArch64::LR);
339 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1],
340 Offset: LRIdx - Size + 2, IsPreDec: true);
341 }
342
343 // Store CSRs in the reverse order.
344 for (int I = Size - 3; I >= 0; I -= 2) {
345 // FP/LR has been stored at call-site.
346 if (Regs[I - 1] == AArch64::LR)
347 continue;
348 emitStore(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1,
349 IsPreDec: false);
350 }
351 if (Type == FrameHelperType::PrologFrame)
352 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
353 .addDef(RegNo: AArch64::FP)
354 .addUse(RegNo: AArch64::SP)
355 .addImm(Val: FpOffset)
356 .addImm(Val: 0)
357 .setMIFlag(MachineInstr::FrameSetup);
358
359 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
360 .addReg(RegNo: AArch64::LR);
361 break;
362 }
363 case FrameHelperType::Epilog:
364 case FrameHelperType::EpilogTail:
365 if (Type == FrameHelperType::Epilog)
366 // Stash LR to X16
367 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::ORRXrs))
368 .addDef(RegNo: AArch64::X16)
369 .addReg(RegNo: AArch64::XZR)
370 .addUse(RegNo: AArch64::LR)
371 .addImm(Val: 0);
372
373 for (int I = 0; I < Size - 2; I += 2)
374 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2,
375 IsPostDec: false);
376 // Restore the last CSR with post-increment of SP.
377 emitLoad(MF, MBB, Pos: MBB.end(), TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size,
378 IsPostDec: true);
379
380 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: TII.get(Opcode: AArch64::RET))
381 .addReg(RegNo: Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
382 break;
383 }
384
385 return M->getFunction(Name);
386}
387
388/// This function checks if a frame helper should be used for
389/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
390/// @param MBB machine basic block
391/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
392/// @param Regs callee save registers that are saved or restored.
393/// @param Type frame helper type
394/// @return True if a use of helper is qualified.
395static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
396 MachineBasicBlock::iterator &NextMBBI,
397 SmallVectorImpl<unsigned> &Regs,
398 FrameHelperType Type) {
399 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
400 auto RegCount = Regs.size();
401 assert(RegCount > 0 && (RegCount % 2 == 0));
402 // # of instructions that will be outlined.
403 int InstCount = RegCount / 2;
404
405 // Do not use a helper call when not saving LR.
406 if (!llvm::is_contained(Range&: Regs, Element: AArch64::LR))
407 return false;
408
409 switch (Type) {
410 case FrameHelperType::Prolog:
411 // Prolog helper cannot save FP/LR.
412 InstCount--;
413 break;
414 case FrameHelperType::PrologFrame: {
415 // Effecitvely no change in InstCount since FpAdjusment is included.
416 break;
417 }
418 case FrameHelperType::Epilog:
419 // Bail-out if X16 is live across the epilog helper because it is used in
420 // the helper to handle X30.
421 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
422 if (NextMI->readsRegister(Reg: AArch64::W16, TRI))
423 return false;
424 }
425 // Epilog may not be in the last block. Check the liveness in successors.
426 for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
427 if (SuccMBB->isLiveIn(Reg: AArch64::W16) || SuccMBB->isLiveIn(Reg: AArch64::X16))
428 return false;
429 }
430 // No change in InstCount for the regular epilog case.
431 break;
432 case FrameHelperType::EpilogTail: {
433 // EpilogTail helper includes the caller's return.
434 if (NextMBBI == MBB.end())
435 return false;
436 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
437 return false;
438 InstCount++;
439 break;
440 }
441 }
442
443 return InstCount >= FrameHelperSizeThreshold;
444}
445
446/// Lower a HOM_Epilog pseudo instruction into a helper call while
447/// creating the helper on demand. Or emit a sequence of loads in place when not
448/// using a helper call.
449///
450/// 1. With a helper including ret
451/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
452/// ret ; NextMBBI
453/// =>
454/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
455/// ... ; NextMBBI
456///
457/// 2. With a helper
458/// HOM_Epilog x30, x29, x19, x20, x21, x22
459/// =>
460/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
461///
462/// 3. Without a helper
463/// HOM_Epilog x30, x29, x19, x20, x21, x22
464/// =>
465/// ldp x29, x30, [sp, #32]
466/// ldp x20, x19, [sp, #16]
467/// ldp x22, x21, [sp], #48
468bool AArch64LowerHomogeneousPE::lowerEpilog(
469 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
470 MachineBasicBlock::iterator &NextMBBI) {
471 auto &MF = *MBB.getParent();
472 MachineInstr &MI = *MBBI;
473
474 DebugLoc DL = MI.getDebugLoc();
475 SmallVector<unsigned, 8> Regs;
476 bool HasUnpairedReg = false;
477 for (auto &MO : MI.operands())
478 if (MO.isReg()) {
479 if (!MO.getReg().isValid()) {
480 // For now we are only expecting unpaired GP registers which should
481 // occur exactly once.
482 assert(!HasUnpairedReg);
483 HasUnpairedReg = true;
484 }
485 Regs.push_back(Elt: MO.getReg());
486 }
487 (void)HasUnpairedReg;
488 int Size = (int)Regs.size();
489 if (Size == 0)
490 return false;
491 // Registers are in pair.
492 assert(Size % 2 == 0);
493 assert(MI.getOpcode() == AArch64::HOM_Epilog);
494
495 auto Return = NextMBBI;
496 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::EpilogTail)) {
497 // When MBB ends with a return, emit a tail-call to the epilog helper
498 auto *EpilogTailHelper =
499 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::EpilogTail);
500 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::TCRETURNdi))
501 .addGlobalAddress(GV: EpilogTailHelper)
502 .addImm(Val: 0)
503 .setMIFlag(MachineInstr::FrameDestroy)
504 .copyImplicitOps(OtherMI: MI)
505 .copyImplicitOps(OtherMI: *Return);
506 NextMBBI = std::next(x: Return);
507 Return->removeFromParent();
508 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
509 Type: FrameHelperType::Epilog)) {
510 // The default epilog helper case.
511 auto *EpilogHelper =
512 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Epilog);
513 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
514 .addGlobalAddress(GV: EpilogHelper)
515 .setMIFlag(MachineInstr::FrameDestroy)
516 .copyImplicitOps(OtherMI: MI);
517 } else {
518 // Fall back to no-helper.
519 for (int I = 0; I < Size - 2; I += 2)
520 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I], Reg2: Regs[I + 1], Offset: Size - I - 2, IsPostDec: false);
521 // Restore the last CSR with post-increment of SP.
522 emitLoad(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: Size, IsPostDec: true);
523 }
524
525 MBBI->removeFromParent();
526 return true;
527}
528
529/// Lower a HOM_Prolog pseudo instruction into a helper call while
530/// creating the helper on demand. Or emit a sequence of stores in place when
531/// not using a helper call.
532///
533/// 1. With a helper including frame-setup
534/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
535/// =>
536/// stp x29, x30, [sp, #-16]!
537/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
538///
539/// 2. With a helper
540/// HOM_Prolog x30, x29, x19, x20, x21, x22
541/// =>
542/// stp x29, x30, [sp, #-16]!
543/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
544///
545/// 3. Without a helper
546/// HOM_Prolog x30, x29, x19, x20, x21, x22
547/// =>
548/// stp x22, x21, [sp, #-48]!
549/// stp x20, x19, [sp, #16]
550/// stp x29, x30, [sp, #32]
551bool AArch64LowerHomogeneousPE::lowerProlog(
552 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
553 MachineBasicBlock::iterator &NextMBBI) {
554 auto &MF = *MBB.getParent();
555 MachineInstr &MI = *MBBI;
556
557 DebugLoc DL = MI.getDebugLoc();
558 SmallVector<unsigned, 8> Regs;
559 bool HasUnpairedReg = false;
560 int LRIdx = 0;
561 std::optional<int> FpOffset;
562 for (auto &MO : MI.operands()) {
563 if (MO.isReg()) {
564 if (MO.getReg().isValid()) {
565 if (MO.getReg() == AArch64::LR)
566 LRIdx = Regs.size();
567 } else {
568 // For now we are only expecting unpaired GP registers which should
569 // occur exactly once.
570 assert(!HasUnpairedReg);
571 HasUnpairedReg = true;
572 }
573 Regs.push_back(Elt: MO.getReg());
574 } else if (MO.isImm()) {
575 FpOffset = MO.getImm();
576 }
577 }
578 (void)HasUnpairedReg;
579 int Size = (int)Regs.size();
580 if (Size == 0)
581 return false;
582 // Allow compact unwind case only for oww.
583 assert(Size % 2 == 0);
584 assert(MI.getOpcode() == AArch64::HOM_Prolog);
585
586 if (FpOffset &&
587 shouldUseFrameHelper(MBB, NextMBBI, Regs, Type: FrameHelperType::PrologFrame)) {
588 // FP/LR is stored at the top of stack before the prolog helper call.
589 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
590 auto *PrologFrameHelper = getOrCreateFrameHelper(
591 M, MMI, Regs, Type: FrameHelperType::PrologFrame, FpOffset: *FpOffset);
592 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
593 .addGlobalAddress(GV: PrologFrameHelper)
594 .setMIFlag(MachineInstr::FrameSetup)
595 .copyImplicitOps(OtherMI: MI)
596 .addReg(RegNo: AArch64::FP, flags: RegState::Implicit | RegState::Define)
597 .addReg(RegNo: AArch64::SP, flags: RegState::Implicit);
598 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
599 Type: FrameHelperType::Prolog)) {
600 // FP/LR is stored at the top of stack before the prolog helper call.
601 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: AArch64::LR, Reg2: AArch64::FP, Offset: -LRIdx - 2, IsPreDec: true);
602 auto *PrologHelper =
603 getOrCreateFrameHelper(M, MMI, Regs, Type: FrameHelperType::Prolog);
604 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::BL))
605 .addGlobalAddress(GV: PrologHelper)
606 .setMIFlag(MachineInstr::FrameSetup)
607 .copyImplicitOps(OtherMI: MI);
608 } else {
609 // Fall back to no-helper.
610 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[Size - 2], Reg2: Regs[Size - 1], Offset: -Size, IsPreDec: true);
611 for (int I = Size - 3; I >= 0; I -= 2)
612 emitStore(MF, MBB, Pos: MBBI, TII: *TII, Reg1: Regs[I - 1], Reg2: Regs[I], Offset: Size - I - 1, IsPreDec: false);
613 if (FpOffset) {
614 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri))
615 .addDef(RegNo: AArch64::FP)
616 .addUse(RegNo: AArch64::SP)
617 .addImm(Val: *FpOffset)
618 .addImm(Val: 0)
619 .setMIFlag(MachineInstr::FrameSetup);
620 }
621 }
622
623 MBBI->removeFromParent();
624 return true;
625}
626
627/// Process each machine instruction
628/// @param MBB machine basic block
629/// @param MBBI current instruction iterator
630/// @param NextMBBI next instruction iterator which can be updated
631/// @return True when IR is changed.
632bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
633 MachineBasicBlock::iterator MBBI,
634 MachineBasicBlock::iterator &NextMBBI) {
635 MachineInstr &MI = *MBBI;
636 unsigned Opcode = MI.getOpcode();
637 switch (Opcode) {
638 default:
639 break;
640 case AArch64::HOM_Prolog:
641 return lowerProlog(MBB, MBBI, NextMBBI);
642 case AArch64::HOM_Epilog:
643 return lowerEpilog(MBB, MBBI, NextMBBI);
644 }
645 return false;
646}
647
648bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
649 bool Modified = false;
650
651 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
652 while (MBBI != E) {
653 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
654 Modified |= runOnMI(MBB, MBBI, NextMBBI&: NMBBI);
655 MBBI = NMBBI;
656 }
657
658 return Modified;
659}
660
661bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
662 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
663
664 bool Modified = false;
665 for (auto &MBB : MF)
666 Modified |= runOnMBB(MBB);
667 return Modified;
668}
669
670ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
671 return new AArch64LowerHomogeneousPrologEpilog();
672}
673