1 | //===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SystemZFrameLowering.h" |
10 | #include "SystemZCallingConv.h" |
11 | #include "SystemZInstrBuilder.h" |
12 | #include "SystemZInstrInfo.h" |
13 | #include "SystemZMachineFunctionInfo.h" |
14 | #include "SystemZRegisterInfo.h" |
15 | #include "SystemZSubtarget.h" |
16 | #include "llvm/CodeGen/LivePhysRegs.h" |
17 | #include "llvm/CodeGen/MachineModuleInfo.h" |
18 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
19 | #include "llvm/CodeGen/RegisterScavenging.h" |
20 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
21 | #include "llvm/IR/Function.h" |
22 | #include "llvm/Target/TargetMachine.h" |
23 | |
24 | using namespace llvm; |
25 | |
26 | namespace { |
27 | // The ABI-defined register save slots, relative to the CFA (i.e. |
28 | // incoming stack pointer + SystemZMC::ELFCallFrameSize). |
29 | static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = { |
  { SystemZ::R2D, 0x10 },
  { SystemZ::R3D, 0x18 },
  { SystemZ::R4D, 0x20 },
  { SystemZ::R5D, 0x28 },
  { SystemZ::R6D, 0x30 },
  { SystemZ::R7D, 0x38 },
  { SystemZ::R8D, 0x40 },
  { SystemZ::R9D, 0x48 },
  { SystemZ::R10D, 0x50 },
  { SystemZ::R11D, 0x58 },
  { SystemZ::R12D, 0x60 },
  { SystemZ::R13D, 0x68 },
  { SystemZ::R14D, 0x70 },
  { SystemZ::R15D, 0x78 },
  { SystemZ::F0D, 0x80 },
  { SystemZ::F2D, 0x88 },
  { SystemZ::F4D, 0x90 },
  { SystemZ::F6D, 0x98 }
48 | }; |
49 | |
50 | static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = { |
    {SystemZ::R4D, 0x00},  {SystemZ::R5D, 0x08},  {SystemZ::R6D, 0x10},
    {SystemZ::R7D, 0x18},  {SystemZ::R8D, 0x20},  {SystemZ::R9D, 0x28},
    {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40},
    {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}};
55 | } // end anonymous namespace |
56 | |
57 | SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl, |
58 | int LAO, Align TransAl, |
59 | bool StackReal, unsigned PointerSize) |
60 | : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal), |
61 | PointerSize(PointerSize) {} |
62 | |
63 | std::unique_ptr<SystemZFrameLowering> |
64 | SystemZFrameLowering::create(const SystemZSubtarget &STI) { |
65 | unsigned PtrSz = |
      STI.getTargetLowering()->getTargetMachine().getPointerSize(0);
  if (STI.isTargetXPLINK64())
    return std::make_unique<SystemZXPLINKFrameLowering>(PtrSz);
  return std::make_unique<SystemZELFFrameLowering>(PtrSz);
70 | } |
71 | |
72 | namespace { |
73 | struct SZFrameSortingObj { |
74 | bool IsValid = false; // True if we care about this Object. |
75 | uint32_t ObjectIndex = 0; // Index of Object into MFI list. |
76 | uint64_t ObjectSize = 0; // Size of Object in bytes. |
77 | uint32_t D12Count = 0; // 12-bit displacement only. |
78 | uint32_t DPairCount = 0; // 12 or 20 bit displacement. |
79 | }; |
80 | typedef std::vector<SZFrameSortingObj> SZFrameObjVec; |
81 | } // namespace |
82 | |
83 | // TODO: Move to base class. |
84 | void SystemZELFFrameLowering::orderFrameObjects( |
85 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { |
86 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
87 | auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo(); |
88 | |
89 | // Make a vector of sorting objects to track all MFI objects and mark those |
90 | // to be sorted as valid. |
91 | if (ObjectsToAllocate.size() <= 1) |
92 | return; |
93 | SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd()); |
94 | for (auto &Obj : ObjectsToAllocate) { |
95 | SortingObjects[Obj].IsValid = true; |
96 | SortingObjects[Obj].ObjectIndex = Obj; |
    SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj);
98 | } |
99 | |
100 | // Examine uses for each object and record short (12-bit) and "pair" |
101 | // displacement types. |
102 | for (auto &MBB : MF) |
103 | for (auto &MI : MBB) { |
104 | if (MI.isDebugInstr()) |
105 | continue; |
106 | for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { |
        const MachineOperand &MO = MI.getOperand(I);
108 | if (!MO.isFI()) |
109 | continue; |
110 | int Index = MO.getIndex(); |
111 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && |
112 | SortingObjects[Index].IsValid) { |
          if (TII->hasDisplacementPairInsn(MI.getOpcode()))
114 | SortingObjects[Index].DPairCount++; |
115 | else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset)) |
116 | SortingObjects[Index].D12Count++; |
117 | } |
118 | } |
119 | } |
120 | |
121 | // Sort all objects for short/paired displacements, which should be |
122 | // sufficient as it seems like all frame objects typically are within the |
123 | // long displacement range. Sorting works by computing the "density" as |
124 | // Count / ObjectSize. The comparisons of two such fractions are refactored |
125 | // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to |
126 | // eliminate the (fp) divisions. A higher density object needs to go after |
127 | // in the list in order for it to end up lower on the stack. |
128 | auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) { |
129 | // Put all invalid and variable sized objects at the end. |
130 | if (!A.IsValid || !B.IsValid) |
131 | return A.IsValid; |
132 | if (!A.ObjectSize || !B.ObjectSize) |
133 | return A.ObjectSize > 0; |
134 | uint64_t ADensityCmp = A.D12Count * B.ObjectSize; |
135 | uint64_t BDensityCmp = B.D12Count * A.ObjectSize; |
136 | if (ADensityCmp != BDensityCmp) |
137 | return ADensityCmp < BDensityCmp; |
138 | return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize; |
139 | }; |
  std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12);
141 | |
142 | // Now modify the original list to represent the final order that |
143 | // we want. |
144 | unsigned Idx = 0; |
145 | for (auto &Obj : SortingObjects) { |
146 | // All invalid items are sorted at the end, so it's safe to stop. |
147 | if (!Obj.IsValid) |
148 | break; |
149 | ObjectsToAllocate[Idx++] = Obj.ObjectIndex; |
150 | } |
151 | } |
152 | |
153 | bool SystemZFrameLowering::hasReservedCallFrame( |
154 | const MachineFunction &MF) const { |
155 | // The ELF ABI requires us to allocate 160 bytes of stack space for the |
156 | // callee, with any outgoing stack arguments being placed above that. It |
157 | // seems better to make that area a permanent feature of the frame even if |
158 | // we're using a frame pointer. Similarly, 64-bit XPLINK requires 96 bytes |
159 | // of stack space for the register save area. |
160 | return true; |
161 | } |
162 | |
163 | bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots( |
164 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
165 | std::vector<CalleeSavedInfo> &CSI) const { |
166 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
167 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
168 | bool IsVarArg = MF.getFunction().isVarArg(); |
169 | if (CSI.empty()) |
170 | return true; // Early exit if no callee saved registers are modified! |
171 | |
172 | unsigned LowGPR = 0; |
173 | unsigned HighGPR = SystemZ::R15D; |
174 | int StartSPOffset = SystemZMC::ELFCallFrameSize; |
175 | for (auto &CS : CSI) { |
176 | Register Reg = CS.getReg(); |
177 | int Offset = getRegSpillOffset(MF, Reg); |
178 | if (Offset) { |
179 | if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { |
180 | LowGPR = Reg; |
181 | StartSPOffset = Offset; |
182 | } |
183 | Offset -= SystemZMC::ELFCallFrameSize; |
184 | int FrameIdx = |
          MFFrame.CreateFixedSpillStackObject(getPointerSize(), Offset);
186 | CS.setFrameIdx(FrameIdx); |
187 | } else |
188 | CS.setFrameIdx(INT32_MAX); |
189 | } |
190 | |
191 | // Save the range of call-saved registers, for use by the |
192 | // prologue/epilogue inserters. |
  ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
194 | if (IsVarArg) { |
195 | // Also save the GPR varargs, if any. R6D is call-saved, so would |
196 | // already be included, but we also need to handle the call-clobbered |
197 | // argument registers. |
198 | Register FirstGPR = ZFI->getVarArgsFirstGPR(); |
199 | if (FirstGPR < SystemZ::ELFNumArgGPRs) { |
200 | unsigned Reg = SystemZ::ELFArgGPRs[FirstGPR]; |
201 | int Offset = getRegSpillOffset(MF, Reg); |
202 | if (StartSPOffset > Offset) { |
203 | LowGPR = Reg; StartSPOffset = Offset; |
204 | } |
205 | } |
206 | } |
  ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
208 | |
209 | // Create fixed stack objects for the remaining registers. |
210 | int CurrOffset = -SystemZMC::ELFCallFrameSize; |
211 | if (usePackedStack(MF)) |
212 | CurrOffset += StartSPOffset; |
213 | |
214 | for (auto &CS : CSI) { |
215 | if (CS.getFrameIdx() != INT32_MAX) |
216 | continue; |
217 | Register Reg = CS.getReg(); |
218 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
    unsigned Size = TRI->getSpillSize(*RC);
    CurrOffset -= Size;
    assert(CurrOffset % 8 == 0 &&
           "8-byte alignment required for all register save slots");
    int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset);
224 | CS.setFrameIdx(FrameIdx); |
225 | } |
226 | |
227 | return true; |
228 | } |
229 | |
230 | void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF, |
231 | BitVector &SavedRegs, |
232 | RegScavenger *RS) const { |
233 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
234 | |
235 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
236 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
237 | bool HasFP = hasFP(MF); |
238 | SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
239 | bool IsVarArg = MF.getFunction().isVarArg(); |
240 | |
241 | // va_start stores incoming FPR varargs in the normal way, but delegates |
242 | // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). |
243 | // Record these pending uses, which typically include the call-saved |
244 | // argument register R6D. |
245 | if (IsVarArg) |
246 | for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I) |
247 | SavedRegs.set(SystemZ::ELFArgGPRs[I]); |
248 | |
249 | // If there are any landing pads, entering them will modify r6/r7. |
250 | if (!MF.getLandingPads().empty()) { |
251 | SavedRegs.set(SystemZ::R6D); |
252 | SavedRegs.set(SystemZ::R7D); |
253 | } |
254 | |
255 | // If the function requires a frame pointer, record that the hard |
256 | // frame pointer will be clobbered. |
257 | if (HasFP) |
258 | SavedRegs.set(SystemZ::R11D); |
259 | |
260 | // If the function calls other functions, record that the return |
261 | // address register will be clobbered. |
262 | if (MFFrame.hasCalls()) |
263 | SavedRegs.set(SystemZ::R14D); |
264 | |
265 | // If we are saving GPRs other than the stack pointer, we might as well |
266 | // save and restore the stack pointer at the same time, via STMG and LMG. |
267 | // This allows the deallocation to be done by the LMG, rather than needing |
268 | // a separate %r15 addition. |
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  for (unsigned I = 0; CSRegs[I]; ++I) {
    unsigned Reg = CSRegs[I];
    if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) {
273 | SavedRegs.set(SystemZ::R15D); |
274 | break; |
275 | } |
276 | } |
277 | } |
278 | |
279 | SystemZELFFrameLowering::SystemZELFFrameLowering(unsigned PointerSize) |
280 | : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0, |
281 | Align(8), /* StackRealignable */ false, PointerSize), |
282 | RegSpillOffsets(0) { |
283 | |
284 | // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not |
285 | // equal to the incoming stack pointer, but to incoming stack pointer plus |
286 | // 160. Instead of using a Local Area Offset, the Register save area will |
287 | // be occupied by fixed frame objects, and all offsets are actually |
288 | // relative to CFA. |
289 | |
290 | // Create a mapping from register number to save slot offset. |
291 | // These offsets are relative to the start of the register save area. |
  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
293 | for (const auto &Entry : ELFSpillOffsetTable) |
294 | RegSpillOffsets[Entry.Reg] = Entry.Offset; |
295 | } |
296 | |
297 | // Add GPR64 to the save instruction being built by MIB, which is in basic |
298 | // block MBB. IsImplicit says whether this is an explicit operand to the |
299 | // instruction, or an implicit one that comes between the explicit start |
300 | // and end registers. |
301 | static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, |
302 | unsigned GPR64, bool IsImplicit) { |
303 | const TargetRegisterInfo *RI = |
304 | MBB.getParent()->getSubtarget().getRegisterInfo(); |
  Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
  bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
  if (!IsLive || !IsImplicit) {
    MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
    if (!IsLive)
      MBB.addLiveIn(GPR64);
311 | } |
312 | } |
313 | |
314 | bool SystemZELFFrameLowering::spillCalleeSavedRegisters( |
315 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
316 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
317 | if (CSI.empty()) |
318 | return false; |
319 | |
320 | MachineFunction &MF = *MBB.getParent(); |
321 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
322 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
323 | bool IsVarArg = MF.getFunction().isVarArg(); |
324 | DebugLoc DL; |
325 | |
326 | // Save GPRs |
327 | SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs(); |
328 | if (SpillGPRs.LowGPR) { |
    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
           "Should be saving %r15 and something else");

    // Build an STMG instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));

    // Add the explicit register operands.
    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);

    // Add the address.
    MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset);
341 | |
342 | // Make sure all call-saved GPRs are included as operands and are |
343 | // marked as live on entry. |
344 | for (const CalleeSavedInfo &I : CSI) { |
345 | Register Reg = I.getReg(); |
346 | if (SystemZ::GR64BitRegClass.contains(Reg)) |
        addSavedGPR(MBB, MIB, Reg, true);
348 | } |
349 | |
350 | // ...likewise GPR varargs. |
351 | if (IsVarArg) |
352 | for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I) |
        addSavedGPR(MBB, MIB, SystemZ::ELFArgGPRs[I], true);
354 | } |
355 | |
356 | // Save FPRs/VRs in the normal TargetInstrInfo way. |
357 | for (const CalleeSavedInfo &I : CSI) { |
358 | Register Reg = I.getReg(); |
359 | if (SystemZ::FP64BitRegClass.contains(Reg)) { |
360 | MBB.addLiveIn(PhysReg: Reg); |
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::FP64BitRegClass, TRI, Register());
    }
    if (SystemZ::VR128BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::VR128BitRegClass, TRI, Register());
368 | } |
369 | } |
370 | |
371 | return true; |
372 | } |
373 | |
374 | bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( |
375 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
376 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
377 | if (CSI.empty()) |
378 | return false; |
379 | |
380 | MachineFunction &MF = *MBB.getParent(); |
381 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
382 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
383 | bool HasFP = hasFP(MF); |
384 | DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); |
385 | |
386 | // Restore FPRs/VRs in the normal TargetInstrInfo way. |
387 | for (const CalleeSavedInfo &I : CSI) { |
388 | Register Reg = I.getReg(); |
389 | if (SystemZ::FP64BitRegClass.contains(Reg)) |
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::FP64BitRegClass, TRI, Register());
    if (SystemZ::VR128BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::VR128BitRegClass, TRI, Register());
395 | } |
396 | |
397 | // Restore call-saved GPRs (but not call-clobbered varargs, which at |
398 | // this point might hold return values). |
399 | SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs(); |
400 | if (RestoreGPRs.LowGPR) { |
401 | // If we saved any of %r2-%r5 as varargs, we should also be saving |
402 | // and restoring %r6. If we're saving %r6 or above, we should be |
403 | // restoring it too. |
404 | assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR && |
405 | "Should be loading %r15 and something else" ); |
406 | |
407 | // Build an LMG instruction. |
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));

    // Add the explicit register operands.
    MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
    MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);

    // Add the address.
    MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
    MIB.addImm(RestoreGPRs.GPROffset);
417 | |
418 | // Do a second scan adding regs as being defined by instruction |
419 | for (const CalleeSavedInfo &I : CSI) { |
420 | Register Reg = I.getReg(); |
421 | if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR && |
422 | SystemZ::GR64BitRegClass.contains(Reg)) |
        MIB.addReg(Reg, RegState::ImplicitDefine);
424 | } |
425 | } |
426 | |
427 | return true; |
428 | } |
429 | |
430 | void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized( |
431 | MachineFunction &MF, RegScavenger *RS) const { |
432 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
433 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
434 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
435 | bool BackChain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
436 | |
437 | if (!usePackedStack(MF) || BackChain) |
438 | // Create the incoming register save area. |
439 | getOrCreateFramePointerSaveIndex(MF); |
440 | |
441 | // Get the size of our stack frame to be allocated ... |
442 | uint64_t StackSize = (MFFrame.estimateStackSize(MF) + |
443 | SystemZMC::ELFCallFrameSize); |
444 | // ... and the maximum offset we may need to reach into the |
445 | // caller's frame to access the save area or stack arguments. |
446 | int64_t MaxArgOffset = 0; |
447 | for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) |
    if (MFFrame.getObjectOffset(I) >= 0) {
      int64_t ArgOffset = MFFrame.getObjectOffset(I) +
                          MFFrame.getObjectSize(I);
      MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
452 | } |
453 | |
454 | uint64_t MaxReach = StackSize + MaxArgOffset; |
  if (!isUInt<12>(MaxReach)) {
    // We may need register scavenging slots if some parts of the frame
    // are outside the reach of an unsigned 12-bit displacement.
    // Create 2 for the case where both addresses in an MVC are
    // out of range.
    RS->addScavengingFrameIndex(
        MFFrame.CreateStackObject(getPointerSize(), Align(8), false));
    RS->addScavengingFrameIndex(
        MFFrame.CreateStackObject(getPointerSize(), Align(8), false));
464 | } |
465 | |
  // If R6 is used as an argument register it is still callee saved. If it
  // is not clobbered (and restored) in that case, it should never be marked
  // as killed.
  if (MF.front().isLiveIn(SystemZ::R6D) &&
      ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D)
    for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D))
472 | MO.setIsKill(false); |
473 | } |
474 | |
475 | // Emit instructions before MBBI (in MBB) to add NumBytes to Reg. |
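// AGHI covers 16-bit signed immediates; anything larger is emitted as AGFI,
// clamped so that each step preserves 8-byte stack alignment. For example,
// -4096 becomes a single AGHI, while -40000 requires an AGFI.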
476 | static void emitIncrement(MachineBasicBlock &MBB, |
477 | MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, |
478 | Register Reg, int64_t NumBytes, |
479 | const TargetInstrInfo *TII) { |
480 | while (NumBytes) { |
481 | unsigned Opcode; |
482 | int64_t ThisVal = NumBytes; |
    if (isInt<16>(NumBytes))
484 | Opcode = SystemZ::AGHI; |
485 | else { |
486 | Opcode = SystemZ::AGFI; |
487 | // Make sure we maintain 8-byte stack alignment. |
488 | int64_t MinVal = -uint64_t(1) << 31; |
489 | int64_t MaxVal = (int64_t(1) << 31) - 8; |
490 | if (ThisVal < MinVal) |
491 | ThisVal = MinVal; |
492 | else if (ThisVal > MaxVal) |
493 | ThisVal = MaxVal; |
494 | } |
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
        .addReg(Reg).addImm(ThisVal);
    // The CC implicit def is dead.
    MI->getOperand(3).setIsDead();
499 | NumBytes -= ThisVal; |
500 | } |
501 | } |
502 | |
503 | // Add CFI for the new CFA offset. |
504 | static void buildCFAOffs(MachineBasicBlock &MBB, |
505 | MachineBasicBlock::iterator MBBI, |
506 | const DebugLoc &DL, int Offset, |
507 | const SystemZInstrInfo *ZII) { |
508 | unsigned CFIIndex = MBB.getParent()->addFrameInst( |
      MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
512 | } |
513 | |
514 | // Add CFI for the new frame location. |
515 | static void buildDefCFAReg(MachineBasicBlock &MBB, |
516 | MachineBasicBlock::iterator MBBI, |
517 | const DebugLoc &DL, unsigned Reg, |
518 | const SystemZInstrInfo *ZII) { |
519 | MachineFunction &MF = *MBB.getParent(); |
520 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
  unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
  unsigned CFIIndex = MF.addFrameInst(
      MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
  BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
526 | } |
527 | |
528 | void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF, |
529 | MachineBasicBlock &MBB) const { |
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
531 | const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); |
532 | const SystemZTargetLowering &TLI = *STI.getTargetLowering(); |
533 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
534 | auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo()); |
535 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
536 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
537 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
538 | const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo(); |
539 | bool HasFP = hasFP(MF); |
540 | |
541 | // In GHC calling convention C stack space, including the ABI-defined |
542 | // 160-byte base area, is (de)allocated by GHC itself. This stack space may |
543 | // be used by LLVM as spill slots for the tail recursive GHC functions. Thus |
544 | // do not allocate stack space here, too. |
545 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) { |
546 | if (MFFrame.getStackSize() > 2048 * sizeof(long)) { |
      report_fatal_error(
          "Pre allocated stack space for GHC function is too small");
    }
    if (HasFP) {
      report_fatal_error(
          "In GHC calling convention a frame pointer is not supported");
553 | } |
554 | MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::ELFCallFrameSize); |
555 | return; |
556 | } |
557 | |
558 | // Debug location must be unknown since the first debug location is used |
559 | // to determine the end of the prologue. |
560 | DebugLoc DL; |
561 | |
562 | // The current offset of the stack pointer from the CFA. |
563 | int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP; |
564 | |
565 | if (ZFI->getSpillGPRRegs().LowGPR) { |
566 | // Skip over the GPR saves. |
567 | if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) |
568 | ++MBBI; |
569 | else |
570 | llvm_unreachable("Couldn't skip over GPR saves" ); |
571 | |
572 | // Add CFI for the GPR saves. |
573 | for (auto &Save : CSI) { |
574 | Register Reg = Save.getReg(); |
575 | if (SystemZ::GR64BitRegClass.contains(Reg)) { |
576 | int FI = Save.getFrameIdx(); |
        int64_t Offset = MFFrame.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
582 | } |
583 | } |
584 | } |
585 | |
586 | uint64_t StackSize = MFFrame.getStackSize(); |
587 | // We need to allocate the ABI-defined 160-byte base area whenever |
588 | // we allocate stack space for our own use and whenever we call another |
589 | // function. |
590 | bool HasStackObject = false; |
591 | for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i) |
    if (!MFFrame.isDeadObjectIndex(i)) {
593 | HasStackObject = true; |
594 | break; |
595 | } |
596 | if (HasStackObject || MFFrame.hasCalls()) |
597 | StackSize += SystemZMC::ELFCallFrameSize; |
598 | // Don't allocate the incoming reg save area. |
599 | StackSize = StackSize > SystemZMC::ELFCallFrameSize |
600 | ? StackSize - SystemZMC::ELFCallFrameSize |
601 | : 0; |
602 | MFFrame.setStackSize(StackSize); |
603 | |
604 | if (StackSize) { |
605 | // Allocate StackSize bytes. |
606 | int64_t Delta = -int64_t(StackSize); |
607 | const unsigned ProbeSize = TLI.getStackProbeSize(MF); |
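    // The STMG that saves the GPRs stores GPROffset bytes above the incoming
    // stack pointer; if the distance from that store down to the new stack
    // pointer (GPROffset + StackSize) stays below the probe interval, that
    // store already serves as the probe and no explicit probing is needed.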
608 | bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset && |
609 | (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize); |
610 | if (!FreeProbe && |
611 | MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) { |
612 | // Stack probing may involve looping, but splitting the prologue block |
613 | // is not possible at this point since it would invalidate the |
614 | // SaveBlocks / RestoreBlocks sets of PEI in the single block function |
615 | // case. Build a pseudo to be handled later by inlineStackProbe(). |
      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
          .addImm(StackSize);
    } else {
      bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
      // If we need backchain, save current stack pointer. R1 is free at
      // this point.
      if (StoreBackchain)
        BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
            .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
      emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
      buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
      if (StoreBackchain)
        BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
            .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
            .addImm(getBackchainOffset(MF)).addReg(0);
632 | } |
633 | SPOffsetFromCFA += Delta; |
634 | } |
635 | |
636 | if (HasFP) { |
637 | // Copy the base of the frame to R11. |
    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D)
        .addReg(SystemZ::R15D);
640 | |
641 | // Add CFI for the new frame location. |
    buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
643 | |
644 | // Mark the FramePtr as live at the beginning of every block except |
645 | // the entry block. (We'll have marked R11 as live on entry when |
646 | // saving the GPRs.) |
    for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
      MBBJ.addLiveIn(SystemZ::R11D);
649 | } |
650 | |
651 | // Skip over the FPR/VR saves. |
652 | SmallVector<unsigned, 8> CFIIndexes; |
653 | for (auto &Save : CSI) { |
654 | Register Reg = Save.getReg(); |
655 | if (SystemZ::FP64BitRegClass.contains(Reg)) { |
656 | if (MBBI != MBB.end() && |
657 | (MBBI->getOpcode() == SystemZ::STD || |
658 | MBBI->getOpcode() == SystemZ::STDY)) |
659 | ++MBBI; |
660 | else |
661 | llvm_unreachable("Couldn't skip over FPR save" ); |
662 | } else if (SystemZ::VR128BitRegClass.contains(Reg)) { |
663 | if (MBBI != MBB.end() && |
664 | MBBI->getOpcode() == SystemZ::VST) |
665 | ++MBBI; |
666 | else |
667 | llvm_unreachable("Couldn't skip over VR save" ); |
668 | } else |
669 | continue; |
670 | |
    // Add CFI for this save.
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    Register IgnoredFrameReg;
    int64_t Offset =
        getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg)
            .getFixed();

    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, DwarfReg, SPOffsetFromCFA + Offset));
    CFIIndexes.push_back(CFIIndex);
681 | } |
682 | // Complete the CFI for the FPR/VR saves, modelling them as taking effect |
683 | // after the last save. |
684 | for (auto CFIIndex : CFIIndexes) { |
    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
686 | .addCFIIndex(CFIIndex); |
687 | } |
688 | } |
689 | |
690 | void SystemZELFFrameLowering::emitEpilogue(MachineFunction &MF, |
691 | MachineBasicBlock &MBB) const { |
692 | MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); |
693 | auto *ZII = |
694 | static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
695 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
696 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
697 | |
698 | // See SystemZELFFrameLowering::emitPrologue |
699 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) |
700 | return; |
701 | |
702 | // Skip the return instruction. |
  assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
704 | |
705 | uint64_t StackSize = MFFrame.getStackSize(); |
706 | if (ZFI->getRestoreGPRRegs().LowGPR) { |
707 | --MBBI; |
708 | unsigned Opcode = MBBI->getOpcode(); |
709 | if (Opcode != SystemZ::LMG) |
710 | llvm_unreachable("Expected to see callee-save register restore code" ); |
711 | |
712 | unsigned AddrOpNo = 2; |
713 | DebugLoc DL = MBBI->getDebugLoc(); |
    uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm();
715 | unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); |
716 | |
717 | // If the offset is too large, use the largest stack-aligned offset |
718 | // and add the rest to the base register (the stack or frame pointer). |
719 | if (!NewOpcode) { |
720 | uint64_t NumBytes = Offset - 0x7fff8; |
      emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(),
                    NumBytes, ZII);
723 | Offset -= NumBytes; |
724 | NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); |
      assert(NewOpcode && "No restore instruction available");
726 | } |
727 | |
    MBBI->setDesc(ZII->get(NewOpcode));
    MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset);
730 | } else if (StackSize) { |
731 | DebugLoc DL = MBBI->getDebugLoc(); |
    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII);
733 | } |
734 | } |
735 | |
736 | void SystemZELFFrameLowering::inlineStackProbe( |
737 | MachineFunction &MF, MachineBasicBlock &PrologMBB) const { |
738 | auto *ZII = |
739 | static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
740 | const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>(); |
741 | const SystemZTargetLowering &TLI = *STI.getTargetLowering(); |
742 | |
743 | MachineInstr *StackAllocMI = nullptr; |
744 | for (MachineInstr &MI : PrologMBB) |
745 | if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) { |
746 | StackAllocMI = &MI; |
747 | break; |
748 | } |
749 | if (StackAllocMI == nullptr) |
750 | return; |
  uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
752 | const unsigned ProbeSize = TLI.getStackProbeSize(MF); |
753 | uint64_t NumFullBlocks = StackSize / ProbeSize; |
754 | uint64_t Residual = StackSize % ProbeSize; |
755 | int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP; |
756 | MachineBasicBlock *MBB = &PrologMBB; |
757 | MachineBasicBlock::iterator MBBI = StackAllocMI; |
758 | const DebugLoc DL = StackAllocMI->getDebugLoc(); |
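  // For example, with a 4 KiB probe size a 10000-byte frame is allocated as
  // two full 4096-byte blocks plus a 1808-byte residual, each allocation
  // followed by a probing volatile compare against the new area.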
759 | |
760 | // Allocate a block of Size bytes on the stack and probe it. |
761 | auto allocateAndProbe = [&](MachineBasicBlock &InsMBB, |
762 | MachineBasicBlock::iterator InsPt, unsigned Size, |
763 | bool EmitCFI) -> void { |
    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
    if (EmitCFI) {
      SPOffsetFromCFA -= Size;
      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
768 | } |
769 | // Probe by means of a volatile compare. |
    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
        MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
        .addReg(SystemZ::R0D, RegState::Undef)
        .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
        .addMemOperand(MMO);
776 | }; |
777 | |
778 | bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
779 | if (StoreBackchain) |
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR))
        .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
782 | |
783 | MachineBasicBlock *DoneMBB = nullptr; |
784 | MachineBasicBlock *LoopMBB = nullptr; |
785 | if (NumFullBlocks < 3) { |
786 | // Emit unrolled probe statements. |
787 | for (unsigned int i = 0; i < NumFullBlocks; i++) |
788 | allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/); |
789 | } else { |
790 | // Emit a loop probing the pages. |
791 | uint64_t LoopAlloc = ProbeSize * NumFullBlocks; |
792 | SPOffsetFromCFA -= LoopAlloc; |
793 | |
794 | // Use R0D to hold the exit value. |
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D)
        .addReg(SystemZ::R15D);
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII);
    emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII);
    buildCFAOffs(*MBB, MBBI, DL,
                 -int64_t(SystemZMC::ELFCallFrameSize + LoopAlloc), ZII);
801 | |
    DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
    LoopMBB = SystemZ::emitBlockAfter(MBB);
    MBB->addSuccessor(LoopMBB);
    LoopMBB->addSuccessor(LoopMBB);
    LoopMBB->addSuccessor(DoneMBB);
807 | |
808 | MBB = LoopMBB; |
809 | allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); |
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
        .addReg(SystemZ::R15D).addReg(SystemZ::R0D);
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
814 | |
815 | MBB = DoneMBB; |
816 | MBBI = DoneMBB->begin(); |
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
818 | } |
819 | |
820 | if (Residual) |
821 | allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); |
822 | |
823 | if (StoreBackchain) |
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG))
        .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
        .addImm(getBackchainOffset(MF)).addReg(0);
827 | |
828 | StackAllocMI->eraseFromParent(); |
829 | if (DoneMBB != nullptr) { |
830 | // Compute the live-in lists for the new blocks. |
    fullyRecomputeLiveIns({DoneMBB, LoopMBB});
832 | } |
833 | } |
834 | |
835 | bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const { |
836 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || |
837 | MF.getFrameInfo().hasVarSizedObjects()); |
838 | } |
839 | |
840 | StackOffset SystemZELFFrameLowering::getFrameIndexReference( |
841 | const MachineFunction &MF, int FI, Register &FrameReg) const { |
842 | // Our incoming SP is actually SystemZMC::ELFCallFrameSize below the CFA, so |
843 | // add that difference here. |
844 | StackOffset Offset = |
845 | TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg); |
  return Offset + StackOffset::getFixed(SystemZMC::ELFCallFrameSize);
847 | } |
848 | |
849 | unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF, |
850 | Register Reg) const { |
851 | bool IsVarArg = MF.getFunction().isVarArg(); |
852 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
853 | bool BackChain = Subtarget.hasBackChain(); |
854 | bool SoftFloat = Subtarget.hasSoftFloat(); |
855 | unsigned Offset = RegSpillOffsets[Reg]; |
856 | if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) { |
857 | if (SystemZ::GR64BitRegClass.contains(Reg)) |
858 | // Put all GPRs at the top of the Register save area with packed |
859 | // stack. Make room for the backchain if needed. |
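      // For example, without backchain R15D moves from 0x78 to 0x98 so its
      // 8-byte slot ends exactly at the CFA; with backchain it moves to 0x90,
      // leaving the top 8 bytes for the backchain slot.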
860 | Offset += BackChain ? 24 : 32; |
861 | else |
862 | Offset = 0; |
863 | } |
864 | return Offset; |
865 | } |
866 | |
867 | int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex( |
868 | MachineFunction &MF) const { |
869 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
870 | int FI = ZFI->getFramePointerSaveIndex(); |
871 | if (!FI) { |
872 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
873 | int Offset = getBackchainOffset(MF) - SystemZMC::ELFCallFrameSize; |
    FI = MFFrame.CreateFixedObject(getPointerSize(), Offset, false);
875 | ZFI->setFramePointerSaveIndex(FI); |
876 | } |
877 | return FI; |
878 | } |
879 | |
880 | bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const { |
  bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
882 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
883 | bool BackChain = Subtarget.hasBackChain(); |
884 | bool SoftFloat = Subtarget.hasSoftFloat(); |
885 | if (HasPackedStackAttr && BackChain && !SoftFloat) |
886 | report_fatal_error(reason: "packed-stack + backchain + hard-float is unsupported." ); |
887 | bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; |
888 | return HasPackedStackAttr && CallConv; |
889 | } |
890 | |
891 | SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering(unsigned PointerSize) |
892 | : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0, |
893 | Align(32), /* StackRealignable */ false, |
894 | PointerSize), |
895 | RegSpillOffsets(-1) { |
896 | |
897 | // Create a mapping from register number to save slot offset. |
  // These offsets are relative to the start of the register save area.
  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
900 | for (const auto &Entry : XPLINKSpillOffsetTable) |
901 | RegSpillOffsets[Entry.Reg] = Entry.Offset; |
902 | } |
903 | |
904 | int SystemZXPLINKFrameLowering::getOrCreateFramePointerSaveIndex( |
905 | MachineFunction &MF) const { |
906 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
907 | int FI = ZFI->getFramePointerSaveIndex(); |
908 | if (!FI) { |
909 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
    FI = MFFrame.CreateFixedObject(getPointerSize(), 0, false);
    MFFrame.setStackID(FI, TargetStackID::NoAlloc);
912 | ZFI->setFramePointerSaveIndex(FI); |
913 | } |
914 | return FI; |
915 | } |
916 | |
917 | // Checks if the function is a potential candidate for being a XPLeaf routine. |
918 | static bool isXPLeafCandidate(const MachineFunction &MF) { |
919 | const MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
920 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
921 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
922 | auto *Regs = |
923 | static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); |
924 | |
// If the function calls other functions, including alloca, then it is not a
// XPLeaf routine.
927 | if (MFFrame.hasCalls()) |
928 | return false; |
929 | |
930 | // If the function has var Sized Objects, then it is not a XPLeaf routine. |
931 | if (MFFrame.hasVarSizedObjects()) |
932 | return false; |
933 | |
934 | // If the function adjusts the stack, then it is not a XPLeaf routine. |
935 | if (MFFrame.adjustsStack()) |
936 | return false; |
937 | |
938 | // If function modifies the stack pointer register, then it is not a XPLeaf |
939 | // routine. |
  if (MRI.isPhysRegModified(Regs->getStackPointerRegister()))
941 | return false; |
942 | |
943 | // If function modifies the ADA register, then it is not a XPLeaf routine. |
  if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister()))
945 | return false; |
946 | |
947 | // If function modifies the return address register, then it is not a XPLeaf |
948 | // routine. |
  if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister()))
950 | return false; |
951 | |
952 | // If the backchain pointer should be stored, then it is not a XPLeaf routine. |
953 | if (MF.getSubtarget<SystemZSubtarget>().hasBackChain()) |
954 | return false; |
955 | |
956 | // If function acquires its own stack frame, then it is not a XPLeaf routine. |
957 | // At the time this function is called, only slots for local variables are |
958 | // allocated, so this is a very rough estimate. |
959 | if (MFFrame.estimateStackSize(MF) > 0) |
960 | return false; |
961 | |
962 | return true; |
963 | } |
964 | |
965 | bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( |
966 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
967 | std::vector<CalleeSavedInfo> &CSI) const { |
968 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
969 | SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
970 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
971 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
972 | auto &GRRegClass = SystemZ::GR64BitRegClass; |
973 | |
974 | // At this point, the result of isXPLeafCandidate() is not accurate because |
975 | // the size of the save area has not yet been determined. If |
976 | // isXPLeafCandidate() indicates a potential leaf function, and there are no |
977 | // callee-save registers, then it is indeed a leaf function, and we can early |
978 | // exit. |
979 | // TODO: It is possible for leaf functions to use callee-saved registers. |
980 | // It can use the 0-2k range between R4 and the caller's stack frame without |
981 | // acquiring its own stack frame. |
982 | bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF); |
983 | if (IsLeaf) |
984 | return true; |
985 | |
986 | // For non-leaf functions: |
987 | // - the address of callee (entry point) register R6 must be saved |
  CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
  CSI.back().setRestored(false);

  // The return address register R7 must be saved and restored.
  CSI.push_back(CalleeSavedInfo(Regs.getReturnFunctionAddressRegister()));
993 | |
994 | // If the function needs a frame pointer, or if the backchain pointer should |
995 | // be stored, then save the stack pointer register R4. |
996 | if (hasFP(MF) || Subtarget.hasBackChain()) |
    CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));
998 | |
999 | // If this function has an associated personality function then the |
1000 | // environment register R5 must be saved in the DSA. |
1001 | if (!MF.getLandingPads().empty()) |
    CSI.push_back(CalleeSavedInfo(Regs.getADARegister()));
1003 | |
1004 | // Scan the call-saved GPRs and find the bounds of the register spill area. |
1005 | Register LowRestoreGPR = 0; |
1006 | int LowRestoreOffset = INT32_MAX; |
1007 | Register LowSpillGPR = 0; |
1008 | int LowSpillOffset = INT32_MAX; |
1009 | Register HighGPR = 0; |
1010 | int HighOffset = -1; |
1011 | |
1012 | // Query index of the saved frame pointer. |
1013 | int FPSI = MFI->getFramePointerSaveIndex(); |
1014 | |
1015 | for (auto &CS : CSI) { |
1016 | Register Reg = CS.getReg(); |
1017 | int Offset = RegSpillOffsets[Reg]; |
1018 | if (Offset >= 0) { |
1019 | if (GRRegClass.contains(Reg)) { |
1020 | if (LowSpillOffset > Offset) { |
1021 | LowSpillOffset = Offset; |
1022 | LowSpillGPR = Reg; |
1023 | } |
1024 | if (CS.isRestored() && LowRestoreOffset > Offset) { |
1025 | LowRestoreOffset = Offset; |
1026 | LowRestoreGPR = Reg; |
1027 | } |
1028 | |
1029 | if (Offset > HighOffset) { |
1030 | HighOffset = Offset; |
1031 | HighGPR = Reg; |
1032 | } |
1033 | // Non-volatile GPRs are saved in the dedicated register save area at |
1034 | // the bottom of the stack and are not truly part of the "normal" stack |
1035 | // frame. Mark the frame index as NoAlloc to indicate it as such. |
1036 | unsigned RegSize = getPointerSize(); |
1037 | int FrameIdx = |
1038 | (FPSI && Offset == 0) |
1039 | ? FPSI |
              : MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
      CS.setFrameIdx(FrameIdx);
      MFFrame.setStackID(FrameIdx, TargetStackID::NoAlloc);
1043 | } |
1044 | } else { |
1045 | Register Reg = CS.getReg(); |
1046 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
      Align Alignment = TRI->getSpillAlign(*RC);
      unsigned Size = TRI->getSpillSize(*RC);
      Alignment = std::min(Alignment, getStackAlign());
      int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
1051 | CS.setFrameIdx(FrameIdx); |
1052 | } |
1053 | } |
1054 | |
1055 | // Save the range of call-saved registers, for use by the |
1056 | // prologue/epilogue inserters. |
1057 | if (LowRestoreGPR) |
    MFI->setRestoreGPRRegs(LowRestoreGPR, HighGPR, LowRestoreOffset);
1059 | |
1060 | // Save the range of call-saved registers, for use by the epilogue inserter. |
  assert(LowSpillGPR && "Expected registers to spill");
  MFI->setSpillGPRRegs(LowSpillGPR, HighGPR, LowSpillOffset);
1063 | |
1064 | return true; |
1065 | } |
1066 | |
1067 | void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF, |
1068 | BitVector &SavedRegs, |
1069 | RegScavenger *RS) const { |
1070 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1071 | |
1072 | bool HasFP = hasFP(MF); |
1073 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1074 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1075 | |
1076 | // If the function requires a frame pointer, record that the hard |
1077 | // frame pointer will be clobbered. |
1078 | if (HasFP) |
1079 | SavedRegs.set(Regs.getFramePointerRegister()); |
1080 | } |
1081 | |
1082 | bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( |
1083 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
1084 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1085 | if (CSI.empty()) |
1086 | return true; |
1087 | |
1088 | MachineFunction &MF = *MBB.getParent(); |
1089 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
1090 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1091 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
1092 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1093 | SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs(); |
1094 | DebugLoc DL; |
1095 | |
1096 | // Save GPRs |
1097 | if (SpillGPRs.LowGPR) { |
1098 | assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR && |
1099 | "Should be saving multiple registers" ); |
1100 | |
1101 | // Build an STM/STMG instruction. |
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
1103 | |
1104 | // Add the explicit register operands. |
    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
1107 | |
1108 | // Add the address r4 |
    MIB.addReg(Regs.getStackPointerRegister());

    // Add the partial offset.
    // We cannot add the actual offset yet, as the stack has not been
    // finalized at this point.
    MIB.addImm(SpillGPRs.GPROffset);
1114 | |
1115 | // Make sure all call-saved GPRs are included as operands and are |
1116 | // marked as live on entry. |
1117 | auto &GRRegClass = SystemZ::GR64BitRegClass; |
1118 | for (const CalleeSavedInfo &I : CSI) { |
1119 | Register Reg = I.getReg(); |
1120 | if (GRRegClass.contains(Reg)) |
        addSavedGPR(MBB, MIB, Reg, true);
1122 | } |
1123 | } |
1124 | |
1125 | // Spill FPRs to the stack in the normal TargetInstrInfo way |
1126 | for (const CalleeSavedInfo &I : CSI) { |
1127 | Register Reg = I.getReg(); |
1128 | if (SystemZ::FP64BitRegClass.contains(Reg)) { |
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::FP64BitRegClass, TRI, Register());
1132 | } |
1133 | if (SystemZ::VR128BitRegClass.contains(Reg)) { |
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::VR128BitRegClass, TRI, Register());
1137 | } |
1138 | } |
1139 | |
1140 | return true; |
1141 | } |
1142 | |
1143 | bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters( |
1144 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
1145 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1146 | |
1147 | if (CSI.empty()) |
1148 | return false; |
1149 | |
1150 | MachineFunction &MF = *MBB.getParent(); |
1151 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
1152 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1153 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
1154 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1155 | |
1156 | DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); |
1157 | |
1158 | // Restore FPRs in the normal TargetInstrInfo way. |
1159 | for (const CalleeSavedInfo &I : CSI) { |
1160 | Register Reg = I.getReg(); |
1161 | if (SystemZ::FP64BitRegClass.contains(Reg)) |
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::FP64BitRegClass, TRI, Register());
    if (SystemZ::VR128BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::VR128BitRegClass, TRI, Register());
1167 | } |
1168 | |
1169 | // Restore call-saved GPRs (but not call-clobbered varargs, which at |
1170 | // this point might hold return values). |
1171 | SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs(); |
1172 | if (RestoreGPRs.LowGPR) { |
1173 | assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)); |
1174 | if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR) |
1175 | // Build an LG/L instruction. |
      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
          .addReg(Regs.getStackPointerRegister())
          .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
          .addReg(0);
1180 | else { |
1181 | // Build an LMG/LM instruction. |
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
1183 | |
1184 | // Add the explicit register operands. |
      MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
      MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
1187 | |
1188 | // Add the address. |
      MIB.addReg(Regs.getStackPointerRegister());
      MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);
1191 | |
1192 | // Do a second scan adding regs as being defined by instruction |
1193 | for (const CalleeSavedInfo &I : CSI) { |
1194 | Register Reg = I.getReg(); |
1195 | if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR) |
          MIB.addReg(Reg, RegState::ImplicitDefine);
1197 | } |
1198 | } |
1199 | } |
1200 | |
1201 | return true; |
1202 | } |
1203 | |
1204 | void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, |
1205 | MachineBasicBlock &MBB) const { |
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
1207 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1208 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
1209 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
1210 | auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); |
1211 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1212 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
1213 | MachineInstr *StoreInstr = nullptr; |
1214 | |
1215 | determineFrameLayout(MF); |
1216 | |
1217 | bool HasFP = hasFP(MF); |
1218 | // Debug location must be unknown since the first debug location is used |
1219 | // to determine the end of the prologue. |
1220 | DebugLoc DL; |
1221 | uint64_t Offset = 0; |
1222 | |
1223 | const uint64_t StackSize = MFFrame.getStackSize(); |
1224 | |
1225 | if (ZFI->getSpillGPRRegs().LowGPR) { |
1226 | // Skip over the GPR saves. |
1227 | if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) { |
1228 | const int Operand = 3; |
1229 | // Now we can set the offset for the operation, since now the Stack |
1230 | // has been finalized. |
      Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
1232 | // Maximum displacement for STMG instruction. |
      if (isInt<20>(Offset - StackSize))
1234 | Offset -= StackSize; |
1235 | else |
1236 | StoreInstr = &*MBBI; |
      MBBI->getOperand(Operand).setImm(Offset);
1238 | ++MBBI; |
1239 | } else |
1240 | llvm_unreachable("Couldn't skip over GPR saves" ); |
1241 | } |
1242 | |
1243 | if (StackSize) { |
1244 | MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI; |
1245 | // Allocate StackSize bytes. |
1246 | int64_t Delta = -int64_t(StackSize); |
1247 | |
1248 | // If the STMG instruction also stores SP (R4) but its displacement is too
1249 | // large, the SP is decremented before the store, so the wrong value would
1250 | // be saved and later reloaded. In that case, temporarily save the incoming
1251 | // SP value in a scratch register and store it to its slot afterwards.
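     | // Schematically, the resulting prologue fragment is:
     | //   LGR  %r0, %r4            ; save the incoming SP
     | //   <SP -= StackSize>        ; allocate the frame
     | //   STMG ...                 ; GPR saves (now storing the wrong SP)
     | //   STG  %r0, <offset>(%r4)  ; overwrite the slot with the saved SP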
1252 | if (StoreInstr && HasFP) { |
1253 | // Insert LGR r0,r4 before the STMG instruction.
1254 | BuildMI(BB&: MBB, I: InsertPt, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LGR)) |
1255 | .addReg(RegNo: SystemZ::R0D, flags: RegState::Define) |
1256 | .addReg(RegNo: SystemZ::R4D); |
1257 | // Insert STG r0,xxx(,r4) after the STMG instruction.
1258 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::STG)) |
1259 | .addReg(RegNo: SystemZ::R0D, flags: RegState::Kill) |
1260 | .addReg(RegNo: SystemZ::R4D) |
1261 | .addImm(Val: Offset) |
1262 | .addReg(RegNo: 0); |
1263 | } |
1264 | |
1265 | emitIncrement(MBB, MBBI&: InsertPt, DL, Reg: Regs.getStackPointerRegister(), NumBytes: Delta, |
1266 | TII: ZII); |
1267 | |
1268 | // If the requested stack size is larger than the guard page, then we need |
1269 | // to check if we need to call the stack extender. This requires adding a |
1270 | // conditional branch, but splitting the prologue block is not possible at |
1271 | // this point since it would invalidate the SaveBlocks / RestoreBlocks sets |
1272 | // of PEI in the single block function case. Build a pseudo to be handled |
1273 | // later by inlineStackProbe(). |
1274 | const uint64_t GuardPageSize = 1024 * 1024; |
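     | // That is, frames larger than 1 MiB get a probe; see inlineStackProbe().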
1275 | if (StackSize > GuardPageSize) { |
1276 | assert(StoreInstr && "Wrong insertion point");
1277 | BuildMI(BB&: MBB, I: InsertPt, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::XPLINK_STACKALLOC)); |
1278 | } |
1279 | } |
1280 | |
1281 | if (HasFP) { |
1282 | // Copy the base of the frame to the frame pointer register.
1283 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LGR), |
1284 | DestReg: Regs.getFramePointerRegister()) |
1285 | .addReg(RegNo: Regs.getStackPointerRegister()); |
1286 | |
1287 | // Mark the FramePtr as live at the beginning of every block except |
1288 | // the entry block. (We'll have marked R8 as live on entry when |
1289 | // saving the GPRs.) |
1290 | for (MachineBasicBlock &B : llvm::drop_begin(RangeOrContainer&: MF)) |
1291 | B.addLiveIn(PhysReg: Regs.getFramePointerRegister()); |
1292 | } |
1293 | |
1294 | // Save GPRs used for varargs, if any. |
1295 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
1296 | bool IsVarArg = MF.getFunction().isVarArg(); |
1297 | |
1298 | if (IsVarArg) { |
1299 | // FixedRegs is the number of used registers, accounting for shadow |
1300 | // registers. |
1301 | unsigned FixedRegs = ZFI->getVarArgsFirstGPR() + ZFI->getVarArgsFirstFPR(); |
1302 | auto &GPRs = SystemZ::XPLINK64ArgGPRs; |
1303 | for (unsigned I = FixedRegs; I < SystemZ::XPLINK64NumArgGPRs; I++) { |
1304 | uint64_t StartOffset = MFFrame.getOffsetAdjustment() + |
1305 | MFFrame.getStackSize() + Regs.getCallFrameSize() + |
1306 | getOffsetOfLocalArea() + I * getPointerSize(); |
1307 | unsigned Reg = GPRs[I]; |
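     | // Spill this unnamed-argument GPR to its stack slot, roughly:
     | //   STG %rN, <StartOffset>(%r4)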
1308 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1309 | .addReg(RegNo: Reg) |
1310 | .addReg(RegNo: Regs.getStackPointerRegister()) |
1311 | .addImm(Val: StartOffset) |
1312 | .addReg(RegNo: 0); |
1313 | if (!MBB.isLiveIn(Reg)) |
1314 | MBB.addLiveIn(PhysReg: Reg); |
1315 | } |
1316 | } |
1317 | } |
1318 | |
1319 | void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF, |
1320 | MachineBasicBlock &MBB) const { |
1321 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1322 | MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); |
1323 | SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
1324 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
1325 | auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); |
1326 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1327 | |
1328 | // Skip the return instruction. |
1329 | assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
1330 | |
1331 | uint64_t StackSize = MFFrame.getStackSize(); |
1332 | if (StackSize) { |
1333 | unsigned SPReg = Regs.getStackPointerRegister(); |
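     | // If the GPR restore range starts at the stack pointer itself, the
     | // epilogue's LMG already reloads the saved SP, so no explicit increment
     | // is needed.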
1334 | if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) { |
1335 | DebugLoc DL = MBBI->getDebugLoc(); |
1336 | emitIncrement(MBB, MBBI, DL, Reg: SPReg, NumBytes: StackSize, TII: ZII); |
1337 | } |
1338 | } |
1339 | } |
1340 | |
1341 | // Emit a compare of the stack pointer against the stack floor, and a call to |
1342 | // the LE stack extender if needed. |
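     | // Schematically, the emitted probe is:
     | //   LLGT %r3, 1208         ; load the pointer used to locate the stack floor
     | //   CG   %r4, 64(%r3)      ; compare the biased SP against the stack floor
     | //   JL   StackExtMBB       ; branch out of line if below the floor
     | // where StackExtMBB calls the LE stack extender (LG/BASR) and jumps back.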
1343 | void SystemZXPLINKFrameLowering::inlineStackProbe( |
1344 | MachineFunction &MF, MachineBasicBlock &PrologMBB) const { |
1345 | auto *ZII = |
1346 | static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
1347 | |
1348 | MachineInstr *StackAllocMI = nullptr; |
1349 | for (MachineInstr &MI : PrologMBB) |
1350 | if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) { |
1351 | StackAllocMI = &MI; |
1352 | break; |
1353 | } |
1354 | if (StackAllocMI == nullptr) |
1355 | return; |
1356 | |
1357 | bool NeedSaveSP = hasFP(MF); |
1358 | bool NeedSaveArg = PrologMBB.isLiveIn(Reg: SystemZ::R3D); |
1359 | const int64_t SaveSlotR3 = 2192; |
1360 | |
1361 | MachineBasicBlock &MBB = PrologMBB; |
1362 | const DebugLoc DL = StackAllocMI->getDebugLoc(); |
1363 | |
1364 | // The second half of block MBB after the split.
1365 | MachineBasicBlock *NextMBB; |
1366 | |
1367 | // Add new basic block for the call to the stack overflow function. |
1368 | MachineBasicBlock *StackExtMBB = |
1369 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1370 | MF.push_back(MBB: StackExtMBB); |
1371 | |
1372 | // LG r3,72(,r3) |
1373 | BuildMI(BB: StackExtMBB, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LG), DestReg: SystemZ::R3D) |
1374 | .addReg(RegNo: SystemZ::R3D) |
1375 | .addImm(Val: 72) |
1376 | .addReg(RegNo: 0); |
1377 | // BASR r3,r3 |
1378 | BuildMI(BB: StackExtMBB, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::CallBASR_STACKEXT)) |
1379 | .addReg(RegNo: SystemZ::R3D); |
1380 | if (NeedSaveArg) { |
1381 | if (!NeedSaveSP) { |
1382 | // LGR r0,r3 |
1383 | BuildMI(BB&: MBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LGR)) |
1384 | .addReg(RegNo: SystemZ::R0D, flags: RegState::Define) |
1385 | .addReg(RegNo: SystemZ::R3D); |
1386 | } else { |
1387 | // In this case, the incoming value of r4 is saved in r0 so the |
1388 | // latter register is unavailable. Store r3 in its corresponding |
1389 | // slot in the parameter list instead. Do this at the start of |
1390 | // the prolog before r4 is manipulated by anything else. |
1391 | // STG r3, 2192(r4) |
1392 | BuildMI(BB&: MBB, I: MBB.begin(), MIMD: DL, MCID: ZII->get(Opcode: SystemZ::STG)) |
1393 | .addReg(RegNo: SystemZ::R3D) |
1394 | .addReg(RegNo: SystemZ::R4D) |
1395 | .addImm(Val: SaveSlotR3) |
1396 | .addReg(RegNo: 0); |
1397 | } |
1398 | } |
1399 | // LLGT r3,1208 |
1400 | BuildMI(BB&: MBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LLGT), DestReg: SystemZ::R3D) |
1401 | .addReg(RegNo: 0) |
1402 | .addImm(Val: 1208) |
1403 | .addReg(RegNo: 0); |
1404 | // CG r4,64(,r3) |
1405 | BuildMI(BB&: MBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::CG)) |
1406 | .addReg(RegNo: SystemZ::R4D) |
1407 | .addReg(RegNo: SystemZ::R3D) |
1408 | .addImm(Val: 64) |
1409 | .addReg(RegNo: 0); |
1410 | // JLL b'0100',F'37' |
1411 | BuildMI(BB&: MBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::BRC)) |
1412 | .addImm(Val: SystemZ::CCMASK_ICMP) |
1413 | .addImm(Val: SystemZ::CCMASK_CMP_LT) |
1414 | .addMBB(MBB: StackExtMBB); |
1415 | |
1416 | NextMBB = SystemZ::splitBlockBefore(MI: StackAllocMI, MBB: &MBB); |
1417 | MBB.addSuccessor(Succ: NextMBB); |
1418 | MBB.addSuccessor(Succ: StackExtMBB); |
1419 | if (NeedSaveArg) { |
1420 | if (!NeedSaveSP) { |
1421 | // LGR r3, r0 |
1422 | BuildMI(BB&: *NextMBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LGR)) |
1423 | .addReg(RegNo: SystemZ::R3D, flags: RegState::Define) |
1424 | .addReg(RegNo: SystemZ::R0D, flags: RegState::Kill); |
1425 | } else { |
1426 | // In this case, the incoming value of r4 is saved in r0 so the |
1427 | // latter register is unavailable. We stored r3 in its corresponding |
1428 | // slot in the parameter list instead and we now restore it from there. |
1429 | // LGR r3, r0 |
1430 | BuildMI(BB&: *NextMBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LGR)) |
1431 | .addReg(RegNo: SystemZ::R3D, flags: RegState::Define) |
1432 | .addReg(RegNo: SystemZ::R0D); |
1433 | // LG r3, 2192(r3) |
1434 | BuildMI(BB&: *NextMBB, I: StackAllocMI, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::LG)) |
1435 | .addReg(RegNo: SystemZ::R3D, flags: RegState::Define) |
1436 | .addReg(RegNo: SystemZ::R3D) |
1437 | .addImm(Val: SaveSlotR3) |
1438 | .addReg(RegNo: 0); |
1439 | } |
1440 | } |
1441 | |
1442 | // Add jump back from stack extension BB. |
1443 | BuildMI(BB: StackExtMBB, MIMD: DL, MCID: ZII->get(Opcode: SystemZ::J)).addMBB(MBB: NextMBB); |
1444 | StackExtMBB->addSuccessor(Succ: NextMBB); |
1445 | |
1446 | StackAllocMI->eraseFromParent(); |
1447 | |
1448 | // Compute the live-in lists for the new blocks. |
1449 | fullyRecomputeLiveIns(MBBs: {StackExtMBB, NextMBB}); |
1450 | } |
1451 | |
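     | // XPLINK only needs a frame pointer when variable-sized objects make
     | // SP-relative offsets unknown at compile time.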
1452 | bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const { |
1453 | return MF.getFrameInfo().hasVarSizedObjects();
1454 | } |
1455 | |
1456 | void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized( |
1457 | MachineFunction &MF, RegScavenger *RS) const { |
1458 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
1459 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1460 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
1461 | |
1462 | // Set up the stack frame offset.
1463 | MFFrame.setOffsetAdjustment(Regs.getStackPointerBias()); |
1464 | |
1465 | // Nothing to do for leaf functions. |
1466 | uint64_t StackSize = MFFrame.estimateStackSize(MF); |
1467 | if (StackSize == 0 && MFFrame.getCalleeSavedInfo().empty()) |
1468 | return; |
1469 | |
1470 | // Although the XPLINK specification for AMODE64 only requires the parameter
1471 | // area to be at least 32 bytes, and specifies no further rounding, we round
1472 | // this area up in 64-byte increments to remain compatible with existing
1473 | // compilers.
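     | // For example, a 40-byte parameter area is rounded up to 64 bytes and a
     | // 72-byte one to 128 bytes; an empty area still reserves the 64-byte minimum.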
1474 | MFFrame.setMaxCallFrameSize( |
1475 | std::max(a: 64U, b: (unsigned)alignTo(Value: MFFrame.getMaxCallFrameSize(), Align: 64))); |
1476 | |
1477 | // Account for frame objects with positive offsets. Since the displacement
1478 | // from SP/FP is calculated as ObjectOffset + StackSize + Bias, objects with
1479 | // positive offsets live in the caller's stack frame. They must be included
1480 | // here because they are accessed by displacement from SP/FP as well.
1481 | int64_t LargestArgOffset = 0; |
1482 | for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) { |
1483 | if (MFFrame.getObjectOffset(ObjectIdx: I) >= 0) { |
1484 | int64_t ObjOffset = MFFrame.getObjectOffset(ObjectIdx: I) + MFFrame.getObjectSize(ObjectIdx: I); |
1485 | LargestArgOffset = std::max(a: ObjOffset, b: LargestArgOffset); |
1486 | } |
1487 | } |
1488 | |
1489 | uint64_t MaxReach = (StackSize + Regs.getCallFrameSize() + |
1490 | Regs.getStackPointerBias() + LargestArgOffset); |
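     | // With the usual 2048-byte XPLINK stack pointer bias, this exceeds the
     | // 4095-byte reach of an unsigned 12-bit displacement once locals, the call
     | // frame and incoming-argument accesses together approach 2 KiB.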
1491 | |
1492 | if (!isUInt<12>(x: MaxReach)) { |
1493 | // We may need register scavenging slots if some parts of the frame |
1494 | // are outside the reach of an unsigned 12-bit displacement. |
1495 | RS->addScavengingFrameIndex(FI: MFFrame.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false)); |
1496 | RS->addScavengingFrameIndex(FI: MFFrame.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false)); |
1497 | } |
1498 | } |
1499 | |
1500 | // Determines the size of the frame, and creates the deferred spill objects. |
1501 | void SystemZXPLINKFrameLowering::determineFrameLayout( |
1502 | MachineFunction &MF) const { |
1503 | MachineFrameInfo &MFFrame = MF.getFrameInfo(); |
1504 | const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); |
1505 | auto *Regs = |
1506 | static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); |
1507 | |
1508 | uint64_t StackSize = MFFrame.getStackSize(); |
1509 | if (StackSize == 0) |
1510 | return; |
1511 | |
1512 | // Add the size of the register save area and the reserved area to the size. |
1513 | StackSize += Regs->getCallFrameSize(); |
1514 | MFFrame.setStackSize(StackSize); |
1515 | |
1516 | // We now know the stack size. Update the stack objects for the register save
1517 | // area now. This has no impact on the stack frame layout, which has already
1518 | // been computed; it just makes sure that all callee-saved registers have a
1519 | // valid offset assigned.
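     | // In effect, each such slot's recorded offset is rebased by the final stack
     | // size so that it addresses the same location relative to the callee's
     | // stack pointer.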
1520 | for (int FrameIdx = MFFrame.getObjectIndexBegin(); FrameIdx != 0; |
1521 | ++FrameIdx) { |
1522 | if (MFFrame.getStackID(ObjectIdx: FrameIdx) == TargetStackID::NoAlloc) { |
1523 | int64_t SPOffset = MFFrame.getObjectOffset(ObjectIdx: FrameIdx); |
1524 | SPOffset -= StackSize; |
1525 | MFFrame.setObjectOffset(ObjectIdx: FrameIdx, SPOffset); |
1526 | } |
1527 | } |
1528 | } |
1529 | |