1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64.h"
10#include "AArch64InstrInfo.h"
11#include "AArch64MachineFunctionInfo.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/Statistic.h"
14#include "llvm/CodeGen/MachineFrameInfo.h"
15#include "llvm/CodeGen/MachineFunction.h"
16#include "llvm/CodeGen/MachineFunctionPass.h"
17#include "llvm/CodeGen/MachineInstrBuilder.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19#include "llvm/CodeGen/MachineTraceMetrics.h"
20#include "llvm/CodeGen/Passes.h"
21#include "llvm/CodeGen/TargetInstrInfo.h"
22#include "llvm/CodeGen/TargetRegisterInfo.h"
23#include "llvm/CodeGen/TargetSubtargetInfo.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31
32enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
33
34static cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
35 "stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(Val: UncheckedSafe),
36 cl::desc(
37 "Unconditionally apply unchecked-ld-st optimization (even for large "
38 "stack frames, or in the presence of variable sized allocas)."),
39 cl::values(
40 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
41 clEnumValN(
42 UncheckedSafe, "safe",
43 "apply unchecked-ld-st when the target is definitely within range"),
44 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
45
46static cl::opt<bool>
47 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(Val: true),
48 cl::desc("Apply first slot optimization for stack tagging "
49 "(eliminate ADDG Rt, Rn, 0, 0)."));
50
51namespace {
52
53class AArch64StackTaggingPreRAImpl {
54 MachineFunction *MF;
55 AArch64FunctionInfo *AFI;
56 MachineFrameInfo *MFI;
57 MachineRegisterInfo *MRI;
58 const AArch64RegisterInfo *TRI;
59 const AArch64InstrInfo *TII;
60
61 SmallVector<MachineInstr*, 16> ReTags;
62
63public:
64 bool run(MachineFunction &Func);
65
66private:
67 bool mayUseUncheckedLoadStore();
68 void uncheckUsesOf(unsigned TaggedReg, int FI);
69 void uncheckLoadsAndStores();
70 std::optional<int> findFirstSlotCandidate();
71};
72
73class AArch64StackTaggingPreRALegacy : public MachineFunctionPass {
74public:
75 static char ID;
76 AArch64StackTaggingPreRALegacy() : MachineFunctionPass(ID) {}
77
78 bool runOnMachineFunction(MachineFunction &MF) override {
79 if (skipFunction(F: MF.getFunction()))
80 return false;
81 return AArch64StackTaggingPreRAImpl().run(Func&: MF);
82 }
83
84 StringRef getPassName() const override {
85 return "AArch64 Stack Tagging PreRA";
86 }
87
88 void getAnalysisUsage(AnalysisUsage &AU) const override {
89 AU.setPreservesCFG();
90 MachineFunctionPass::getAnalysisUsage(AU);
91 }
92};
93} // end anonymous namespace
94
95char AArch64StackTaggingPreRALegacy::ID = 0;
96
97INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRALegacy,
98 "aarch64-stack-tagging-pre-ra",
99 "AArch64 Stack Tagging PreRA Pass", false, false)
100INITIALIZE_PASS_END(AArch64StackTaggingPreRALegacy,
101 "aarch64-stack-tagging-pre-ra",
102 "AArch64 Stack Tagging PreRA Pass", false, false)
103
104FunctionPass *llvm::createAArch64StackTaggingPreRALegacyPass() {
105 return new AArch64StackTaggingPreRALegacy();
106}
107
108PreservedAnalyses
109AArch64StackTaggingPreRAPass::run(MachineFunction &MF,
110 MachineFunctionAnalysisManager &MFAM) {
111 if (AArch64StackTaggingPreRAImpl().run(Func&: MF)) {
112 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
113 PA.preserveSet<CFGAnalyses>();
114 return PA;
115 }
116 return PreservedAnalyses::all();
117}
118
119static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
120 switch (Opcode) {
121 case AArch64::LDRBBui:
122 case AArch64::LDRHHui:
123 case AArch64::LDRWui:
124 case AArch64::LDRXui:
125
126 case AArch64::LDRBui:
127 case AArch64::LDRHui:
128 case AArch64::LDRSui:
129 case AArch64::LDRDui:
130 case AArch64::LDRQui:
131
132 case AArch64::LDRSHWui:
133 case AArch64::LDRSHXui:
134
135 case AArch64::LDRSBWui:
136 case AArch64::LDRSBXui:
137
138 case AArch64::LDRSWui:
139
140 case AArch64::STRBBui:
141 case AArch64::STRHHui:
142 case AArch64::STRWui:
143 case AArch64::STRXui:
144
145 case AArch64::STRBui:
146 case AArch64::STRHui:
147 case AArch64::STRSui:
148 case AArch64::STRDui:
149 case AArch64::STRQui:
150
151 case AArch64::LDPWi:
152 case AArch64::LDPXi:
153 case AArch64::LDPSi:
154 case AArch64::LDPDi:
155 case AArch64::LDPQi:
156
157 case AArch64::LDPSWi:
158
159 case AArch64::STPWi:
160 case AArch64::STPXi:
161 case AArch64::STPSi:
162 case AArch64::STPDi:
163 case AArch64::STPQi:
164 return true;
165 default:
166 return false;
167 }
168}
169
170bool AArch64StackTaggingPreRAImpl::mayUseUncheckedLoadStore() {
171 if (ClUncheckedLdSt == UncheckedNever)
172 return false;
173 else if (ClUncheckedLdSt == UncheckedAlways)
174 return true;
175
176 // This estimate can be improved if we had harder guarantees about stack frame
177 // layout. With LocalStackAllocation we can estimate SP offset to any
178 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
179 // objects ahead of non-tagged ones, but that's not always desirable.
180 //
181 // Underestimating SP offset here may require the use of LDG to materialize
182 // the tagged address of the stack slot, along with a scratch register
183 // allocation (post-regalloc!).
184 //
185 // For now we do the safe thing here and require that the entire stack frame
186 // is within range of the shortest of the unchecked instructions.
187 unsigned FrameSize = 0;
188 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
189 FrameSize += MFI->getObjectSize(ObjectIdx: i);
190 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
191 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
192}
193
194void AArch64StackTaggingPreRAImpl::uncheckUsesOf(unsigned TaggedReg, int FI) {
195 for (MachineInstr &UseI :
196 llvm::make_early_inc_range(Range: MRI->use_instructions(Reg: TaggedReg))) {
197 if (isUncheckedLoadOrStoreOpcode(Opcode: UseI.getOpcode())) {
198 // FI operand is always the one before the immediate offset.
199 unsigned OpIdx = TII->getLoadStoreImmIdx(Opc: UseI.getOpcode()) - 1;
200 if (UseI.getOperand(i: OpIdx).isReg() &&
201 UseI.getOperand(i: OpIdx).getReg() == TaggedReg) {
202 UseI.getOperand(i: OpIdx).ChangeToFrameIndex(Idx: FI);
203 UseI.getOperand(i: OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
204 }
205 } else if (UseI.isCopy() && UseI.getOperand(i: 0).getReg().isVirtual()) {
206 uncheckUsesOf(TaggedReg: UseI.getOperand(i: 0).getReg(), FI);
207 }
208 }
209}
210
211void AArch64StackTaggingPreRAImpl::uncheckLoadsAndStores() {
212 for (auto *I : ReTags) {
213 Register TaggedReg = I->getOperand(i: 0).getReg();
214 int FI = I->getOperand(i: 1).getIndex();
215 uncheckUsesOf(TaggedReg, FI);
216 }
217}
218
219namespace {
220struct SlotWithTag {
221 int FI;
222 int Tag;
223 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
224 explicit SlotWithTag(const MachineInstr &MI)
225 : FI(MI.getOperand(i: 1).getIndex()), Tag(MI.getOperand(i: 4).getImm()) {}
226 bool operator==(const SlotWithTag &Other) const {
227 return FI == Other.FI && Tag == Other.Tag;
228 }
229};
230} // namespace
231
232namespace llvm {
233template <> struct DenseMapInfo<SlotWithTag> {
234 static unsigned getHashValue(const SlotWithTag &V) {
235 return hash_combine(args: DenseMapInfo<int>::getHashValue(Val: V.FI),
236 args: DenseMapInfo<int>::getHashValue(Val: V.Tag));
237 }
238 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
239 return A == B;
240 }
241};
242} // namespace llvm
243
244static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
245 return MFI->getUseLocalStackAllocationBlock() &&
246 MFI->isObjectPreAllocated(ObjectIdx: FI);
247}
248
249// Pin one of the tagged slots to offset 0 from the tagged base pointer.
250// This would make its address available in a virtual register (IRG's def), as
251// opposed to requiring an ADDG instruction to materialize. This effectively
252// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
253// live almost everywhere anyway), and therefore needs to happen before
254// regalloc.
255std::optional<int> AArch64StackTaggingPreRAImpl::findFirstSlotCandidate() {
256 // Find the best (FI, Tag) pair to pin to offset 0.
257 // Looking at the possible uses of a tagged address, the advantage of pinning
258 // is:
259 // - COPY to physical register.
260 // Does not matter, this would trade a MOV instruction for an ADDG.
261 // - ST*G matter, but those mostly appear near the function prologue where all
262 // the tagged addresses need to be materialized anyway; also, counting ST*G
263 // uses would overweight large allocas that require more than one ST*G
264 // instruction.
265 // - Load/Store instructions in the address operand do not require a tagged
266 // pointer, so they also do not benefit. These operands have already been
267 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
268 // instructions count.
269 // - Any other instruction may benefit from being pinned to offset 0.
270 LLVM_DEBUG(
271 dbgs() << "AArch64StackTaggingPreRAImpl::findFirstSlotCandidate\n");
272 if (!ClFirstSlot)
273 return std::nullopt;
274
275 DenseMap<SlotWithTag, int> RetagScore;
276 SlotWithTag MaxScoreST{-1, -1};
277 int MaxScore = -1;
278 for (auto *I : ReTags) {
279 SlotWithTag ST{*I};
280 if (isSlotPreAllocated(MFI, FI: ST.FI))
281 continue;
282
283 Register RetagReg = I->getOperand(i: 0).getReg();
284 if (!RetagReg.isVirtual())
285 continue;
286
287 int Score = 0;
288 SmallVector<Register, 8> WorkList;
289 WorkList.push_back(Elt: RetagReg);
290
291 while (!WorkList.empty()) {
292 Register UseReg = WorkList.pop_back_val();
293 for (auto &UseI : MRI->use_instructions(Reg: UseReg)) {
294 unsigned Opcode = UseI.getOpcode();
295 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
296 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
297 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
298 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
299 Opcode == AArch64::STZGloop_wback)
300 continue;
301 if (UseI.isCopy()) {
302 Register DstReg = UseI.getOperand(i: 0).getReg();
303 if (DstReg.isVirtual())
304 WorkList.push_back(Elt: DstReg);
305 continue;
306 }
307 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of "
308 << printReg(UseReg) << " in " << UseI << "\n");
309 Score++;
310 }
311 }
312
313 int TotalScore = RetagScore[ST] += Score;
314 if (TotalScore > MaxScore ||
315 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
316 MaxScore = TotalScore;
317 MaxScoreST = ST;
318 }
319 }
320
321 if (MaxScoreST.FI < 0)
322 return std::nullopt;
323
324 // If FI's tag is already 0, we are done.
325 if (MaxScoreST.Tag == 0)
326 return MaxScoreST.FI;
327
328 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
329 SlotWithTag SwapST{-1, -1};
330 for (auto *I : ReTags) {
331 SlotWithTag ST{*I};
332 if (ST.Tag == 0) {
333 SwapST = ST;
334 break;
335 }
336 }
337
338 // Swap tags between the victim and the highest scoring pair.
339 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
340 // the highest score slot without changing anything else.
341 for (auto *&I : ReTags) {
342 SlotWithTag ST{*I};
343 MachineOperand &TagOp = I->getOperand(i: 4);
344 if (ST == MaxScoreST) {
345 TagOp.setImm(0);
346 } else if (ST == SwapST) {
347 TagOp.setImm(MaxScoreST.Tag);
348 }
349 }
350 return MaxScoreST.FI;
351}
352
353bool AArch64StackTaggingPreRAImpl::run(MachineFunction &Func) {
354 MF = &Func;
355 MRI = &MF->getRegInfo();
356 AFI = MF->getInfo<AArch64FunctionInfo>();
357 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
358 TRI = static_cast<const AArch64RegisterInfo *>(
359 MF->getSubtarget().getRegisterInfo());
360 MFI = &MF->getFrameInfo();
361 ReTags.clear();
362
363 assert(MRI->isSSA());
364
365 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
366 << "********** Function: " << MF->getName() << '\n');
367
368 SmallSetVector<int, 8> TaggedSlots;
369 for (auto &BB : *MF) {
370 for (auto &I : BB) {
371 if (I.getOpcode() == AArch64::TAGPstack) {
372 ReTags.push_back(Elt: &I);
373 int FI = I.getOperand(i: 1).getIndex();
374 TaggedSlots.insert(X: FI);
375 // There should be no offsets in TAGP yet.
376 assert(I.getOperand(2).getImm() == 0);
377 }
378 }
379 }
380
381 // Take over from SSP. It does nothing for tagged slots, and should not really
382 // have been enabled in the first place.
383 for (int FI : TaggedSlots)
384 MFI->setObjectSSPLayout(ObjectIdx: FI, Kind: MachineFrameInfo::SSPLK_None);
385
386 if (ReTags.empty())
387 return false;
388
389 if (mayUseUncheckedLoadStore())
390 uncheckLoadsAndStores();
391
392 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
393 // If the base tagged pointer is set up to the address of this slot,
394 // the ADDG instruction can be eliminated.
395 std::optional<int> BaseSlot = findFirstSlotCandidate();
396 if (BaseSlot)
397 AFI->setTaggedBasePointerIndex(*BaseSlot);
398
399 for (auto *I : ReTags) {
400 int FI = I->getOperand(i: 1).getIndex();
401 int Tag = I->getOperand(i: 4).getImm();
402 Register Base = I->getOperand(i: 3).getReg();
403 if (Tag == 0 && FI == BaseSlot) {
404 BuildMI(BB&: *I->getParent(), I, MIMD: {}, MCID: TII->get(Opcode: AArch64::COPY),
405 DestReg: I->getOperand(i: 0).getReg())
406 .addReg(RegNo: Base);
407 I->eraseFromParent();
408 }
409 }
410
411 return true;
412}
413