//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"

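// Controls when tag-checked loads and stores of tagged stack slots may be
// rewritten into their unchecked, frame-index addressed forms.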
enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };

cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
    "stack-tagging-unchecked-ld-st", cl::Hidden,
    cl::init(UncheckedSafe),
    cl::desc("When to apply the unchecked-ld-st optimization ('always' "
             "applies it even for large stack frames, or in the presence of "
             "variable sized allocas)."),
    cl::values(
        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
        clEnumValN(
            UncheckedSafe, "safe",
            "apply unchecked-ld-st when the target is definitely within range"),
        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));

static cl::opt<bool>
    ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
                cl::desc("Apply first slot optimization for stack tagging "
                         "(eliminate ADDG Rt, Rn, 0, 0)."));

namespace {

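// Pre-RA pass for MTE stack tagging: rewrites loads and stores of tagged
// stack slots into unchecked, frame-index addressed forms where this is safe,
// and pins one tagged slot to offset 0 from the tagged base pointer so that
// its ADDG can be replaced by a plain COPY.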
class AArch64StackTaggingPreRA : public MachineFunctionPass {
  MachineFunction *MF;
  AArch64FunctionInfo *AFI;
  MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  const AArch64RegisterInfo *TRI;
  const AArch64InstrInfo *TII;

  SmallVector<MachineInstr *, 16> ReTags;

public:
  static char ID;
  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
    initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool mayUseUncheckedLoadStore();
  void uncheckUsesOf(unsigned TaggedReg, int FI);
  void uncheckLoadsAndStores();
  std::optional<int> findFirstSlotCandidate();

  bool runOnMachineFunction(MachineFunction &Func) override;
  StringRef getPassName() const override {
    return "AArch64 Stack Tagging PreRA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // end anonymous namespace

char AArch64StackTaggingPreRA::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                      "AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                    "AArch64 Stack Tagging PreRA Pass", false, false)

FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
  return new AArch64StackTaggingPreRA();
}

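// Opcodes below are loads and stores with an unsigned scaled immediate
// offset. With SP as the base register such accesses are tag-unchecked under
// MTE, so they can address a tagged slot directly through its frame index.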
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:

  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:

  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:

  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:

  case AArch64::LDRSWui:

  case AArch64::STRBBui:
  case AArch64::STRHHui:
  case AArch64::STRWui:
  case AArch64::STRXui:

  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:

  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::LDPSi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:

  case AArch64::LDPSWi:

  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
    return true;
  default:
    return false;
  }
}

bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
  if (ClUncheckedLdSt == UncheckedNever)
    return false;
  if (ClUncheckedLdSt == UncheckedAlways)
    return true;

  // This estimate could be improved if we had harder guarantees about stack
  // frame layout. With LocalStackAllocation we can estimate the SP offset to
  // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
  // tagged objects ahead of non-tagged ones, but that's not always desirable.
  //
  // Underestimating the SP offset here may require the use of LDG to
  // materialize the tagged address of the stack slot, along with a scratch
  // register allocation (post-regalloc!).
  //
  // For now we do the safe thing here and require that the entire stack frame
  // is within range of the shortest of the unchecked instructions.
  unsigned FrameSize = 0;
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
    FrameSize += MFI->getObjectSize(i);
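  // 0xf00 leaves headroom below the 4095-byte reach of the byte-sized
  // unchecked forms, as the final frame layout is not yet known at this point.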
  bool EntireFrameReachableFromSP = FrameSize < 0xf00;
  return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
}

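// Rewrite every unchecked load/store that addresses memory through TaggedReg
// to use the frame index FI directly, following virtual-register copies
// transitively.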
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
  for (MachineInstr &UseI :
       llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
    if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
      // FI operand is always the one before the immediate offset.
      unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
      if (UseI.getOperand(OpIdx).isReg() &&
          UseI.getOperand(OpIdx).getReg() == TaggedReg) {
        UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
        UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
      }
    } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
      uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
    }
  }
}

void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
  for (auto *I : ReTags) {
    Register TaggedReg = I->getOperand(0).getReg();
    int FI = I->getOperand(1).getIndex();
    uncheckUsesOf(TaggedReg, FI);
  }
}

namespace {
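// A stack slot together with the tag offset assigned to it by a TAGPstack
// instruction: operand 1 holds the frame index, operand 4 the tag offset.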
struct SlotWithTag {
  int FI;
  int Tag;
  SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
  explicit SlotWithTag(const MachineInstr &MI)
      : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
  bool operator==(const SlotWithTag &Other) const {
    return FI == Other.FI && Tag == Other.Tag;
  }
};
} // namespace

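// DenseMap traits for SlotWithTag. The empty and tombstone keys use negative
// values, which never clash with real tagged slots: their frame indices and
// tag offsets are both non-negative.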
namespace llvm {
template <> struct DenseMapInfo<SlotWithTag> {
  static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
  static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
  static unsigned getHashValue(const SlotWithTag &V) {
    return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
                        DenseMapInfo<int>::getHashValue(V.Tag));
  }
  static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
    return A == B;
  }
};
} // namespace llvm

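// True if FI was assigned a fixed offset within the local stack block by the
// LocalStackSlotAllocation pass; such slots cannot be re-pinned to offset 0.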
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
  return MFI->getUseLocalStackAllocationBlock() &&
         MFI->isObjectPreAllocated(FI);
}

// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This makes its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize it. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
  // Find the best (FI, Tag) pair to pin to offset 0.
  // Looking at the possible uses of a tagged address, the advantage of pinning
  // is:
  // - COPY to a physical register.
  //   Does not matter; this would trade a MOV instruction for an ADDG.
  // - ST*G instructions matter, but those mostly appear near the function
  //   prologue where all the tagged addresses need to be materialized anyway;
  //   also, counting ST*G uses would overweight large allocas that require
  //   more than one ST*G instruction.
  // - Load/store instructions do not require a tagged pointer in the address
  //   operand, so they do not benefit either. Those operands have already been
  //   rewritten (see uncheckLoadsAndStores), so all remaining load/store
  //   instructions count.
  // - Any other instruction may benefit from being pinned to offset 0.
  LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
  if (!ClFirstSlot)
    return std::nullopt;

  DenseMap<SlotWithTag, int> RetagScore;
  SlotWithTag MaxScoreST{-1, -1};
  int MaxScore = -1;
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (isSlotPreAllocated(MFI, ST.FI))
      continue;

    Register RetagReg = I->getOperand(0).getReg();
    if (!RetagReg.isVirtual())
      continue;

    int Score = 0;
    SmallVector<Register, 8> WorkList;
    WorkList.push_back(RetagReg);

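    // Walk the uses through virtual-register copies, counting every
    // instruction that needs the tagged address in a register; ST*G forms are
    // excluded for the reasons given above.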
    while (!WorkList.empty()) {
      Register UseReg = WorkList.pop_back_val();
      for (auto &UseI : MRI->use_instructions(UseReg)) {
        unsigned Opcode = UseI.getOpcode();
        if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
            Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
            Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
            Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
            Opcode == AArch64::STZGloop_wback)
          continue;
        if (UseI.isCopy()) {
          Register DstReg = UseI.getOperand(0).getReg();
          if (DstReg.isVirtual())
            WorkList.push_back(DstReg);
          continue;
        }
        LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
                          << Register::virtReg2Index(UseReg) << " in " << UseI
                          << "\n");
        Score++;
      }
    }

    int TotalScore = RetagScore[ST] += Score;
    if (TotalScore > MaxScore ||
        (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
      MaxScore = TotalScore;
      MaxScoreST = ST;
    }
  }

  if (MaxScoreST.FI < 0)
    return std::nullopt;

  // If FI's tag is already 0, we are done.
  if (MaxScoreST.Tag == 0)
    return MaxScoreST.FI;

  // Otherwise, find an arbitrary victim pair (FI, Tag) where Tag == 0.
  SlotWithTag SwapST{-1, -1};
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (ST.Tag == 0) {
      SwapST = ST;
      break;
    }
  }

  // Swap tags between the victim and the highest scoring pair.
  // If SwapST is still (-1, -1), that's fine, too - we'll simply assign tag 0
  // to the highest scoring slot without changing anything else.
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    MachineOperand &TagOp = I->getOperand(4);
    if (ST == MaxScoreST) {
      TagOp.setImm(0);
    } else if (ST == SwapST) {
      TagOp.setImm(MaxScoreST.Tag);
    }
  }
  return MaxScoreST.FI;
}

bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  MRI = &MF->getRegInfo();
  AFI = MF->getInfo<AArch64FunctionInfo>();
  TII = static_cast<const AArch64InstrInfo *>(
      MF->getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());
  MFI = &MF->getFrameInfo();
  ReTags.clear();

  assert(MRI->isSSA());

  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
                    << "********** Function: " << MF->getName() << '\n');

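  // Collect all TAGPstack instructions; each one materializes the tagged
  // address of a stack slot (operand 1) with a tag offset (operand 4).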
  SmallSetVector<int, 8> TaggedSlots;
  for (auto &BB : *MF) {
    for (auto &I : BB) {
      if (I.getOpcode() == AArch64::TAGPstack) {
        ReTags.push_back(&I);
        int FI = I.getOperand(1).getIndex();
        TaggedSlots.insert(FI);
        // There should be no offsets in TAGP yet.
        assert(I.getOperand(2).getImm() == 0);
      }
    }
  }

  // Take over from SSP. It does nothing for tagged slots, and should not
  // really have been enabled in the first place.
  for (int FI : TaggedSlots)
    MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);

  if (ReTags.empty())
    return false;

  if (mayUseUncheckedLoadStore())
    uncheckLoadsAndStores();


  // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
  // If the base tagged pointer is set up to the address of this slot,
  // the ADDG instruction can be eliminated.
  std::optional<int> BaseSlot = findFirstSlotCandidate();
  if (BaseSlot)
    AFI->setTaggedBasePointerIndex(*BaseSlot);

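  // A TAGPstack of the base slot with tag offset 0 now computes the same
  // value as the tagged base pointer itself, so replace it with a plain COPY.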
  for (auto *I : ReTags) {
    int FI = I->getOperand(1).getIndex();
    int Tag = I->getOperand(4).getImm();
    Register Base = I->getOperand(3).getReg();
    if (Tag == 0 && FI == BaseSlot) {
      BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
              I->getOperand(0).getReg())
          .addReg(Base);
      I->eraseFromParent();
    }
  }

  return true;
}