//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true),
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

static cl::opt<size_t> ClMaxLifetimes(
    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum StackTaggingRecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,
};

static cl::opt<StackTaggingRecordStackHistoryMode> ClRecordStackHistory(
    "stack-tagging-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer")),
    cl::Hidden, cl::init(none));

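// MTE operates on 16-byte tag granules: every instrumented alloca below is
// padded and aligned to this granule size before its memory is (re)tagged.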
static const Align kTagGranuleSize = Align(16);

namespace {

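// Helper that merges the constant initializers of a single alloca (simple
// stores and constant memsets at constant offsets) into the tagging
// instructions, so the allocation is initialized and tagged with stgp /
// settag.zero calls instead of separate stores followed by settag.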
class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
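    // For each (little-endian) 8-byte word that the memset range touches,
    // build a constant with the memset byte replicated into exactly the bytes
    // covered by [Start, End). For example, memset(Base + 3, 0xAB, 3) lands in
    // the word at offset 0 and contributes the constant 0x0000ABABAB000000.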
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

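  // Merge a (flattened) stored value into Out[] by slicing it into the
  // 8-byte-aligned words it overlaps. For example, an i32 stored at offset 4
  // is zero-extended to i64 and shifted left by 32 bits so it occupies the
  // upper four bytes of the word at offset 0.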
  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Walk the 8-byte initializer list 16 bytes at a time; if one of the two
    // 8-byte halves is non-zero and non-undef, emit an STGP. Otherwise, emit
    // zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

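  // Emit a single stgp intrinsic call: it stores the two 64-bit values A and B
  // into the 16-byte granule at Offset and sets that granule's memory tag to
  // the tag of Ptr in one operation.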
  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

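  // Convert a stored value of any first-class type (integer, pointer, float,
  // or vector thereof) into an integer of the same store size so that
  // sliceValue() can carve it into 64-bit words.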
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *insertBaseTaggedPointer(
      const Module &M,
      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
      const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

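// Scan forward from StartInst for up to ClScanLimit instructions, feeding
// simple stores and constant-sized, constant-valued memsets that write to a
// constant offset within [StartPtr, StartPtr + Size) into IB. The scan stops
// at anything else that may read or write the allocation. Returns the last
// instruction that was merged, so tagging can be inserted after it.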
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

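// Reset the tags of the alloca's granules at a function exit or lifetime end
// by issuing settag on the plain alloca pointer (rather than the tagp-derived
// one), undoing the per-alloca tag.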
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

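// Create the per-function base tagged pointer: an irg.sp call placed in the
// nearest common dominator of all instrumented allocas, so the random base tag
// is materialized exactly once and as late as possible. Optionally also record
// this frame in the thread-local stack-history ring buffer.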
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const Module &M,
    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : AllocasToInstrument) {
    const memtag::AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  auto TargetTriple = Triple(M.getTargetTriple());
  // This is not a stable ABI for now, so only allow in dev builds with API
  // level 10000.
  // The ThreadLong format is the same as with HWASan, but the entries for
  // stack MTE take two slots (16 bytes).
  if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
      TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(10000) &&
      !AllocasToInstrument.empty()) {
    constexpr int StackMteSlot = -3;
    constexpr uint64_t TagMask = 0xFULL << 56;

    auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
    Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
    auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    Value *FP = memtag::getFP(IRB);
    Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
    Value *TaggedFP = IRB.CreateOr(FP, Tag);
    Value *PC = memtag::getPC(TargetTriple, IRB);
    Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
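    // Each ring-buffer record is 16 bytes: the current PC in the first 8-byte
    // slot, and the frame pointer with the IRG base tag folded into bits
    // 56..59 in the second slot.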
    IRB.CreateStore(PC, RecordPtr);
    IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));
    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages, it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    // https://bugs.llvm.org/show_bug.cgi?id=39030
    // Runtime library makes sure not to use the highest bit.
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 16)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }
  return Base;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  memtag::StackInfoBuilder SIB(SSI);
  for (Instruction &I : instructions(F))
    SIB.visit(I);
  memtag::StackInfo &SInfo = SIB.get();

  if (SInfo.AllocasToInstrument.empty())
    return false;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  std::unique_ptr<LoopInfo> DeleteLI;
  LoopInfo *LI = nullptr;
  if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
    LI = &LIWP->getLoopInfo();
  } else {
    DeleteLI = std::make_unique<LoopInfo>(*DT);
    LI = DeleteLI.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base =
      insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);

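  // Assign each instrumented alloca a tag offset 0..15 in round-robin order;
  // the actual tag is produced by the aarch64.tagp intrinsic from the
  // irg.sp-generated base pointer and this per-alloca offset.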
  int NextTag = 0;
  for (auto &I : SInfo.AllocasToInstrument) {
    memtag::AllocaInfo &Info = I.second;
    assert(Info.AI && SIB.isInterestingAlloca(*Info.AI));
    memtag::alignAndPadAlloca(Info, kTagGranuleSize);
    AllocaInst *AI = Info.AI;
    int Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    // Does not replace metadata, so we don't have to handle DbgVariableRecords.
    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
      return !memtag::isLifetimeIntrinsic(U.getUser());
    });
    TagPCall->setOperand(0, Info.AI);

    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice && SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
                                   ClMaxLifetimes);
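    // Standard lifetime: tag right after lifetime.start and untag at each
    // reachable lifetime.end or return. Otherwise: tag immediately after the
    // alloca, untag at every return, and drop the lifetime markers entirely.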
    if (StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      uint64_t Size = *Info.AI->getAllocationSize(*DL);
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto *RI : SInfo.RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto *II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto *II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    memtag::annotateDebugRecords(Info, static_cast<unsigned long>(Tag));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto *I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}