//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
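//
// This pass instruments functions that carry the sanitize_memtag attribute:
// interesting allocas are padded and aligned to 16-byte tag granules,
// re-pointed through the llvm.aarch64.tagp intrinsic, and their memory is
// tagged on entry (or at lifetime.start) and untagged on every reachable
// exit (or at lifetime.end).
//
// Illustrative sketch of the rewrite (simplified, not verbatim pass output):
//
//   %x = alloca i32, align 4
//
// becomes roughly:
//
//   %x = alloca { i32, [12 x i8] }, align 16            ; padded to 16 bytes
//   %basetag = call ptr @llvm.aarch64.irg.sp(i64 0)     ; once per function
//   %x.tag = call ptr @llvm.aarch64.tagp.p0(ptr %x, ptr %basetag, i64 0)
//   call void @llvm.aarch64.settag(ptr %x.tag, i64 16)  ; tag the granule
//   ...                                                 ; uses rewritten to %x.tag
//   call void @llvm.aarch64.settag(ptr %x, i64 16)      ; untag before return
//
//===----------------------------------------------------------------------===//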

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <map>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true),
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

static cl::opt<size_t> ClMaxLifetimes(
    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum StackTaggingRecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,
};

static cl::opt<StackTaggingRecordStackHistoryMode> ClRecordStackHistory(
    "stack-tagging-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer")),
    cl::Hidden, cl::init(none));

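// MTE operates on 16-byte granules: allocas are padded and aligned to this
// size, and all settag/stgp regions are multiples of it.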
static const Align kTagGranuleSize = Align(16);

namespace {

class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      // 0x01 in every byte; multiplying by the memset byte value below splats
      // that value across all eight bytes of the word.
      uint64_t Cst = 0x0101010101010101UL;
      // Clear the bytes of the mask that fall below Start or at/after End so
      // only the bytes actually covered by the memset are written.
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializers 16 bytes at a time; if either of
    // the two 8-byte halves is non-zero and non-undef, emit an STGP.
    // Otherwise, emit zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {}

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *insertBaseTaggedPointer(
      const Module &M,
      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
      const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc = Intrinsic::getOrInsertDeclaration(
      F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian = AI->getModule()->getTargetTriple().isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const Module &M,
    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : AllocasToInstrument) {
    const memtag::AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Instruction *Base =
      IRB.CreateIntrinsic(Intrinsic::aarch64_irg_sp, {},
                          {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  const Triple &TargetTriple = M.getTargetTriple();
  // This ABI will make it into Android API level 35.
  // The ThreadLong format is the same as with HWASan, but the entries for
  // stack MTE take two slots (16 bytes).
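  // Each ring buffer entry written below is 16 bytes: the first word holds
  // the PC of this function, the second holds the frame pointer with this
  // frame's base tag folded into bits 56..59.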
  if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
      TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(35) &&
      !AllocasToInstrument.empty()) {
    constexpr int StackMteSlot = -3;
    constexpr uint64_t TagMask = 0xFULL << 56;

    auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
    Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
    auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    Value *FP = memtag::getFP(IRB);
    Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
    Value *TaggedFP = IRB.CreateOr(FP, Tag);
    Value *PC = memtag::getPC(TargetTriple, IRB);
    Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
    IRB.CreateStore(PC, RecordPtr);
    IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));

    IRB.CreateStore(memtag::incrementThreadLong(IRB, ThreadLong, 16), SlotPtr);
  }
  return Base;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  OptimizationRemarkEmitter &ORE =
      getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  memtag::StackInfoBuilder SIB(SSI, DEBUG_TYPE);
  for (Instruction &I : instructions(F))
    SIB.visit(ORE, I);
  memtag::StackInfo &SInfo = SIB.get();

  if (SInfo.AllocasToInstrument.empty())
    return false;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  std::unique_ptr<LoopInfo> DeleteLI;
  LoopInfo *LI = nullptr;
  if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
    LI = &LIWP->getLoopInfo();
  } else {
    DeleteLI = std::make_unique<LoopInfo>(*DT);
    LI = DeleteLI.get();
  }

  SetTagFunc = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                 Intrinsic::aarch64_settag);

  Instruction *Base =
      insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);

  unsigned int NextTag = 0;
  for (auto &I : SInfo.AllocasToInstrument) {
    memtag::AllocaInfo &Info = I.second;
    assert(Info.AI && SIB.getAllocaInterestingness(*Info.AI) ==
                          llvm::memtag::AllocaInterestingness::kInteresting);
    memtag::alignAndPadAlloca(Info, kTagGranuleSize);
    AllocaInst *AI = Info.AI;
    unsigned int Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Instruction *TagPCall =
        IRB.CreateIntrinsic(Intrinsic::aarch64_tagp, {Info.AI->getType()},
                            {Constant::getNullValue(Info.AI->getType()), Base,
                             ConstantInt::get(IRB.getInt64Ty(), Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    // Does not replace metadata, so we don't have to handle DbgVariableRecords.
    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
      return !isa<LifetimeIntrinsic>(U.getUser());
    });
    TagPCall->setOperand(0, Info.AI);

    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice && SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
                                   ClMaxLifetimes);
    if (StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      uint64_t Size = *Info.AI->getAllocationSize(*DL);
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto *RI : SInfo.RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto *II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto *II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    memtag::annotateDebugRecords(Info, Tag);
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto *I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}