//===- AArch64StackTagging.cpp - Stack tagging in IR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <map>
#include <memory>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true),
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true),
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

static cl::opt<size_t> ClMaxLifetimes(
    "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum StackTaggingRecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,
};

static cl::opt<StackTaggingRecordStackHistoryMode> ClRecordStackHistory(
    "stack-tagging-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer")),
    cl::Hidden, cl::init(none));

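// MTE tags memory in 16-byte granules, so allocas are padded and aligned to a
// multiple of this size before their tags are set.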
static const Align kTagGranuleSize = Align(16);

namespace {

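// Helper that rewrites the initializing stores/memsets of a single alloca into
// MTE tag-setting stores: stgp for 16-byte chunks that carry data, settag.zero
// for all-zero chunks, and settag where the contents may be left undefined.
// This lets one instruction both set the tag and initialize the memory.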
class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

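  // Fold a memset of a non-zero constant byte into Out[]: for each affected
  // 8-byte word, build a mask of 0x01 bytes covering [Start, End), multiply it
  // by the memset byte to replicate that byte, and OR the result into the
  // word. For example, memset of byte 0xAB over offsets [3, 5) yields
  // Out[0] |= 0x000000ABAB000000 (little-endian byte order).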
  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
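  // For example, an i64 stored at alloca offset 4 contributes
  // sliceValue(V, -4) (V shifted left by 32 bits) to the word at offset 0 and
  // sliceValue(V, 4) (V shifted right by 32 bits) to the word at offset 8.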
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

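  // Materialize the combined initializer: walk Out[] 16 bytes at a time,
  // emitting stgp where either 8-byte half has a known value and settag.zero
  // for the gaps in between (which are either zero-initialized or undef). If
  // there are no initializers at all, the whole allocation is tagged with
  // settag and its contents left undef. The original stores/memsets are
  // erased afterwards.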
  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through 8-byte initializer list 16 bytes at a time;
    // If one of the two 8-byte halves is non-zero non-undef, emit STGP.
    // Otherwise, emit zeroes up to next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {}

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *insertBaseTaggedPointer(
      const Module &M,
      const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
      const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

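// Scan forward from StartInst for simple stores and constant memsets that
// (partially) initialize the alloca at StartPtr, folding them into IB. The
// scan stops at the first instruction that might otherwise access the alloca,
// at an initializer that cannot be analyzed, or after ClScanLimit
// instructions. Returns the last instruction that was folded.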
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

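// Set the MTE tag for [Ptr, Ptr + Size) before InsertBefore. When initializer
// merging is enabled, nearby stores/memsets into the alloca are folded into
// the tag-setting instructions instead of being emitted separately.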
void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc = Intrinsic::getOrInsertDeclaration(
      F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian = AI->getModule()->getTargetTriple().isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

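// Reset the tags for [AI, AI + Size) before InsertBefore by re-tagging the
// memory with the original (untagged) alloca pointer.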
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getPtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

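// Insert the llvm.aarch64.irg.sp call that produces the random base tag for
// this frame, placed in the nearest common dominator of all instrumented
// allocas. Optionally also record the frame (PC and tagged FP) in the
// thread-local stack history ring buffer.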
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const Module &M,
    const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : AllocasToInstrument) {
    const memtag::AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Instruction *Base =
      IRB.CreateIntrinsic(Intrinsic::aarch64_irg_sp, {},
                          {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  const Triple &TargetTriple = M.getTargetTriple();
  // This ABI will make it into Android API level 35.
  // The ThreadLong format is the same as with HWASan, but the entries for
  // stack MTE take two slots (16 bytes).
  if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
      TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(35) &&
      !AllocasToInstrument.empty()) {
    constexpr int StackMteSlot = -3;
    constexpr uint64_t TagMask = 0xFULL << 56;

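    // Each record in the ring buffer is 16 bytes: the function's PC, followed
    // by the frame pointer with this frame's base tag packed into bits 56-59.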
    auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
    Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
    auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    Value *FP = memtag::getFP(IRB);
    Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
    Value *TaggedFP = IRB.CreateOr(FP, Tag);
    Value *PC = memtag::getPC(TargetTriple, IRB);
    Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
    IRB.CreateStore(PC, RecordPtr);
    IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));

    IRB.CreateStore(memtag::incrementThreadLong(IRB, ThreadLong, 16), SlotPtr);
  }
  return Base;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  OptimizationRemarkEmitter &ORE =
      getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  memtag::StackInfoBuilder SIB(SSI, DEBUG_TYPE);
  for (Instruction &I : instructions(F))
    SIB.visit(ORE, I);
  memtag::StackInfo &SInfo = SIB.get();

  if (SInfo.AllocasToInstrument.empty())
    return false;

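  // Reuse the dominator tree, post-dominator tree and loop info from earlier
  // passes when they are still available; otherwise build local copies just
  // for this run.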
  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  std::unique_ptr<LoopInfo> DeleteLI;
  LoopInfo *LI = nullptr;
  if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
    LI = &LIWP->getLoopInfo();
  } else {
    DeleteLI = std::make_unique<LoopInfo>(*DT);
    LI = DeleteLI.get();
  }

  SetTagFunc = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                 Intrinsic::aarch64_settag);

  Instruction *Base =
      insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);

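  // Assign each interesting alloca a tag offset in 0..15 (wrapping), applied
  // on top of the random base tag produced by IRG via the tagp intrinsic.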
  unsigned int NextTag = 0;
  for (auto &I : SInfo.AllocasToInstrument) {
    memtag::AllocaInfo &Info = I.second;
    assert(Info.AI && SIB.getAllocaInterestingness(*Info.AI) ==
                          llvm::memtag::AllocaInterestingness::kInteresting);
    memtag::alignAndPadAlloca(Info, kTagGranuleSize);
    AllocaInst *AI = Info.AI;
    unsigned int Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Instruction *TagPCall =
        IRB.CreateIntrinsic(Intrinsic::aarch64_tagp, {Info.AI->getType()},
                            {Constant::getNullValue(Info.AI->getType()), Base,
                             ConstantInt::get(IRB.getInt64Ty(), Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    // Does not replace metadata, so we don't have to handle DbgVariableRecords.
    Info.AI->replaceUsesWithIf(TagPCall, [&](const Use &U) {
      return !isa<LifetimeIntrinsic>(U.getUser());
    });
    TagPCall->setOperand(0, Info.AI);

    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice &&
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
                                   ClMaxLifetimes);
    if (StandardLifetime) {
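      // Single recognizable lifetime: set the tags right after lifetime.start
      // and reset them on every path that reaches a lifetime.end or a return.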
      IntrinsicInst *Start = Info.LifetimeStart[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), TagPCall, Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
                                        SInfo.RetVec, TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
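      // No usable lifetime markers: set the tags right after the tagp call
      // and reset them before every return.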
      uint64_t Size = *Info.AI->getAllocationSize(*DL);
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getPtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto *RI : SInfo.RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto *II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto *II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    memtag::annotateDebugRecords(Info, Tag);
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto *I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}