//===- AMDGPUAsanInstrumentation.cpp - ASan related helper functions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUAsanInstrumentation.h"

#define DEBUG_TYPE "amdgpu-asan-instrumentation"

using namespace llvm;

namespace llvm {
namespace AMDGPU {

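// Return the redzone size implied by the ASan shadow mapping scale.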
static uint64_t getRedzoneSizeForScale(int AsanScale) {
  // Redzone used for stack and globals is at least 32 bytes.
  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
  return std::max(32U, 1U << AsanScale);
}

static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
  return getRedzoneSizeForScale(AsanScale);
}

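// Compute the size of the redzone appended to a global of SizeInBytes bytes,
// given the ASan shadow mapping scale.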
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
  constexpr uint64_t kMaxRZ = 1 << 18;
  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);

  uint64_t RZ = 0;
  if (SizeInBytes <= MinRZ / 2) {
    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
    // half of MinRZ.
    RZ = MinRZ - SizeInBytes;
  } else {
    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);

    // Round up to multiple of MinRZ.
    if (SizeInBytes % MinRZ)
      RZ += MinRZ - (SizeInBytes % MinRZ);
  }

  assert((RZ + SizeInBytes) % MinRZ == 0);

  return RZ;
}

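// Map an access size in bits (8, 16, 32, 64, 128) to the index used to pick
// the matching __asan_report_* callback (log2 of the size in bytes).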
static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  return Res;
}

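// Split off the "asan.report" block at the builder's insert point and return
// the instruction before which the report call should be emitted. When not
// recovering, the per-lane condition is ballot'ed across the wavefront so the
// report block is entered whenever any active lane fails the check, and the
// faulting lanes themselves terminate in llvm.amdgcn.unreachable.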
static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
                                         Value *Cond, bool Recover) {
  Value *ReportCond = Cond;
  if (!Recover) {
    auto *Ballot =
        IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
    ReportCond = IRB.CreateIsNotNull(Ballot);
  }

  auto *Trm = SplitBlockAndInsertIfThen(
      ReportCond, &*IRB.GetInsertPoint(), false,
      MDBuilder(M.getContext()).createUnlikelyBranchWeights());
  Trm->getParent()->setName("asan.report");

  if (Recover)
    return Trm;

  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
  IRB.SetInsertPoint(Trm);
  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {});
}

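// Emit the slow-path check used when the shadow byte is non-zero: the access
// is bad only if the offset of its last byte within the shadow granule is
// greater than or equal to the shadow value.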
static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                                Value *AddrLong, Value *ShadowValue,
                                uint32_t TypeStoreSize, int AsanScale) {
  uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
  // Addr & (Granularity - 1)
  Value *LastAccessedByte =
      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
  // (Addr & (Granularity - 1)) + size - 1
  if (TypeStoreSize / 8 > 1)
    LastAccessedByte = IRB.CreateAdd(
        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
  LastAccessedByte =
      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}

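// Emit a call to the matching __asan_report_{load,store}* runtime function,
// using the sized "_n" variant when SizeArgument is provided and the
// "_noabort" suffix when recovering.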
static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
                                      Type *IntptrTy, Instruction *InsertBefore,
                                      Value *Addr, bool IsWrite,
                                      size_t AccessSizeIndex,
                                      Value *SizeArgument, bool Recover) {
  IRB.SetInsertPoint(InsertBefore);
  CallInst *Call = nullptr;
  SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
  SmallString<64> TypeStr{IsWrite ? "store" : "load"};
  SmallString<64> EndingStr{Recover ? "_noabort" : ""};

  SmallString<128> AsanErrorCallbackSizedString;
  raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
  AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
                           << EndingStr;

  SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
  AttributeList AL2;
  FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
      AsanErrorCallbackSizedOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
  SmallVector<Type *, 2> Args1{1, IntptrTy};
  AttributeList AL1;

  SmallString<128> AsanErrorCallbackString;
  raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
  AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
                      << (1ULL << AccessSizeIndex) << EndingStr;

  FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
      AsanErrorCallbackOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
  if (SizeArgument) {
    Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
  } else {
    Call = IRB.CreateCall(AsanErrorCallback, Addr);
  }

  Call->setCannotMerge();
  return Call;
}

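// Translate an application address to its shadow address:
// (Addr >> AsanScale) + AsanOffset.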
static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                          Value *Shadow, int AsanScale, uint32_t AsanOffset) {
  // Shadow >> scale
  Shadow = IRB.CreateLShr(Shadow, AsanScale);
  if (AsanOffset == 0)
    return Shadow;
  // (Shadow >> scale) + offset
  Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
  return IRB.CreateAdd(Shadow, ShadowBase);
}

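// Emit the inline shadow check for a single access of TypeStoreSize bits:
// load the shadow byte, combine the fast-path and slow-path comparisons, and
// branch to the report block when the access is poisoned.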
static void instrumentAddressImpl(Module &M, IRBuilder<> &IRB,
                                  Instruction *OrigIns,
                                  Instruction *InsertBefore, Value *Addr,
                                  Align Alignment, uint32_t TypeStoreSize,
                                  bool IsWrite, Value *SizeArgument,
                                  bool UseCalls, bool Recover, int AsanScale,
                                  int AsanOffset) {
  Type *AddrTy = Addr->getType();
  Type *IntptrTy = M.getDataLayout().getIntPtrType(
      M.getContext(), AddrTy->getPointerAddressSpace());
  IRB.SetInsertPoint(InsertBefore);
  size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
  Type *ShadowTy = IntegerType::get(M.getContext(),
                                    std::max(8U, TypeStoreSize >> AsanScale));
  Type *ShadowPtrTy = PointerType::get(M.getContext(), 0);
  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
  Value *ShadowPtr =
      memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
  const uint64_t ShadowAlign =
      std::max<uint64_t>(Alignment.value() >> AsanScale, 1);
  Value *ShadowValue = IRB.CreateAlignedLoad(
      ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
  Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
  auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
                                 TypeStoreSize, AsanScale);
  Cmp = IRB.CreateAnd(Cmp, Cmp2);
  Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
  Instruction *Crash =
      generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
                        AccessSizeIndex, SizeArgument, Recover);
  Crash->setDebugLoc(OrigIns->getDebugLoc());
}

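// Instrument a memory access. Accesses of 8/16/32/64/128 bits with sufficient
// alignment get a single inline shadow check; all other sizes and alignments
// are handled by checking the first and last byte of the access.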
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                       Instruction *InsertBefore, Value *Addr, Align Alignment,
                       TypeSize TypeStoreSize, bool IsWrite,
                       Value *SizeArgument, bool UseCalls, bool Recover,
                       int AsanScale, int AsanOffset) {
  if (!TypeStoreSize.isScalable()) {
    unsigned Granularity = 1 << AsanScale;
    const auto FixedSize = TypeStoreSize.getFixedValue();
    switch (FixedSize) {
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      if (Alignment.value() >= Granularity ||
          Alignment.value() >= FixedSize / 8)
        return instrumentAddressImpl(
            M, IRB, OrigIns, InsertBefore, Addr, Alignment, FixedSize, IsWrite,
            SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
    }
  }
  // Instrument unusual size or unusual alignment.
  IRB.SetInsertPoint(InsertBefore);
  Type *AddrTy = Addr->getType();
  Type *IntptrTy = M.getDataLayout().getIntPtrType(AddrTy);
  Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize);
  Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3));
  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
  Value *SizeMinusOne = IRB.CreateAdd(Size, ConstantInt::get(IntptrTy, -1));
  Value *LastByte =
      IRB.CreateIntToPtr(IRB.CreateAdd(AddrLong, SizeMinusOne), AddrTy);
  instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, Addr, {}, 8, IsWrite,
                        SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
  instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, LastByte, {}, 8, IsWrite,
                        SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}

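// Collect the memory operands of I that should be instrumented, covering
// ordinary loads/stores, atomics, masked and vector-predicated operations,
// AMDGPU buffer intrinsics, and byval/byref call arguments.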
void getInterestingMemoryOperands(
    Module &M, Instruction *I,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  const DataLayout &DL = M.getDataLayout();
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    switch (CI->getIntrinsicID()) {
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
    case Intrinsic::masked_gather:
    case Intrinsic::masked_scatter: {
      bool IsWrite = CI->getType()->isVoidTy();
      // Masked store has an initial operand for the value.
      unsigned OpOffset = IsWrite ? 1 : 0;
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = Align(1);
      // Otherwise no alignment guarantees. We probably got Undef.
      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
        Alignment = Op->getMaybeAlignValue();
      Value *Mask = CI->getOperand(2 + OpOffset);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
      break;
    }
    case Intrinsic::masked_expandload:
    case Intrinsic::masked_compressstore: {
      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
      unsigned OpOffset = IsWrite ? 1 : 0;
      auto *BasePtr = CI->getOperand(OpOffset);
      MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      IRBuilder<> IB(I);
      Value *Mask = CI->getOperand(1 + OpOffset);
      Type *IntptrTy = M.getDataLayout().getIntPtrType(
          M.getContext(), BasePtr->getType()->getPointerAddressSpace());
      // Use the popcount of Mask as the effective vector length.
      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
      Value *EVL = IB.CreateAddReduce(ExtMask);
      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
                               EVL);
      break;
    }
    case Intrinsic::vp_load:
    case Intrinsic::vp_store:
    case Intrinsic::experimental_vp_strided_load:
    case Intrinsic::experimental_vp_strided_store: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = CI->getType()->isVoidTy();
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
      Value *Stride = nullptr;
      if (IID == Intrinsic::experimental_vp_strided_store ||
          IID == Intrinsic::experimental_vp_strided_load) {
        Stride = VPI->getOperand(PtrOpNo + 1);
        // Use the pointer alignment as the element alignment if the stride is
        // a multiple of the pointer alignment. Otherwise, the element
        // alignment should be Align(1).
        unsigned PointerAlign = Alignment.valueOrOne().value();
        if (!isa<ConstantInt>(Stride) ||
            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
          Alignment = Align(1);
      }
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
                               Stride);
      break;
    }
    case Intrinsic::vp_gather:
    case Intrinsic::vp_scatter: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = IID == Intrinsic::vp_scatter;
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getPointerAlignment();
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(),
                               VPI->getVectorLengthParam());
      break;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_ptr_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_ptr_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_load_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    case Intrinsic::amdgcn_s_buffer_load:
    case Intrinsic::amdgcn_global_load_tr_b64:
    case Intrinsic::amdgcn_global_load_tr_b128: {
      unsigned PtrOpNo = 0;
      bool IsWrite = false;
      Type *Ty = CI->getType();
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
    case Intrinsic::amdgcn_raw_tbuffer_store:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_ptr_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_ptr_buffer_store:
    case Intrinsic::amdgcn_struct_buffer_store_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_tbuffer_store:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
      unsigned PtrOpNo = 1;
      bool IsWrite = true;
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      Type *Ty = Ptr->getType();
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
    default:
      for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
        if (Type *Ty = CI->getParamByRefType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        } else if (Type *Ty = CI->getParamByValType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        }
      }
    }
  }
}
} // end namespace AMDGPU
} // end namespace llvm