//===- AMDGPUAsanInstrumentation.cpp - ASAN related helper functions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUAsanInstrumentation.h"

#define DEBUG_TYPE "amdgpu-asan-instrumentation"

using namespace llvm;

namespace llvm {
namespace AMDGPU {

static uint64_t getRedzoneSizeForScale(int AsanScale) {
  // Redzone used for stack and globals is at least 32 bytes.
  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
  return std::max(32U, 1U << AsanScale);
}

static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
  return getRedzoneSizeForScale(AsanScale);
}

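// Compute the red zone to place around a global of SizeInBytes bytes: at
// least MinRZ, roughly SizeInBytes / 4 for larger objects (clamped to
// [MinRZ, 1 << 18]), then padded so SizeInBytes + RZ is a multiple of MinRZ.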
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
  constexpr uint64_t kMaxRZ = 1 << 18;
  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);

  uint64_t RZ = 0;
  if (SizeInBytes <= MinRZ / 2) {
    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
    // half of MinRZ.
    RZ = MinRZ - SizeInBytes;
  } else {
    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);

    // Round up to multiple of MinRZ.
    if (SizeInBytes % MinRZ)
      RZ += MinRZ - (SizeInBytes % MinRZ);
  }

  assert((RZ + SizeInBytes) % MinRZ == 0);

  return RZ;
}

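// Map an access size in bits (8, 16, 32, ...) to the size index used to pick
// the __asan_report_* callback (0, 1, 2, ...).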
static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  return Res;
}

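// Split the control flow at the current insert point and create an
// "asan.report" block guarding the error reporting code. When not recovering,
// the per-lane condition is first combined across the wavefront with
// amdgcn.ballot so the report block is entered if any active lane faults, and
// the faulting lanes terminate in amdgcn.unreachable after the report.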
static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
                                         Value *Cond, bool Recover) {
  Value *ReportCond = Cond;
  if (!Recover) {
    auto *Ballot =
        IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
    ReportCond = IRB.CreateIsNotNull(Ballot);
  }

  auto *Trm = SplitBlockAndInsertIfThen(
      ReportCond, &*IRB.GetInsertPoint(), false,
      MDBuilder(M.getContext()).createUnlikelyBranchWeights());
  Trm->getParent()->setName("asan.report");

  if (Recover)
    return Trm;

  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
  IRB.SetInsertPoint(Trm);
  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {});
}

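// Build the ASan slow-path check: compare the offset of the last accessed
// byte within its shadow granule against the shadow value, which encodes how
// many leading bytes of a partially addressable granule are valid.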
static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                                Value *AddrLong, Value *ShadowValue,
                                uint32_t TypeStoreSize, int AsanScale) {
  uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
  // Addr & (Granularity - 1)
  Value *LastAccessedByte =
      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
  // (Addr & (Granularity - 1)) + size - 1
  if (TypeStoreSize / 8 > 1)
    LastAccessedByte = IRB.CreateAdd(
        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
  LastAccessedByte =
      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}

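// Emit a call to the matching __asan_report_{load,store}{N,_n}[_noabort]
// runtime callback for the faulting access; the sized "_n" variant is used
// when an explicit size argument is available.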
static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
                                      Type *IntptrTy, Instruction *InsertBefore,
                                      Value *Addr, bool IsWrite,
                                      size_t AccessSizeIndex,
                                      Value *SizeArgument, bool Recover) {
  IRB.SetInsertPoint(InsertBefore);
  CallInst *Call = nullptr;
  SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
  SmallString<64> TypeStr{IsWrite ? "store" : "load"};
  SmallString<64> EndingStr{Recover ? "_noabort" : ""};

  SmallString<128> AsanErrorCallbackSizedString;
  raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
  AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
                           << EndingStr;

  SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
  AttributeList AL2;
  FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
      AsanErrorCallbackSizedOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
  SmallVector<Type *, 2> Args1{1, IntptrTy};
  AttributeList AL1;

  SmallString<128> AsanErrorCallbackString;
  raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
  AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
                      << (1ULL << AccessSizeIndex) << EndingStr;

  FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
      AsanErrorCallbackOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
  if (SizeArgument) {
    Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
  } else {
    Call = IRB.CreateCall(AsanErrorCallback, Addr);
  }

  Call->setCannotMerge();
  return Call;
}

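// Translate an application address (as an integer) to its shadow address:
// (Addr >> AsanScale) + AsanOffset.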
static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                          Value *Shadow, int AsanScale, uint32_t AsanOffset) {
  // Shadow >> scale
  Shadow = IRB.CreateLShr(Shadow, AsanScale);
  if (AsanOffset == 0)
    return Shadow;
  // (Shadow >> scale) + offset
  Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
  return IRB.CreateAdd(Shadow, ShadowBase);
}

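// Emit the inline shadow check for a single access of TypeStoreSize bits at
// Addr: load the shadow byte(s), combine the fast-path and slow-path
// comparisons, and route failures to the report/crash code.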
static void instrumentAddressImpl(Module &M, IRBuilder<> &IRB,
                                  Instruction *OrigIns,
                                  Instruction *InsertBefore, Value *Addr,
                                  Align Alignment, uint32_t TypeStoreSize,
                                  bool IsWrite, Value *SizeArgument,
                                  bool UseCalls, bool Recover, int AsanScale,
                                  int AsanOffset) {
  Type *AddrTy = Addr->getType();
  Type *IntptrTy = M.getDataLayout().getIntPtrType(
      M.getContext(), AddrTy->getPointerAddressSpace());
  IRB.SetInsertPoint(InsertBefore);
  size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
  Type *ShadowTy = IntegerType::get(M.getContext(),
                                    std::max(8U, TypeStoreSize >> AsanScale));
  Type *ShadowPtrTy = PointerType::get(M.getContext(), 0);
  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
  Value *ShadowPtr =
      memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
  const uint64_t ShadowAlign =
      std::max<uint64_t>(Alignment.value() >> AsanScale, 1);
  Value *ShadowValue = IRB.CreateAlignedLoad(
      ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
  Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
  auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
                                 TypeStoreSize, AsanScale);
  Cmp = IRB.CreateAnd(Cmp, Cmp2);
  Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
  Instruction *Crash =
      generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
                        AccessSizeIndex, SizeArgument, Recover);
  Crash->setDebugLoc(OrigIns->getDebugLoc());
}

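// Instrument a memory access. Accesses of the common power-of-two sizes with
// sufficient alignment get a single inline check; unusual sizes or alignments
// are handled by checking the first and the last byte of the accessed range.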
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                       Instruction *InsertBefore, Value *Addr, Align Alignment,
                       TypeSize TypeStoreSize, bool IsWrite,
                       Value *SizeArgument, bool UseCalls, bool Recover,
                       int AsanScale, int AsanOffset) {
  if (!TypeStoreSize.isScalable()) {
    unsigned Granularity = 1 << AsanScale;
    const auto FixedSize = TypeStoreSize.getFixedValue();
    switch (FixedSize) {
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      if (Alignment.value() >= Granularity ||
          Alignment.value() >= FixedSize / 8)
        return instrumentAddressImpl(
            M, IRB, OrigIns, InsertBefore, Addr, Alignment, FixedSize, IsWrite,
            SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
    }
  }
  // Instrument unusual size or unusual alignment.
  IRB.SetInsertPoint(InsertBefore);
  Type *AddrTy = Addr->getType();
  Type *IntptrTy = M.getDataLayout().getIntPtrType(AddrTy);
  Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize);
  Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3));
  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
  Value *SizeMinusOne = IRB.CreateAdd(Size, ConstantInt::get(IntptrTy, -1));
  Value *LastByte =
      IRB.CreateIntToPtr(IRB.CreateAdd(AddrLong, SizeMinusOne), AddrTy);
  instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, Addr, {}, 8, IsWrite,
                        SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
  instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, LastByte, {}, 8, IsWrite,
                        SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}

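// Collect the memory operands of I that should be instrumented: plain loads
// and stores, atomics, masked and VP vector intrinsics, AMDGPU buffer
// intrinsics, and byval/byref call arguments.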
void getInterestingMemoryOperands(
    Module &M, Instruction *I,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  const DataLayout &DL = M.getDataLayout();
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    switch (CI->getIntrinsicID()) {
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
    case Intrinsic::masked_gather:
    case Intrinsic::masked_scatter: {
      bool IsWrite = CI->getType()->isVoidTy();
      // Masked store has an initial operand for the value.
      unsigned OpOffset = IsWrite ? 1 : 0;
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = Align(1);
      // Otherwise no alignment guarantees. We probably got Undef.
      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
        Alignment = Op->getMaybeAlignValue();
      Value *Mask = CI->getOperand(2 + OpOffset);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
      break;
    }
    case Intrinsic::masked_expandload:
    case Intrinsic::masked_compressstore: {
      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
      unsigned OpOffset = IsWrite ? 1 : 0;
      auto *BasePtr = CI->getOperand(OpOffset);
      MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      IRBuilder<> IB(I);
      Value *Mask = CI->getOperand(1 + OpOffset);
      Type *IntptrTy = M.getDataLayout().getIntPtrType(
          M.getContext(), BasePtr->getType()->getPointerAddressSpace());
      // Use the popcount of Mask as the effective vector length.
      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
      Value *EVL = IB.CreateAddReduce(ExtMask);
      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
                               EVL);
      break;
    }
    case Intrinsic::vp_load:
    case Intrinsic::vp_store:
    case Intrinsic::experimental_vp_strided_load:
    case Intrinsic::experimental_vp_strided_store: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = CI->getType()->isVoidTy();
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
      Value *Stride = nullptr;
      if (IID == Intrinsic::experimental_vp_strided_store ||
          IID == Intrinsic::experimental_vp_strided_load) {
        Stride = VPI->getOperand(PtrOpNo + 1);
        // Use the pointer alignment as the element alignment if the stride is
        // a multiple of the pointer alignment. Otherwise, the element
        // alignment should be Align(1).
        unsigned PointerAlign = Alignment.valueOrOne().value();
        if (!isa<ConstantInt>(Stride) ||
            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
          Alignment = Align(1);
      }
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
                               Stride);
      break;
    }
    case Intrinsic::vp_gather:
    case Intrinsic::vp_scatter: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = IID == Intrinsic::vp_scatter;
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getPointerAlignment();
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(),
                               VPI->getVectorLengthParam());
      break;
    }
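    // AMDGPU buffer/tbuffer and global transpose loads: the resource or
    // pointer operand is the first argument and the result type is the
    // accessed type.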
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_ptr_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_ptr_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_load_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    case Intrinsic::amdgcn_s_buffer_load:
    case Intrinsic::amdgcn_global_load_tr_b64:
    case Intrinsic::amdgcn_global_load_tr_b128: {
      unsigned PtrOpNo = 0;
      bool IsWrite = false;
      Type *Ty = CI->getType();
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
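    // AMDGPU buffer/tbuffer stores: the stored value is operand 0 and the
    // resource or pointer operand is operand 1.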
    case Intrinsic::amdgcn_raw_tbuffer_store:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_ptr_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_ptr_buffer_store:
    case Intrinsic::amdgcn_struct_buffer_store_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_tbuffer_store:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
      unsigned PtrOpNo = 1;
      bool IsWrite = true;
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      Type *Ty = Ptr->getType();
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
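    // For other calls, treat byval/byref pointer arguments as reads of the
    // pointee type.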
    default:
      for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
        if (Type *Ty = CI->getParamByRefType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        } else if (Type *Ty = CI->getParamByValType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        }
      }
    }
  }
}
} // end namespace AMDGPU
} // end namespace llvm