1//===----- TypeSanitizer.cpp - type-based-aliasing-violation detector -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of TypeSanitizer, a type-based-aliasing-violation
10// detector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Instrumentation/TypeSanitizer.h"
15#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/Analysis/MemoryLocation.h"
20#include "llvm/Analysis/TargetLibraryInfo.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/InstIterator.h"
25#include "llvm/IR/Instructions.h"
26#include "llvm/IR/IntrinsicInst.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/IR/MDBuilder.h"
30#include "llvm/IR/Metadata.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/Type.h"
33#include "llvm/ProfileData/InstrProf.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/MD5.h"
36#include "llvm/Support/Regex.h"
37#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38#include "llvm/Transforms/Utils/Local.h"
39#include "llvm/Transforms/Utils/ModuleUtils.h"
40
41#include <cctype>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "tysan"
46
// Symbol names shared between the instrumentation and the TySan runtime.
// kTysanCheckName is the runtime check entry point declared in
// initializeCallbacks; kTysanGVNamePrefix is prepended to every encoded
// type-descriptor global name (see encodeName).
static constexpr const char *kTysanModuleCtorName = "tysan.module_ctor";
static constexpr const char *kTysanInitName = "__tysan_init";
static constexpr const char *kTysanCheckName = "__tysan_check";
static constexpr const char *kTysanGVNamePrefix = "__tysan_v1_";

// Globals loaded at function entry by getShadowBase / getAppMemMask to map an
// application address to its shadow slot.
static constexpr const char *kTysanShadowMemoryAddress =
    "__tysan_shadow_memory_address";
static constexpr const char *kTysanAppMemMask = "__tysan_app_memory_mask";
55
56static cl::opt<bool>
57 ClWritesAlwaysSetType("tysan-writes-always-set-type",
58 cl::desc("Writes always set the type"), cl::Hidden,
59 cl::init(Val: false));
60
61static cl::opt<bool> ClOutlineInstrumentation(
62 "tysan-outline-instrumentation",
63 cl::desc("Uses function calls for all TySan instrumentation, reducing "
64 "ELF size"),
65 cl::Hidden, cl::init(Val: true));
66
67static cl::opt<bool> ClVerifyOutlinedInstrumentation(
68 "tysan-verify-outlined-instrumentation",
69 cl::desc("Check types twice with both inlined instrumentation and "
70 "function calls. This verifies that they behave the same."),
71 cl::Hidden, cl::init(Val: false));
72
73STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
74
75namespace {
76
77/// TypeSanitizer: instrument the code in module to find type-based aliasing
78/// violations.
79struct TypeSanitizer {
80 TypeSanitizer(Module &M);
81 bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
82 void instrumentGlobals(Module &M);
83
84private:
85 typedef SmallDenseMap<const MDNode *, GlobalVariable *, 8>
86 TypeDescriptorsMapTy;
87 typedef SmallDenseMap<const MDNode *, std::string, 8> TypeNameMapTy;
88
89 void initializeCallbacks(Module &M);
90
91 Instruction *getShadowBase(Function &F);
92 Instruction *getAppMemMask(Function &F);
93
94 bool instrumentWithShadowUpdate(IRBuilder<> &IRB, const MDNode *TBAAMD,
95 Value *Ptr, uint64_t AccessSize, bool IsRead,
96 bool IsWrite, Value *ShadowBase,
97 Value *AppMemMask, bool ForceSetType,
98 bool SanitizeFunction,
99 TypeDescriptorsMapTy &TypeDescriptors,
100 const DataLayout &DL);
101
102 /// Memory-related intrinsics/instructions reset the type of the destination
103 /// memory (including allocas and byval arguments).
104 bool instrumentMemInst(Value *I, Instruction *ShadowBase,
105 Instruction *AppMemMask, const DataLayout &DL);
106
107 std::string getAnonymousStructIdentifier(const MDNode *MD,
108 TypeNameMapTy &TypeNames);
109 bool generateTypeDescriptor(const MDNode *MD,
110 TypeDescriptorsMapTy &TypeDescriptors,
111 TypeNameMapTy &TypeNames, Module &M);
112 bool generateBaseTypeDescriptor(const MDNode *MD,
113 TypeDescriptorsMapTy &TypeDescriptors,
114 TypeNameMapTy &TypeNames, Module &M);
115
116 const Triple TargetTriple;
117 Regex AnonNameRegex;
118 Type *IntptrTy;
119 uint64_t PtrShift;
120 IntegerType *OrdTy, *U64Ty;
121
122 /// Callbacks to run-time library are computed in initializeCallbacks.
123 FunctionCallee TysanCheck;
124 FunctionCallee TysanCtorFunction;
125
126 FunctionCallee TysanIntrumentMemInst;
127 FunctionCallee TysanInstrumentWithShadowUpdate;
128 FunctionCallee TysanSetShadowType;
129
130 /// Callback to set types for gloabls.
131 Function *TysanGlobalsSetTypeFunction;
132};
133} // namespace
134
135TypeSanitizer::TypeSanitizer(Module &M)
136 : TargetTriple(M.getTargetTriple()),
137 AnonNameRegex("^_ZTS.*N[1-9][0-9]*_GLOBAL__N") {
138 const DataLayout &DL = M.getDataLayout();
139 IntptrTy = DL.getIntPtrType(C&: M.getContext());
140 PtrShift = countr_zero(Val: IntptrTy->getPrimitiveSizeInBits() / 8);
141
142 TysanGlobalsSetTypeFunction = M.getFunction(Name: "__tysan_set_globals_types");
143 initializeCallbacks(M);
144}
145
146void TypeSanitizer::initializeCallbacks(Module &M) {
147 IRBuilder<> IRB(M.getContext());
148 OrdTy = IRB.getInt32Ty();
149 U64Ty = IRB.getInt64Ty();
150 Type *BoolType = IRB.getInt1Ty();
151
152 AttributeList Attr;
153 Attr = Attr.addFnAttribute(C&: M.getContext(), Kind: Attribute::NoUnwind);
154 // Initialize the callbacks.
155 TysanCheck =
156 M.getOrInsertFunction(Name: kTysanCheckName, AttributeList: Attr, RetTy: IRB.getVoidTy(),
157 Args: IRB.getPtrTy(), // Pointer to data to be read.
158 Args: OrdTy, // Size of the data in bytes.
159 Args: IRB.getPtrTy(), // Pointer to type descriptor.
160 Args: OrdTy // Flags.
161 );
162
163 TysanCtorFunction =
164 M.getOrInsertFunction(Name: kTysanModuleCtorName, AttributeList: Attr, RetTy: IRB.getVoidTy());
165
166 TysanIntrumentMemInst = M.getOrInsertFunction(
167 Name: "__tysan_instrument_mem_inst", AttributeList: Attr, RetTy: IRB.getVoidTy(),
168 Args: IRB.getPtrTy(), // Pointer of data to be written to
169 Args: IRB.getPtrTy(), // Pointer of data to write
170 Args: U64Ty, // Size of the data in bytes
171 Args: BoolType // Do we need to call memmove
172 );
173
174 TysanInstrumentWithShadowUpdate = M.getOrInsertFunction(
175 Name: "__tysan_instrument_with_shadow_update", AttributeList: Attr, RetTy: IRB.getVoidTy(),
176 Args: IRB.getPtrTy(), // Pointer to data to be read
177 Args: IRB.getPtrTy(), // Pointer to type descriptor
178 Args: BoolType, // Do we need to type check this
179 Args: U64Ty, // Size of data we access in bytes
180 Args: OrdTy // Flags
181 );
182
183 TysanSetShadowType = M.getOrInsertFunction(
184 Name: "__tysan_set_shadow_type", AttributeList: Attr, RetTy: IRB.getVoidTy(),
185 Args: IRB.getPtrTy(), // Pointer of data to be written to
186 Args: IRB.getPtrTy(), // Pointer to the new type descriptor
187 Args: U64Ty // Size of data we access in bytes
188 );
189}
190
191void TypeSanitizer::instrumentGlobals(Module &M) {
192 TysanGlobalsSetTypeFunction = nullptr;
193
194 NamedMDNode *Globals = M.getNamedMetadata(Name: "llvm.tysan.globals");
195 if (!Globals)
196 return;
197
198 TysanGlobalsSetTypeFunction = Function::Create(
199 Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false),
200 Linkage: GlobalValue::InternalLinkage, N: "__tysan_set_globals_types", M: &M);
201 BasicBlock *BB =
202 BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: TysanGlobalsSetTypeFunction);
203 ReturnInst::Create(C&: M.getContext(), InsertAtEnd: BB);
204
205 const DataLayout &DL = M.getDataLayout();
206 Value *ShadowBase = getShadowBase(F&: *TysanGlobalsSetTypeFunction);
207 Value *AppMemMask = getAppMemMask(F&: *TysanGlobalsSetTypeFunction);
208 TypeDescriptorsMapTy TypeDescriptors;
209 TypeNameMapTy TypeNames;
210
211 for (const auto &GMD : Globals->operands()) {
212 auto *GV = mdconst::dyn_extract_or_null<GlobalVariable>(MD: GMD->getOperand(I: 0));
213 if (!GV)
214 continue;
215 const MDNode *TBAAMD = cast<MDNode>(Val: GMD->getOperand(I: 1));
216 if (!generateBaseTypeDescriptor(MD: TBAAMD, TypeDescriptors, TypeNames, M))
217 continue;
218
219 IRBuilder<> IRB(
220 TysanGlobalsSetTypeFunction->getEntryBlock().getTerminator());
221 Type *AccessTy = GV->getValueType();
222 assert(AccessTy->isSized());
223 uint64_t AccessSize = DL.getTypeStoreSize(Ty: AccessTy);
224 instrumentWithShadowUpdate(IRB, TBAAMD, Ptr: GV, AccessSize, IsRead: false, IsWrite: false,
225 ShadowBase, AppMemMask, ForceSetType: true, SanitizeFunction: false,
226 TypeDescriptors, DL);
227 }
228
229 if (TysanGlobalsSetTypeFunction) {
230 IRBuilder<> IRB(cast<Function>(Val: TysanCtorFunction.getCallee())
231 ->getEntryBlock()
232 .getTerminator());
233 IRB.CreateCall(Callee: TysanGlobalsSetTypeFunction, Args: {});
234 }
235}
236
237static const char LUT[] = "0123456789abcdef";
238
239static std::string encodeName(StringRef Name) {
240 size_t Length = Name.size();
241 std::string Output = kTysanGVNamePrefix;
242 Output.reserve(res_arg: Output.size() + 3 * Length);
243 for (size_t i = 0; i < Length; ++i) {
244 const unsigned char c = Name[i];
245 if (isalnum(c)) {
246 Output.push_back(c: c);
247 continue;
248 }
249
250 if (c == '_') {
251 Output.append(s: "__");
252 continue;
253 }
254
255 Output.push_back(c: '_');
256 Output.push_back(c: LUT[c >> 4]);
257 Output.push_back(c: LUT[c & 15]);
258 }
259
260 return Output;
261}
262
263std::string
264TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
265 TypeNameMapTy &TypeNames) {
266 MD5 Hash;
267
268 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
269 const MDNode *MemberNode = dyn_cast<MDNode>(Val: MD->getOperand(I: i));
270 if (!MemberNode)
271 return "";
272
273 auto TNI = TypeNames.find(Val: MemberNode);
274 std::string MemberName;
275 if (TNI != TypeNames.end()) {
276 MemberName = TNI->second;
277 } else {
278 if (MemberNode->getNumOperands() < 1)
279 return "";
280 MDString *MemberNameNode = dyn_cast<MDString>(Val: MemberNode->getOperand(I: 0));
281 if (!MemberNameNode)
282 return "";
283 MemberName = MemberNameNode->getString().str();
284 if (MemberName.empty())
285 MemberName = getAnonymousStructIdentifier(MD: MemberNode, TypeNames);
286 if (MemberName.empty())
287 return "";
288 TypeNames[MemberNode] = MemberName;
289 }
290
291 Hash.update(Str: MemberName);
292 Hash.update(Str: "\0");
293
294 uint64_t Offset =
295 mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i + 1))->getZExtValue();
296 Hash.update(Str: utostr(X: Offset));
297 Hash.update(Str: "\0");
298 }
299
300 MD5::MD5Result HashResult;
301 Hash.final(Result&: HashResult);
302 return "__anonymous_" + std::string(HashResult.digest().str());
303}
304
305bool TypeSanitizer::generateBaseTypeDescriptor(
306 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
307 TypeNameMapTy &TypeNames, Module &M) {
308 if (MD->getNumOperands() < 1)
309 return false;
310
311 MDString *NameNode = dyn_cast<MDString>(Val: MD->getOperand(I: 0));
312 if (!NameNode)
313 return false;
314
315 std::string Name = NameNode->getString().str();
316 if (Name.empty())
317 Name = getAnonymousStructIdentifier(MD, TypeNames);
318 if (Name.empty())
319 return false;
320 TypeNames[MD] = Name;
321 std::string EncodedName = encodeName(Name);
322
323 GlobalVariable *GV =
324 dyn_cast_or_null<GlobalVariable>(Val: M.getNamedValue(Name: EncodedName));
325 if (GV) {
326 TypeDescriptors[MD] = GV;
327 return true;
328 }
329
330 SmallVector<std::pair<Constant *, uint64_t>> Members;
331 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
332 const MDNode *MemberNode = dyn_cast<MDNode>(Val: MD->getOperand(I: i));
333 if (!MemberNode)
334 return false;
335
336 Constant *Member;
337 auto TDI = TypeDescriptors.find(Val: MemberNode);
338 if (TDI != TypeDescriptors.end()) {
339 Member = TDI->second;
340 } else {
341 if (!generateBaseTypeDescriptor(MD: MemberNode, TypeDescriptors, TypeNames,
342 M))
343 return false;
344
345 Member = TypeDescriptors[MemberNode];
346 }
347
348 uint64_t Offset =
349 mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i + 1))->getZExtValue();
350
351 Members.push_back(Elt: std::make_pair(x&: Member, y&: Offset));
352 }
353
354 // The descriptor for a scalar is:
355 // [2, member count, [type pointer, offset]..., name]
356
357 LLVMContext &C = MD->getContext();
358 Constant *NameData = ConstantDataArray::getString(Context&: C, Initializer: NameNode->getString());
359 SmallVector<Type *> TDSubTys;
360 SmallVector<Constant *> TDSubData;
361
362 auto PushTDSub = [&](Constant *C) {
363 TDSubTys.push_back(Elt: C->getType());
364 TDSubData.push_back(Elt: C);
365 };
366
367 PushTDSub(ConstantInt::get(Ty: IntptrTy, V: 2));
368 PushTDSub(ConstantInt::get(Ty: IntptrTy, V: Members.size()));
369
370 // Types that are in an anonymous namespace are local to this module.
371 // FIXME: This should really be marked by the frontend in the metadata
372 // instead of having us guess this from the mangled name. Moreover, the regex
373 // here can pick up (unlikely) names in the non-reserved namespace (because
374 // it needs to search into the type to pick up cases where the type in the
375 // anonymous namespace is a template parameter, etc.).
376 bool ShouldBeComdat = !AnonNameRegex.match(String: NameNode->getString());
377 for (auto &Member : Members) {
378 PushTDSub(Member.first);
379 PushTDSub(ConstantInt::get(Ty: IntptrTy, V: Member.second));
380 }
381
382 PushTDSub(NameData);
383
384 StructType *TDTy = StructType::get(Context&: C, Elements: TDSubTys);
385 Constant *TD = ConstantStruct::get(T: TDTy, V: TDSubData);
386
387 GlobalVariable *TDGV =
388 new GlobalVariable(TDTy, true,
389 !ShouldBeComdat ? GlobalValue::InternalLinkage
390 : GlobalValue::LinkOnceODRLinkage,
391 TD, EncodedName);
392 M.insertGlobalVariable(GV: TDGV);
393
394 if (ShouldBeComdat) {
395 if (TargetTriple.isOSBinFormatELF()) {
396 Comdat *TDComdat = M.getOrInsertComdat(Name: EncodedName);
397 TDGV->setComdat(TDComdat);
398 }
399 appendToUsed(M, Values: TDGV);
400 }
401
402 TypeDescriptors[MD] = TDGV;
403 return true;
404}
405
406bool TypeSanitizer::generateTypeDescriptor(
407 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
408 TypeNameMapTy &TypeNames, Module &M) {
409 // Here we need to generate a type descriptor corresponding to this TBAA
410 // metadata node. Under the current scheme there are three kinds of TBAA
411 // metadata nodes: scalar nodes, struct nodes, and struct tag nodes.
412
413 if (MD->getNumOperands() < 3)
414 return false;
415
416 const MDNode *BaseNode = dyn_cast<MDNode>(Val: MD->getOperand(I: 0));
417 if (!BaseNode)
418 return false;
419
420 // This is a struct tag (element-access) node.
421
422 const MDNode *AccessNode = dyn_cast<MDNode>(Val: MD->getOperand(I: 1));
423 if (!AccessNode)
424 return false;
425
426 Constant *Base;
427 auto TDI = TypeDescriptors.find(Val: BaseNode);
428 if (TDI != TypeDescriptors.end()) {
429 Base = TDI->second;
430 } else {
431 if (!generateBaseTypeDescriptor(MD: BaseNode, TypeDescriptors, TypeNames, M))
432 return false;
433
434 Base = TypeDescriptors[BaseNode];
435 }
436
437 Constant *Access;
438 TDI = TypeDescriptors.find(Val: AccessNode);
439 if (TDI != TypeDescriptors.end()) {
440 Access = TDI->second;
441 } else {
442 if (!generateBaseTypeDescriptor(MD: AccessNode, TypeDescriptors, TypeNames, M))
443 return false;
444
445 Access = TypeDescriptors[AccessNode];
446 }
447
448 uint64_t Offset =
449 mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 2))->getZExtValue();
450 std::string EncodedName =
451 std::string(Base->getName()) + "_o_" + utostr(X: Offset);
452
453 GlobalVariable *GV =
454 dyn_cast_or_null<GlobalVariable>(Val: M.getNamedValue(Name: EncodedName));
455 if (GV) {
456 TypeDescriptors[MD] = GV;
457 return true;
458 }
459
460 // The descriptor for a scalar is:
461 // [1, base-type pointer, access-type pointer, offset]
462
463 StructType *TDTy =
464 StructType::get(elt1: IntptrTy, elts: Base->getType(), elts: Access->getType(), elts: IntptrTy);
465 Constant *TD =
466 ConstantStruct::get(T: TDTy, Vs: ConstantInt::get(Ty: IntptrTy, V: 1), Vs: Base, Vs: Access,
467 Vs: ConstantInt::get(Ty: IntptrTy, V: Offset));
468
469 bool ShouldBeComdat = cast<GlobalVariable>(Val: Base)->getLinkage() ==
470 GlobalValue::LinkOnceODRLinkage;
471
472 GlobalVariable *TDGV =
473 new GlobalVariable(TDTy, true,
474 !ShouldBeComdat ? GlobalValue::InternalLinkage
475 : GlobalValue::LinkOnceODRLinkage,
476 TD, EncodedName);
477 M.insertGlobalVariable(GV: TDGV);
478
479 if (ShouldBeComdat) {
480 if (TargetTriple.isOSBinFormatELF()) {
481 Comdat *TDComdat = M.getOrInsertComdat(Name: EncodedName);
482 TDGV->setComdat(TDComdat);
483 }
484 appendToUsed(M, Values: TDGV);
485 }
486
487 TypeDescriptors[MD] = TDGV;
488 return true;
489}
490
491Instruction *TypeSanitizer::getShadowBase(Function &F) {
492 IRBuilder<> IRB(&F.front().front());
493 Constant *GlobalShadowAddress =
494 F.getParent()->getOrInsertGlobal(Name: kTysanShadowMemoryAddress, Ty: IntptrTy);
495 return IRB.CreateLoad(Ty: IntptrTy, Ptr: GlobalShadowAddress, Name: "shadow.base");
496}
497
498Instruction *TypeSanitizer::getAppMemMask(Function &F) {
499 IRBuilder<> IRB(&F.front().front());
500 Value *GlobalAppMemMask =
501 F.getParent()->getOrInsertGlobal(Name: kTysanAppMemMask, Ty: IntptrTy);
502 return IRB.CreateLoad(Ty: IntptrTy, Ptr: GlobalAppMemMask, Name: "app.mem.mask");
503}
504
505/// Collect all loads and stores, and for what TBAA nodes we need to generate
506/// type descriptors.
507void collectMemAccessInfo(
508 Function &F, const TargetLibraryInfo &TLI,
509 SmallVectorImpl<std::pair<Instruction *, MemoryLocation>> &MemoryAccesses,
510 SmallSetVector<const MDNode *, 8> &TBAAMetadata,
511 SmallVectorImpl<Value *> &MemTypeResetInsts) {
512 // Traverse all instructions, collect loads/stores/returns, check for calls.
513 for (Instruction &Inst : instructions(F)) {
514 // Skip memory accesses inserted by another instrumentation.
515 if (Inst.getMetadata(KindID: LLVMContext::MD_nosanitize))
516 continue;
517
518 if (isa<LoadInst>(Val: Inst) || isa<StoreInst>(Val: Inst) ||
519 isa<AtomicCmpXchgInst>(Val: Inst) || isa<AtomicRMWInst>(Val: Inst)) {
520 MemoryLocation MLoc = MemoryLocation::get(Inst: &Inst);
521
522 // Swift errors are special (we can't introduce extra uses on them).
523 if (MLoc.Ptr->isSwiftError())
524 continue;
525
526 // Skip non-address-space-0 pointers; we don't know how to handle them.
527 Type *PtrTy = cast<PointerType>(Val: MLoc.Ptr->getType());
528 if (PtrTy->getPointerAddressSpace() != 0)
529 continue;
530
531 if (MLoc.AATags.TBAA)
532 TBAAMetadata.insert(X: MLoc.AATags.TBAA);
533 MemoryAccesses.push_back(Elt: std::make_pair(x: &Inst, y&: MLoc));
534 } else if (isa<CallInst>(Val: Inst) || isa<InvokeInst>(Val: Inst)) {
535 if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst))
536 maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI: &TLI);
537
538 if (isa<MemIntrinsic, LifetimeIntrinsic>(Val: Inst))
539 MemTypeResetInsts.push_back(Elt: &Inst);
540 } else if (isa<AllocaInst>(Val: Inst)) {
541 MemTypeResetInsts.push_back(Elt: &Inst);
542 }
543 }
544}
545
546bool TypeSanitizer::sanitizeFunction(Function &F,
547 const TargetLibraryInfo &TLI) {
548 if (F.isDeclaration())
549 return false;
550 // This is required to prevent instrumenting call to __tysan_init from within
551 // the module constructor.
552 if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction)
553 return false;
554 initializeCallbacks(M&: *F.getParent());
555
556 // We need to collect all loads and stores, and know for what TBAA nodes we
557 // need to generate type descriptors.
558 SmallVector<std::pair<Instruction *, MemoryLocation>> MemoryAccesses;
559 SmallSetVector<const MDNode *, 8> TBAAMetadata;
560 SmallVector<Value *> MemTypeResetInsts;
561 collectMemAccessInfo(F, TLI, MemoryAccesses, TBAAMetadata, MemTypeResetInsts);
562
563 // byval arguments also need their types reset (they're new stack memory,
564 // just like allocas).
565 for (auto &A : F.args())
566 if (A.hasByValAttr())
567 MemTypeResetInsts.push_back(Elt: &A);
568
569 Module &M = *F.getParent();
570 TypeDescriptorsMapTy TypeDescriptors;
571 TypeNameMapTy TypeNames;
572 bool Res = false;
573 for (const MDNode *MD : TBAAMetadata) {
574 if (TypeDescriptors.count(Val: MD))
575 continue;
576
577 if (!generateTypeDescriptor(MD, TypeDescriptors, TypeNames, M))
578 return Res; // Giving up.
579
580 Res = true;
581 }
582
583 const DataLayout &DL = F.getParent()->getDataLayout();
584 bool SanitizeFunction = F.hasFnAttribute(Kind: Attribute::SanitizeType);
585 bool NeedsInstrumentation =
586 MemTypeResetInsts.empty() && MemoryAccesses.empty();
587 Instruction *ShadowBase = NeedsInstrumentation ? nullptr : getShadowBase(F);
588 Instruction *AppMemMask = NeedsInstrumentation ? nullptr : getAppMemMask(F);
589 for (const auto &[I, MLoc] : MemoryAccesses) {
590 IRBuilder<> IRB(I);
591 assert(MLoc.Size.isPrecise());
592 if (instrumentWithShadowUpdate(
593 IRB, TBAAMD: MLoc.AATags.TBAA, Ptr: const_cast<Value *>(MLoc.Ptr),
594 AccessSize: MLoc.Size.getValue(), IsRead: I->mayReadFromMemory(), IsWrite: I->mayWriteToMemory(),
595 ShadowBase, AppMemMask, ForceSetType: false, SanitizeFunction, TypeDescriptors,
596 DL)) {
597 ++NumInstrumentedAccesses;
598 Res = true;
599 }
600 }
601
602 for (auto Inst : MemTypeResetInsts)
603 Res |= instrumentMemInst(I: Inst, ShadowBase, AppMemMask, DL);
604
605 return Res;
606}
607
608static Value *convertToShadowDataInt(IRBuilder<> &IRB, Value *Ptr,
609 Type *IntptrTy, uint64_t PtrShift,
610 Value *ShadowBase, Value *AppMemMask) {
611 return IRB.CreateAdd(
612 LHS: IRB.CreateShl(
613 LHS: IRB.CreateAnd(LHS: IRB.CreatePtrToInt(V: Ptr, DestTy: IntptrTy, Name: "app.ptr.int"),
614 RHS: AppMemMask, Name: "app.ptr.masked"),
615 RHS: PtrShift, Name: "app.ptr.shifted"),
616 RHS: ShadowBase, Name: "shadow.ptr.int");
617}
618
619bool TypeSanitizer::instrumentWithShadowUpdate(
620 IRBuilder<> &IRB, const MDNode *TBAAMD, Value *Ptr, uint64_t AccessSize,
621 bool IsRead, bool IsWrite, Value *ShadowBase, Value *AppMemMask,
622 bool ForceSetType, bool SanitizeFunction,
623 TypeDescriptorsMapTy &TypeDescriptors, const DataLayout &DL) {
624 Constant *TDGV;
625 if (TBAAMD)
626 TDGV = TypeDescriptors[TBAAMD];
627 else
628 TDGV = Constant::getNullValue(Ty: IRB.getPtrTy());
629
630 Value *TD = IRB.CreateBitCast(V: TDGV, DestTy: IRB.getPtrTy());
631
632 if (ClOutlineInstrumentation) {
633 if (!ForceSetType && (!ClWritesAlwaysSetType || IsRead)) {
634 // We need to check the type here. If the type is unknown, then the read
635 // sets the type. If the type is known, then it is checked. If the type
636 // doesn't match, then we call the runtime type check (which may yet
637 // determine that the mismatch is okay).
638
639 Constant *Flags =
640 ConstantInt::get(Ty: OrdTy, V: (int)IsRead | (((int)IsWrite) << 1));
641
642 IRB.CreateCall(Callee: TysanInstrumentWithShadowUpdate,
643 Args: {Ptr, TD,
644 SanitizeFunction ? IRB.getTrue() : IRB.getFalse(),
645 IRB.getInt64(C: AccessSize), Flags});
646 } else if (ForceSetType || IsWrite) {
647 // In the mode where writes always set the type, for a write (which does
648 // not also read), we just set the type.
649 IRB.CreateCall(Callee: TysanSetShadowType, Args: {Ptr, TD, IRB.getInt64(C: AccessSize)});
650 }
651
652 return true;
653 }
654
655 Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
656 ShadowBase, AppMemMask);
657 Type *Int8PtrPtrTy = PointerType::get(C&: IRB.getContext(), AddressSpace: 0);
658 Value *ShadowData =
659 IRB.CreateIntToPtr(V: ShadowDataInt, DestTy: Int8PtrPtrTy, Name: "shadow.ptr");
660
661 auto SetType = [&]() {
662 IRB.CreateStore(Val: TD, Ptr: ShadowData);
663
664 // Now fill the remainder of the shadow memory corresponding to the
665 // remainder of the the bytes of the type with a bad type descriptor.
666 for (uint64_t i = 1; i < AccessSize; ++i) {
667 Value *BadShadowData = IRB.CreateIntToPtr(
668 V: IRB.CreateAdd(LHS: ShadowDataInt,
669 RHS: ConstantInt::get(Ty: IntptrTy, V: i << PtrShift),
670 Name: "shadow.byte." + Twine(i) + ".offset"),
671 DestTy: Int8PtrPtrTy, Name: "shadow.byte." + Twine(i) + ".ptr");
672
673 // This is the TD value, -i, which is used to indicate that the byte is
674 // i bytes after the first byte of the type.
675 Value *BadTD =
676 IRB.CreateIntToPtr(V: ConstantInt::getSigned(Ty: IntptrTy, V: -i),
677 DestTy: IRB.getPtrTy(), Name: "bad.descriptor" + Twine(i));
678 IRB.CreateStore(Val: BadTD, Ptr: BadShadowData);
679 }
680 };
681
682 if (ForceSetType || (ClWritesAlwaysSetType && IsWrite)) {
683 // In the mode where writes always set the type, for a write (which does
684 // not also read), we just set the type.
685 SetType();
686 return true;
687 }
688
689 assert((!ClWritesAlwaysSetType || IsRead) &&
690 "should have handled case above");
691 LLVMContext &C = IRB.getContext();
692 MDNode *UnlikelyBW = MDBuilder(C).createBranchWeights(TrueWeight: 1, FalseWeight: 100000);
693
694 if (!SanitizeFunction) {
695 // If we're not sanitizing this function, then we only care whether we
696 // need to *set* the type.
697 Value *LoadedTD = IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: ShadowData, Name: "shadow.desc");
698 Value *NullTDCmp = IRB.CreateIsNull(Arg: LoadedTD, Name: "desc.set");
699 Instruction *NullTDTerm = SplitBlockAndInsertIfThen(
700 Cond: NullTDCmp, SplitBefore: &*IRB.GetInsertPoint(), Unreachable: false, BranchWeights: UnlikelyBW);
701 IRB.SetInsertPoint(NullTDTerm);
702 NullTDTerm->getParent()->setName("set.type");
703 SetType();
704 return true;
705 }
706 // We need to check the type here. If the type is unknown, then the read
707 // sets the type. If the type is known, then it is checked. If the type
708 // doesn't match, then we call the runtime (which may yet determine that
709 // the mismatch is okay).
710 //
711 // The checks generated below have the following structure.
712 //
713 // ; First we load the descriptor for the load from shadow memory and
714 // ; compare it against the type descriptor for the current access type.
715 // %shadow.desc = load ptr %shadow.data
716 // %bad.desc = icmp ne %shadow.desc, %td
717 // br %bad.desc, %bad.bb, %good.bb
718 //
719 // bad.bb:
720 // %shadow.desc.null = icmp eq %shadow.desc, null
721 // br %shadow.desc.null, %null.td.bb, %good.td.bb
722 //
723 // null.td.bb:
724 // ; The typ is unknown, set it if all bytes in the value are also unknown.
725 // ; To check, we load the shadow data for all bytes of the access. For the
726 // ; pseudo code below, assume an access of size 1.
727 // %shadow.data.int = add %shadow.data.int, 0
728 // %l = load (inttoptr %shadow.data.int)
729 // %is.not.null = icmp ne %l, null
730 // %not.all.unknown = %is.not.null
731 // br %no.all.unknown, before.set.type.bb
732 //
733 // before.set.type.bb:
734 // ; Call runtime to check mismatch.
735 // call void @__tysan_check()
736 // br %set.type.bb
737 //
738 // set.type.bb:
739 // ; Now fill the remainder of the shadow memory corresponding to the
740 // ; remainder of the the bytes of the type with a bad type descriptor.
741 // store %TD, %shadow.data
742 // br %continue.bb
743 //
744 // good.td.bb::
745 // ; We have a non-trivial mismatch. Call the runtime.
746 // call void @__tysan_check()
747 // br %continue.bb
748 //
749 // good.bb:
750 // ; We appear to have the right type. Make sure that all other bytes in
751 // ; the type are still marked as interior bytes. If not, call the runtime.
752 // %shadow.data.int = add %shadow.data.int, 0
753 // %l = load (inttoptr %shadow.data.int)
754 // %not.all.interior = icmp sge %l, 0
755 // br %not.all.interior, label %check.rt.bb, label %continue.bb
756 //
757 // check.rt.bb:
758 // call void @__tysan_check()
759 // br %continue.bb
760
761 Constant *Flags = ConstantInt::get(Ty: OrdTy, V: int(IsRead) | (int(IsWrite) << 1));
762
763 Value *LoadedTD = IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: ShadowData, Name: "shadow.desc");
764 Value *BadTDCmp = IRB.CreateICmpNE(LHS: LoadedTD, RHS: TD, Name: "bad.desc");
765 Instruction *BadTDTerm, *GoodTDTerm;
766 SplitBlockAndInsertIfThenElse(Cond: BadTDCmp, SplitBefore: &*IRB.GetInsertPoint(), ThenTerm: &BadTDTerm,
767 ElseTerm: &GoodTDTerm, BranchWeights: UnlikelyBW);
768 IRB.SetInsertPoint(BadTDTerm);
769
770 // We now know that the types did not match (we're on the slow path). If
771 // the type is unknown, then set it.
772 Value *NullTDCmp = IRB.CreateIsNull(Arg: LoadedTD);
773 Instruction *NullTDTerm, *MismatchTerm;
774 SplitBlockAndInsertIfThenElse(Cond: NullTDCmp, SplitBefore: &*IRB.GetInsertPoint(), ThenTerm: &NullTDTerm,
775 ElseTerm: &MismatchTerm);
776
777 // If the type is unknown, then set the type.
778 IRB.SetInsertPoint(NullTDTerm);
779
780 // We're about to set the type. Make sure that all bytes in the value are
781 // also of unknown type.
782 Value *Size = ConstantInt::get(Ty: OrdTy, V: AccessSize);
783 Value *NotAllUnkTD = IRB.getFalse();
784 for (uint64_t i = 1; i < AccessSize; ++i) {
785 Value *UnkShadowData = IRB.CreateIntToPtr(
786 V: IRB.CreateAdd(LHS: ShadowDataInt, RHS: ConstantInt::get(Ty: IntptrTy, V: i << PtrShift)),
787 DestTy: Int8PtrPtrTy);
788 Value *ILdTD = IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: UnkShadowData);
789 NotAllUnkTD = IRB.CreateOr(LHS: NotAllUnkTD, RHS: IRB.CreateIsNotNull(Arg: ILdTD));
790 }
791
792 Instruction *BeforeSetType = &*IRB.GetInsertPoint();
793 Instruction *BadUTDTerm =
794 SplitBlockAndInsertIfThen(Cond: NotAllUnkTD, SplitBefore: BeforeSetType, Unreachable: false, BranchWeights: UnlikelyBW);
795 IRB.SetInsertPoint(BadUTDTerm);
796 IRB.CreateCall(Callee: TysanCheck, Args: {IRB.CreateBitCast(V: Ptr, DestTy: IRB.getPtrTy()), Size,
797 (Value *)TD, (Value *)Flags});
798
799 IRB.SetInsertPoint(BeforeSetType);
800 SetType();
801
802 // We have a non-trivial mismatch. Call the runtime.
803 IRB.SetInsertPoint(MismatchTerm);
804 IRB.CreateCall(Callee: TysanCheck, Args: {IRB.CreateBitCast(V: Ptr, DestTy: IRB.getPtrTy()), Size,
805 (Value *)TD, (Value *)Flags});
806
807 // We appear to have the right type. Make sure that all other bytes in
808 // the type are still marked as interior bytes. If not, call the runtime.
809 IRB.SetInsertPoint(GoodTDTerm);
810 Value *NotAllBadTD = IRB.getFalse();
811 for (uint64_t i = 1; i < AccessSize; ++i) {
812 Value *BadShadowData = IRB.CreateIntToPtr(
813 V: IRB.CreateAdd(LHS: ShadowDataInt, RHS: ConstantInt::get(Ty: IntptrTy, V: i << PtrShift)),
814 DestTy: Int8PtrPtrTy);
815 Value *ILdTD = IRB.CreatePtrToInt(
816 V: IRB.CreateLoad(Ty: IRB.getPtrTy(), Ptr: BadShadowData), DestTy: IntptrTy);
817 NotAllBadTD = IRB.CreateOr(
818 LHS: NotAllBadTD, RHS: IRB.CreateICmpSGE(LHS: ILdTD, RHS: ConstantInt::get(Ty: IntptrTy, V: 0)));
819 }
820
821 Instruction *BadITDTerm = SplitBlockAndInsertIfThen(
822 Cond: NotAllBadTD, SplitBefore: &*IRB.GetInsertPoint(), Unreachable: false, BranchWeights: UnlikelyBW);
823 IRB.SetInsertPoint(BadITDTerm);
824 IRB.CreateCall(Callee: TysanCheck, Args: {IRB.CreateBitCast(V: Ptr, DestTy: IRB.getPtrTy()), Size,
825 (Value *)TD, (Value *)Flags});
826 return true;
827}
828
829bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
830 Instruction *AppMemMask,
831 const DataLayout &DL) {
832 BasicBlock::iterator IP;
833 BasicBlock *BB;
834 Function *F;
835
836 if (auto *I = dyn_cast<Instruction>(Val: V)) {
837 IP = BasicBlock::iterator(I);
838 BB = I->getParent();
839 F = BB->getParent();
840 } else {
841 auto *A = cast<Argument>(Val: V);
842 F = A->getParent();
843 BB = &F->getEntryBlock();
844 IP = BB->getFirstInsertionPt();
845
846 // Find the next insert point after both ShadowBase and AppMemMask.
847 if (IP->comesBefore(Other: ShadowBase))
848 IP = ShadowBase->getNextNode()->getIterator();
849 if (IP->comesBefore(Other: AppMemMask))
850 IP = AppMemMask->getNextNode()->getIterator();
851 }
852
853 Value *Dest, *Size, *Src = nullptr;
854 bool NeedsMemMove = false;
855 IRBuilder<> IRB(BB, IP);
856
857 if (auto *A = dyn_cast<Argument>(Val: V)) {
858 assert(A->hasByValAttr() && "Type reset for non-byval argument?");
859
860 Dest = A;
861 Size =
862 ConstantInt::get(Ty: IntptrTy, V: DL.getTypeAllocSize(Ty: A->getParamByValType()));
863 } else {
864 auto *I = cast<Instruction>(Val: V);
865 if (auto *MI = dyn_cast<MemIntrinsic>(Val: I)) {
866 if (MI->getDestAddressSpace() != 0)
867 return false;
868
869 Dest = MI->getDest();
870 Size = MI->getLength();
871
872 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) {
873 if (MTI->getSourceAddressSpace() == 0) {
874 Src = MTI->getSource();
875 NeedsMemMove = isa<MemMoveInst>(Val: MTI);
876 }
877 }
878 } else if (auto *II = dyn_cast<LifetimeIntrinsic>(Val: I)) {
879 auto *AI = dyn_cast<AllocaInst>(Val: II->getArgOperand(i: 0));
880 if (!AI)
881 return false;
882
883 Size = IRB.CreateAllocationSize(DestTy: IntptrTy, AI);
884 Dest = II->getArgOperand(i: 0);
885 } else if (auto *AI = dyn_cast<AllocaInst>(Val: I)) {
886 // We need to clear the types for new stack allocations (or else we might
887 // read stale type information from a previous function execution).
888
889 IRB.SetInsertPoint(&*std::next(x: BasicBlock::iterator(I)));
890 IRB.SetInstDebugLocation(I);
891
892 Size = IRB.CreateAllocationSize(DestTy: IntptrTy, AI);
893 Dest = I;
894 } else {
895 return false;
896 }
897 }
898
899 if (ClOutlineInstrumentation) {
900 if (!Src)
901 Src = ConstantPointerNull::get(T: IRB.getPtrTy());
902
903 IRB.CreateCall(
904 Callee: TysanIntrumentMemInst,
905 Args: {Dest, Src, Size, NeedsMemMove ? IRB.getTrue() : IRB.getFalse()});
906 return true;
907 } else {
908 if (!ShadowBase)
909 ShadowBase = getShadowBase(F&: *F);
910 if (!AppMemMask)
911 AppMemMask = getAppMemMask(F&: *F);
912
913 Value *ShadowDataInt = IRB.CreateAdd(
914 LHS: IRB.CreateShl(
915 LHS: IRB.CreateAnd(LHS: IRB.CreatePtrToInt(V: Dest, DestTy: IntptrTy), RHS: AppMemMask),
916 RHS: PtrShift),
917 RHS: ShadowBase);
918 Value *ShadowData = IRB.CreateIntToPtr(V: ShadowDataInt, DestTy: IRB.getPtrTy());
919
920 if (!Src) {
921 IRB.CreateMemSet(Ptr: ShadowData, Val: IRB.getInt8(C: 0),
922 Size: IRB.CreateShl(LHS: Size, RHS: PtrShift), Align: Align(1ull << PtrShift));
923 return true;
924 }
925
926 Value *SrcShadowDataInt = IRB.CreateAdd(
927 LHS: IRB.CreateShl(
928 LHS: IRB.CreateAnd(LHS: IRB.CreatePtrToInt(V: Src, DestTy: IntptrTy), RHS: AppMemMask),
929 RHS: PtrShift),
930 RHS: ShadowBase);
931 Value *SrcShadowData = IRB.CreateIntToPtr(V: SrcShadowDataInt, DestTy: IRB.getPtrTy());
932
933 if (NeedsMemMove) {
934 IRB.CreateMemMove(Dst: ShadowData, DstAlign: Align(1ull << PtrShift), Src: SrcShadowData,
935 SrcAlign: Align(1ull << PtrShift), Size: IRB.CreateShl(LHS: Size, RHS: PtrShift));
936 } else {
937 IRB.CreateMemCpy(Dst: ShadowData, DstAlign: Align(1ull << PtrShift), Src: SrcShadowData,
938 SrcAlign: Align(1ull << PtrShift), Size: IRB.CreateShl(LHS: Size, RHS: PtrShift));
939 }
940 }
941
942 return true;
943}
944
945PreservedAnalyses TypeSanitizerPass::run(Module &M,
946 ModuleAnalysisManager &MAM) {
947 Function *TysanCtorFunction;
948 std::tie(args&: TysanCtorFunction, args: std::ignore) =
949 createSanitizerCtorAndInitFunctions(M, CtorName: kTysanModuleCtorName,
950 InitName: kTysanInitName, /*InitArgTypes=*/{},
951 /*InitArgs=*/{});
952
953 TypeSanitizer TySan(M);
954 TySan.instrumentGlobals(M);
955 appendToGlobalCtors(M, F: TysanCtorFunction, Priority: 0);
956
957 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
958 for (Function &F : M) {
959 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: F);
960 TySan.sanitizeFunction(F, TLI);
961 if (ClVerifyOutlinedInstrumentation && ClOutlineInstrumentation) {
962 // Outlined instrumentation is a new option, and so this exists to
963 // verify there is no difference in behaviour between the options.
964 // If the outlined instrumentation triggers a verification failure
965 // when the original inlined instrumentation does not, or vice versa,
966 // then there is a discrepency which should be investigated.
967 ClOutlineInstrumentation = false;
968 TySan.sanitizeFunction(F, TLI);
969 ClOutlineInstrumentation = true;
970 }
971 }
972
973 return PreservedAnalyses::none();
974}
975