1//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of SanitizerBinaryMetadata.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CaptureTracking.h"
21#include "llvm/Analysis/ValueTracking.h"
22#include "llvm/IR/Constant.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/GlobalValue.h"
26#include "llvm/IR/GlobalVariable.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/Module.h"
34#include "llvm/IR/Type.h"
35#include "llvm/IR/Value.h"
36#include "llvm/ProfileData/InstrProf.h"
37#include "llvm/Support/Allocator.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/SpecialCaseList.h"
41#include "llvm/Support/StringSaver.h"
42#include "llvm/Support/VirtualFileSystem.h"
43#include "llvm/TargetParser/Triple.h"
44#include "llvm/Transforms/Utils/ModuleUtils.h"
45
46#include <array>
47#include <cstdint>
48#include <memory>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "sanmd"
53
54namespace {
55
56//===--- Constants --------------------------------------------------------===//
57
// Base metadata version number; occupies the lower 16 bits of the version.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority used when registering the module constructors and destructors.
constexpr int kCtorDtorPriority = 2;
61
62// Pairs of names of initialization callback functions and which section
63// contains the relevant metadata.
64class MetadataInfo {
65public:
66 const StringRef FunctionPrefix;
67 const StringRef SectionSuffix;
68
69 static const MetadataInfo Covered;
70 static const MetadataInfo Atomics;
71
72private:
73 // Forbid construction elsewhere.
74 explicit constexpr MetadataInfo(StringRef FunctionPrefix,
75 StringRef SectionSuffix)
76 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
77};
78const MetadataInfo MetadataInfo::Covered{
79 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
80const MetadataInfo MetadataInfo::Atomics{
81 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
82
83// The only instances of MetadataInfo are the constants above, so a set of
84// them may simply store pointers to them. To deterministically generate code,
85// we need to use a set with stable iteration order, such as SetVector.
86using MetadataInfoSet = SetVector<const MetadataInfo *>;
87
88//===--- Command-line options ---------------------------------------------===//
89
90cl::opt<bool> ClWeakCallbacks(
91 "sanitizer-metadata-weak-callbacks",
92 cl::desc("Declare callbacks extern weak, and only call if non-null."),
93 cl::Hidden, cl::init(Val: true));
94cl::opt<bool>
95 ClNoSanitize("sanitizer-metadata-nosanitize-attr",
96 cl::desc("Mark some metadata features uncovered in functions "
97 "with associated no_sanitize attributes."),
98 cl::Hidden, cl::init(Val: true));
99
100cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
101 cl::desc("Emit PCs for covered functions."),
102 cl::Hidden, cl::init(Val: false));
103cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
104 cl::desc("Emit PCs for atomic operations."),
105 cl::Hidden, cl::init(Val: false));
106cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
107 cl::desc("Emit PCs for start of functions that are "
108 "subject for use-after-return checking"),
109 cl::Hidden, cl::init(Val: false));
110
111//===--- Statistics -------------------------------------------------------===//
112
113STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
114STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
115STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
116
117//===----------------------------------------------------------------------===//
118
119// Apply opt overrides.
120SanitizerBinaryMetadataOptions &&
121transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
122 Opts.Covered |= ClEmitCovered;
123 Opts.Atomics |= ClEmitAtomics;
124 Opts.UAR |= ClEmitUAR;
125 return std::move(Opts);
126}
127
128class SanitizerBinaryMetadata {
129public:
130 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
131 std::unique_ptr<SpecialCaseList> Ignorelist)
132 : Mod(M), Options(transformOptionsFromCl(Opts: std::move(Opts))),
133 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
134 VersionStr(utostr(X: getVersion())), IRB(M.getContext()) {
135 // FIXME: Make it work with other formats.
136 assert(TargetTriple.isOSBinFormatELF() && "ELF only");
137 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
138 "Device targets are not supported");
139 }
140
141 bool run();
142
143private:
144 uint32_t getVersion() const {
145 uint32_t Version = kVersionBase;
146 const auto CM = Mod.getCodeModel();
147 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
148 Version |= kVersionPtrSizeRel;
149 return Version;
150 }
151
152 void runOn(Function &F, MetadataInfoSet &MIS);
153
154 // Determines which set of metadata to collect for this instruction.
155 //
156 // Returns true if covered metadata is required to unambiguously interpret
157 // other metadata. For example, if we are interested in atomics metadata, any
158 // function with memory operations (atomic or not) requires covered metadata
159 // to determine if a memory operation is atomic or not in modules compiled
160 // with SanitizerBinaryMetadata.
161 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
162 uint64_t &FeatureMask);
163
164 // Get start/end section marker pointer.
165 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
166
167 // Returns the target-dependent section name.
168 StringRef getSectionName(StringRef SectionSuffix);
169
170 // Returns the section start marker name.
171 StringRef getSectionStart(StringRef SectionSuffix);
172
173 // Returns the section end marker name.
174 StringRef getSectionEnd(StringRef SectionSuffix);
175
176 // Returns true if the access to the address should be considered "atomic".
177 bool pretendAtomicAccess(const Value *Addr);
178
179 Module &Mod;
180 const SanitizerBinaryMetadataOptions Options;
181 std::unique_ptr<SpecialCaseList> Ignorelist;
182 const Triple TargetTriple;
183 const std::string VersionStr;
184 IRBuilder<> IRB;
185 BumpPtrAllocator Alloc;
186 UniqueStringSaver StringPool{Alloc};
187};
188
189bool SanitizerBinaryMetadata::run() {
190 MetadataInfoSet MIS;
191
192 for (Function &F : Mod)
193 runOn(F, MIS);
194
195 if (MIS.empty())
196 return false;
197
198 //
199 // Setup constructors and call all initialization functions for requested
200 // metadata features.
201 //
202
203 auto *PtrTy = IRB.getPtrTy();
204 auto *Int32Ty = IRB.getInt32Ty();
205 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
206 auto *Version = ConstantInt::get(Ty: Int32Ty, V: getVersion());
207
208 for (const MetadataInfo *MI : MIS) {
209 const std::array<Value *, InitTypes.size()> InitArgs = {
210 Version,
211 getSectionMarker(MarkerName: getSectionStart(SectionSuffix: MI->SectionSuffix), Ty: PtrTy),
212 getSectionMarker(MarkerName: getSectionEnd(SectionSuffix: MI->SectionSuffix), Ty: PtrTy),
213 };
214
215 // Calls to the initialization functions with different versions cannot be
216 // merged. Give the structors unique names based on the version, which will
217 // also be used as the COMDAT key.
218 const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
219
220 // We declare the _add and _del functions as weak, and only call them if
221 // there is a valid symbol linked. This allows building binaries with
222 // semantic metadata, but without having callbacks. When a tool that wants
223 // the metadata is linked which provides the callbacks, they will be called.
224 Function *Ctor =
225 createSanitizerCtorAndInitFunctions(
226 M&: Mod, CtorName: StructorPrefix + ".module_ctor",
227 InitName: (MI->FunctionPrefix + "_add").str(), InitArgTypes: InitTypes, InitArgs,
228 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
229 .first;
230 Function *Dtor =
231 createSanitizerCtorAndInitFunctions(
232 M&: Mod, CtorName: StructorPrefix + ".module_dtor",
233 InitName: (MI->FunctionPrefix + "_del").str(), InitArgTypes: InitTypes, InitArgs,
234 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
235 .first;
236 Constant *CtorComdatKey = nullptr;
237 Constant *DtorComdatKey = nullptr;
238 if (TargetTriple.supportsCOMDAT()) {
239 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
240 // key needs to be a non-local linkage.
241 Ctor->setComdat(Mod.getOrInsertComdat(Name: Ctor->getName()));
242 Dtor->setComdat(Mod.getOrInsertComdat(Name: Dtor->getName()));
243 Ctor->setLinkage(GlobalValue::ExternalLinkage);
244 Dtor->setLinkage(GlobalValue::ExternalLinkage);
245 // DSOs should _not_ call another constructor/destructor!
246 Ctor->setVisibility(GlobalValue::HiddenVisibility);
247 Dtor->setVisibility(GlobalValue::HiddenVisibility);
248 CtorComdatKey = Ctor;
249 DtorComdatKey = Dtor;
250 }
251 appendToGlobalCtors(M&: Mod, F: Ctor, Priority: kCtorDtorPriority, Data: CtorComdatKey);
252 appendToGlobalDtors(M&: Mod, F: Dtor, Priority: kCtorDtorPriority, Data: DtorComdatKey);
253 }
254
255 return true;
256}
257
258void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
259 if (F.empty())
260 return;
261 if (F.hasFnAttribute(Kind: Attribute::DisableSanitizerInstrumentation))
262 return;
263 if (Ignorelist && Ignorelist->inSection(Section: "metadata", Prefix: "fun", Query: F.getName()))
264 return;
265 // Don't touch available_externally functions, their actual body is elsewhere.
266 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
267 return;
268
269 MDBuilder MDB(F.getContext());
270
271 // The metadata features enabled for this function, stored along covered
272 // metadata (if enabled).
273 uint64_t FeatureMask = 0;
274 // Don't emit unnecessary covered metadata for all functions to save space.
275 bool RequiresCovered = false;
276
277 if (Options.Atomics || Options.UAR) {
278 for (BasicBlock &BB : F)
279 for (Instruction &I : BB)
280 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
281 }
282
283 if (ClNoSanitize && F.hasFnAttribute(Kind: "no_sanitize_thread"))
284 FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
285 if (F.isVarArg())
286 FeatureMask &= ~kSanitizerBinaryMetadataUAR;
287 if (FeatureMask & kSanitizerBinaryMetadataUAR) {
288 RequiresCovered = true;
289 NumMetadataUAR++;
290 }
291
292 // Covered metadata is always emitted if explicitly requested, otherwise only
293 // if some other metadata requires it to unambiguously interpret it for
294 // modules compiled with SanitizerBinaryMetadata.
295 if (Options.Covered || (FeatureMask && RequiresCovered)) {
296 NumMetadataCovered++;
297 const auto *MI = &MetadataInfo::Covered;
298 MIS.insert(X: MI);
299 const StringRef Section = getSectionName(SectionSuffix: MI->SectionSuffix);
300 // The feature mask will be placed after the function size.
301 Constant *CFM = IRB.getInt64(C: FeatureMask);
302 F.setMetadata(KindID: LLVMContext::MD_pcsections,
303 Node: MDB.createPCSections(Sections: {{Section, {CFM}}}));
304 }
305}
306
307bool isUARSafeCall(CallInst *CI) {
308 auto *F = CI->getCalledFunction();
309 // There are no intrinsic functions that leak arguments.
310 // If the called function does not return, the current function
311 // does not return as well, so no possibility of use-after-return.
312 // Sanitizer function also don't leak or don't return.
313 // It's safe to both pass pointers to local variables to them
314 // and to tail-call them.
315 return F && (F->isIntrinsic() || F->doesNotReturn() ||
316 F->getName().starts_with(Prefix: "__asan_") ||
317 F->getName().starts_with(Prefix: "__hwsan_") ||
318 F->getName().starts_with(Prefix: "__ubsan_") ||
319 F->getName().starts_with(Prefix: "__msan_") ||
320 F->getName().starts_with(Prefix: "__tsan_"));
321}
322
323bool hasUseAfterReturnUnsafeUses(Value &V) {
324 for (User *U : V.users()) {
325 if (auto *I = dyn_cast<Instruction>(Val: U)) {
326 if (I->isLifetimeStartOrEnd() || I->isDroppable())
327 continue;
328 if (auto *CI = dyn_cast<CallInst>(Val: U)) {
329 if (isUARSafeCall(CI))
330 continue;
331 }
332 if (isa<LoadInst>(Val: U))
333 continue;
334 if (auto *SI = dyn_cast<StoreInst>(Val: U)) {
335 // If storing TO the alloca, then the address isn't taken.
336 if (SI->getOperand(i_nocapture: 1) == &V)
337 continue;
338 }
339 if (auto *GEPI = dyn_cast<GetElementPtrInst>(Val: U)) {
340 if (!hasUseAfterReturnUnsafeUses(V&: *GEPI))
341 continue;
342 } else if (auto *BCI = dyn_cast<BitCastInst>(Val: U)) {
343 if (!hasUseAfterReturnUnsafeUses(V&: *BCI))
344 continue;
345 }
346 }
347 return true;
348 }
349 return false;
350}
351
352bool useAfterReturnUnsafe(Instruction &I) {
353 if (isa<AllocaInst>(Val: I))
354 return hasUseAfterReturnUnsafeUses(V&: I);
355 // Tail-called functions are not necessary intercepted
356 // at runtime because there is no call instruction.
357 // So conservatively mark the caller as requiring checking.
358 else if (auto *CI = dyn_cast<CallInst>(Val: &I))
359 return CI->isTailCall() && !isUARSafeCall(CI);
360 return false;
361}
362
363bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
364 if (!Addr)
365 return false;
366
367 Addr = Addr->stripInBoundsOffsets();
368 auto *GV = dyn_cast<GlobalVariable>(Val: Addr);
369 if (!GV)
370 return false;
371
372 // Some compiler-generated accesses are known racy, to avoid false positives
373 // in data-race analysis pretend they're atomic.
374 if (GV->hasSection()) {
375 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
376 const auto ProfSec =
377 getInstrProfSectionName(IPSK: IPSK_cnts, OF, /*AddSegmentInfo=*/false);
378 if (GV->getSection().ends_with(Suffix: ProfSec))
379 return true;
380 }
381 if (GV->getName().starts_with(Prefix: "__llvm_gcov") ||
382 GV->getName().starts_with(Prefix: "__llvm_gcda"))
383 return true;
384
385 return false;
386}
387
388// Returns true if the memory at `Addr` may be shared with other threads.
389bool maybeSharedMutable(const Value *Addr) {
390 // By default assume memory may be shared.
391 if (!Addr)
392 return true;
393
394 if (isa<AllocaInst>(Val: getUnderlyingObject(V: Addr)) &&
395 !PointerMayBeCaptured(V: Addr, ReturnCaptures: true, StoreCaptures: true))
396 return false; // Object is on stack but does not escape.
397
398 Addr = Addr->stripInBoundsOffsets();
399 if (auto *GV = dyn_cast<GlobalVariable>(Val: Addr)) {
400 if (GV->isConstant())
401 return false; // Shared, but not mutable.
402 }
403
404 return true;
405}
406
407bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
408 MDBuilder &MDB, uint64_t &FeatureMask) {
409 SmallVector<const MetadataInfo *, 1> InstMetadata;
410 bool RequiresCovered = false;
411
412 // Only call if at least 1 type of metadata is requested.
413 assert(Options.UAR || Options.Atomics);
414
415 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
416 if (useAfterReturnUnsafe(I))
417 FeatureMask |= kSanitizerBinaryMetadataUAR;
418 }
419
420 if (Options.Atomics) {
421 const Value *Addr = nullptr;
422 if (auto *SI = dyn_cast<StoreInst>(Val: &I))
423 Addr = SI->getPointerOperand();
424 else if (auto *LI = dyn_cast<LoadInst>(Val: &I))
425 Addr = LI->getPointerOperand();
426
427 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
428 auto SSID = getAtomicSyncScopeID(I: &I);
429 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
430 pretendAtomicAccess(Addr)) {
431 NumMetadataAtomics++;
432 InstMetadata.push_back(Elt: &MetadataInfo::Atomics);
433 }
434 FeatureMask |= kSanitizerBinaryMetadataAtomics;
435 RequiresCovered = true;
436 }
437 }
438
439 // Attach MD_pcsections to instruction.
440 if (!InstMetadata.empty()) {
441 MIS.insert(Start: InstMetadata.begin(), End: InstMetadata.end());
442 SmallVector<MDBuilder::PCSection, 1> Sections;
443 for (const auto &MI : InstMetadata)
444 Sections.push_back(Elt: {getSectionName(SectionSuffix: MI->SectionSuffix), {}});
445 I.setMetadata(KindID: LLVMContext::MD_pcsections, Node: MDB.createPCSections(Sections));
446 }
447
448 return RequiresCovered;
449}
450
451GlobalVariable *
452SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
453 // Use ExternalWeak so that if all sections are discarded due to section
454 // garbage collection, the linker will not report undefined symbol errors.
455 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
456 GlobalVariable::ExternalWeakLinkage,
457 /*Initializer=*/nullptr, MarkerName);
458 Marker->setVisibility(GlobalValue::HiddenVisibility);
459 return Marker;
460}
461
462StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
463 // FIXME: Other TargetTriples.
464 // Request ULEB128 encoding for all integer constants.
465 return StringPool.save(S: SectionSuffix + VersionStr + "!C");
466}
467
468StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
469 // Twine only concatenates 2 strings; with >2 strings, concatenating them
470 // creates Twine temporaries, and returning the final Twine no longer works
471 // because we'd end up with a stack-use-after-return. So here we also use the
472 // StringPool to store the new string.
473 return StringPool.save(S: "__start_" + SectionSuffix + VersionStr);
474}
475
476StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
477 return StringPool.save(S: "__stop_" + SectionSuffix + VersionStr);
478}
479
480} // namespace
481
482SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
483 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
484 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
485
486PreservedAnalyses
487SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
488 std::unique_ptr<SpecialCaseList> Ignorelist;
489 if (!IgnorelistFiles.empty()) {
490 Ignorelist = SpecialCaseList::createOrDie(Paths: IgnorelistFiles,
491 FS&: *vfs::getRealFileSystem());
492 if (Ignorelist->inSection(Section: "metadata", Prefix: "src", Query: M.getSourceFileName()))
493 return PreservedAnalyses::all();
494 }
495
496 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
497 if (Pass.run())
498 return PreservedAnalyses::none();
499 return PreservedAnalyses::all();
500}
501