1//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of SanitizerBinaryMetadata.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CaptureTracking.h"
21#include "llvm/Analysis/ValueTracking.h"
22#include "llvm/IR/Constant.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/GlobalValue.h"
26#include "llvm/IR/GlobalVariable.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/Module.h"
34#include "llvm/IR/Type.h"
35#include "llvm/IR/Value.h"
36#include "llvm/ProfileData/InstrProf.h"
37#include "llvm/Support/Allocator.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/SpecialCaseList.h"
40#include "llvm/Support/StringSaver.h"
41#include "llvm/Support/VirtualFileSystem.h"
42#include "llvm/TargetParser/Triple.h"
43#include "llvm/Transforms/Utils/ModuleUtils.h"
44
45#include <array>
46#include <cstdint>
47#include <memory>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "sanmd"
52
53namespace {
54
55//===--- Constants --------------------------------------------------------===//
56
// Base version number of the emitted metadata; it occupies the lower 16 bits
// of the version word.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16) signalling that offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority used when registering the generated module constructor and
// destructor.
constexpr int kCtorDtorPriority = 2;
60
61// Pairs of names of initialization callback functions and which section
62// contains the relevant metadata.
63class MetadataInfo {
64public:
65 const StringRef FunctionPrefix;
66 const StringRef SectionSuffix;
67
68 static const MetadataInfo Covered;
69 static const MetadataInfo Atomics;
70
71private:
72 // Forbid construction elsewhere.
73 explicit constexpr MetadataInfo(StringRef FunctionPrefix,
74 StringRef SectionSuffix)
75 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
76};
77const MetadataInfo MetadataInfo::Covered{
78 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
79const MetadataInfo MetadataInfo::Atomics{
80 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
81
82// The only instances of MetadataInfo are the constants above, so a set of
83// them may simply store pointers to them. To deterministically generate code,
84// we need to use a set with stable iteration order, such as SetVector.
85using MetadataInfoSet = SetVector<const MetadataInfo *>;
86
87//===--- Command-line options ---------------------------------------------===//
88
89cl::opt<bool> ClWeakCallbacks(
90 "sanitizer-metadata-weak-callbacks",
91 cl::desc("Declare callbacks extern weak, and only call if non-null."),
92 cl::Hidden, cl::init(Val: true));
93cl::opt<bool>
94 ClNoSanitize("sanitizer-metadata-nosanitize-attr",
95 cl::desc("Mark some metadata features uncovered in functions "
96 "with associated no_sanitize attributes."),
97 cl::Hidden, cl::init(Val: true));
98
99cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
100 cl::desc("Emit PCs for covered functions."),
101 cl::Hidden, cl::init(Val: false));
102cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
103 cl::desc("Emit PCs for atomic operations."),
104 cl::Hidden, cl::init(Val: false));
105cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
106 cl::desc("Emit PCs for start of functions that are "
107 "subject for use-after-return checking"),
108 cl::Hidden, cl::init(Val: false));
109
110//===--- Statistics -------------------------------------------------------===//
111
112STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
113STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
114STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
115
116//===----------------------------------------------------------------------===//
117
118// Apply opt overrides.
119SanitizerBinaryMetadataOptions &&
120transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
121 Opts.Covered |= ClEmitCovered;
122 Opts.Atomics |= ClEmitAtomics;
123 Opts.UAR |= ClEmitUAR;
124 return std::move(Opts);
125}
126
127class SanitizerBinaryMetadata {
128public:
129 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
130 std::unique_ptr<SpecialCaseList> Ignorelist)
131 : Mod(M), Options(transformOptionsFromCl(Opts: std::move(Opts))),
132 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
133 VersionStr(utostr(X: getVersion())), IRB(M.getContext()) {
134 // FIXME: Make it work with other formats.
135 assert(TargetTriple.isOSBinFormatELF() && "ELF only");
136 assert(!TargetTriple.isGPU() && "Device targets are not supported");
137 }
138
139 bool run();
140
141private:
142 uint32_t getVersion() const {
143 uint32_t Version = kVersionBase;
144 const auto CM = Mod.getCodeModel();
145 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
146 Version |= kVersionPtrSizeRel;
147 return Version;
148 }
149
150 void runOn(Function &F, MetadataInfoSet &MIS);
151
152 // Determines which set of metadata to collect for this instruction.
153 //
154 // Returns true if covered metadata is required to unambiguously interpret
155 // other metadata. For example, if we are interested in atomics metadata, any
156 // function with memory operations (atomic or not) requires covered metadata
157 // to determine if a memory operation is atomic or not in modules compiled
158 // with SanitizerBinaryMetadata.
159 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
160 uint64_t &FeatureMask);
161
162 // Get start/end section marker pointer.
163 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
164
165 // Returns the target-dependent section name.
166 StringRef getSectionName(StringRef SectionSuffix);
167
168 // Returns the section start marker name.
169 StringRef getSectionStart(StringRef SectionSuffix);
170
171 // Returns the section end marker name.
172 StringRef getSectionEnd(StringRef SectionSuffix);
173
174 // Returns true if the access to the address should be considered "atomic".
175 bool pretendAtomicAccess(const Value *Addr);
176
177 Module &Mod;
178 const SanitizerBinaryMetadataOptions Options;
179 std::unique_ptr<SpecialCaseList> Ignorelist;
180 const Triple TargetTriple;
181 const std::string VersionStr;
182 IRBuilder<> IRB;
183 BumpPtrAllocator Alloc;
184 UniqueStringSaver StringPool{Alloc};
185};
186
187bool SanitizerBinaryMetadata::run() {
188 MetadataInfoSet MIS;
189
190 for (Function &F : Mod)
191 runOn(F, MIS);
192
193 if (MIS.empty())
194 return false;
195
196 //
197 // Setup constructors and call all initialization functions for requested
198 // metadata features.
199 //
200
201 auto *PtrTy = IRB.getPtrTy();
202 auto *Int32Ty = IRB.getInt32Ty();
203 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
204 auto *Version = ConstantInt::get(Ty: Int32Ty, V: getVersion());
205
206 for (const MetadataInfo *MI : MIS) {
207 const std::array<Value *, InitTypes.size()> InitArgs = {
208 Version,
209 getSectionMarker(MarkerName: getSectionStart(SectionSuffix: MI->SectionSuffix), Ty: PtrTy),
210 getSectionMarker(MarkerName: getSectionEnd(SectionSuffix: MI->SectionSuffix), Ty: PtrTy),
211 };
212
213 // Calls to the initialization functions with different versions cannot be
214 // merged. Give the structors unique names based on the version, which will
215 // also be used as the COMDAT key.
216 const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
217
218 // We declare the _add and _del functions as weak, and only call them if
219 // there is a valid symbol linked. This allows building binaries with
220 // semantic metadata, but without having callbacks. When a tool that wants
221 // the metadata is linked which provides the callbacks, they will be called.
222 Function *Ctor =
223 createSanitizerCtorAndInitFunctions(
224 M&: Mod, CtorName: StructorPrefix + ".module_ctor",
225 InitName: (MI->FunctionPrefix + "_add").str(), InitArgTypes: InitTypes, InitArgs,
226 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
227 .first;
228 Function *Dtor =
229 createSanitizerCtorAndInitFunctions(
230 M&: Mod, CtorName: StructorPrefix + ".module_dtor",
231 InitName: (MI->FunctionPrefix + "_del").str(), InitArgTypes: InitTypes, InitArgs,
232 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
233 .first;
234 Constant *CtorComdatKey = nullptr;
235 Constant *DtorComdatKey = nullptr;
236 if (TargetTriple.supportsCOMDAT()) {
237 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
238 // key needs to be a non-local linkage.
239 Ctor->setComdat(Mod.getOrInsertComdat(Name: Ctor->getName()));
240 Dtor->setComdat(Mod.getOrInsertComdat(Name: Dtor->getName()));
241 Ctor->setLinkage(GlobalValue::ExternalLinkage);
242 Dtor->setLinkage(GlobalValue::ExternalLinkage);
243 // DSOs should _not_ call another constructor/destructor!
244 Ctor->setVisibility(GlobalValue::HiddenVisibility);
245 Dtor->setVisibility(GlobalValue::HiddenVisibility);
246 CtorComdatKey = Ctor;
247 DtorComdatKey = Dtor;
248 }
249 appendToGlobalCtors(M&: Mod, F: Ctor, Priority: kCtorDtorPriority, Data: CtorComdatKey);
250 appendToGlobalDtors(M&: Mod, F: Dtor, Priority: kCtorDtorPriority, Data: DtorComdatKey);
251 }
252
253 return true;
254}
255
256void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
257 if (F.empty())
258 return;
259 // Do not apply any instrumentation for naked functions.
260 if (F.hasFnAttribute(Kind: Attribute::Naked))
261 return;
262 if (F.hasFnAttribute(Kind: Attribute::DisableSanitizerInstrumentation))
263 return;
264 if (Ignorelist && Ignorelist->inSection(Section: "metadata", Prefix: "fun", Query: F.getName()))
265 return;
266 // Don't touch available_externally functions, their actual body is elsewhere.
267 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
268 return;
269
270 MDBuilder MDB(F.getContext());
271
272 // The metadata features enabled for this function, stored along covered
273 // metadata (if enabled).
274 uint64_t FeatureMask = 0;
275 // Don't emit unnecessary covered metadata for all functions to save space.
276 bool RequiresCovered = false;
277
278 if (Options.Atomics || Options.UAR) {
279 for (BasicBlock &BB : F)
280 for (Instruction &I : BB)
281 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
282 }
283
284 if (ClNoSanitize && F.hasFnAttribute(Kind: "no_sanitize_thread"))
285 FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
286 if (F.isVarArg())
287 FeatureMask &= ~kSanitizerBinaryMetadataUAR;
288 if (FeatureMask & kSanitizerBinaryMetadataUAR) {
289 RequiresCovered = true;
290 NumMetadataUAR++;
291 }
292
293 // Covered metadata is always emitted if explicitly requested, otherwise only
294 // if some other metadata requires it to unambiguously interpret it for
295 // modules compiled with SanitizerBinaryMetadata.
296 if (Options.Covered || (FeatureMask && RequiresCovered)) {
297 NumMetadataCovered++;
298 const auto *MI = &MetadataInfo::Covered;
299 MIS.insert(X: MI);
300 const StringRef Section = getSectionName(SectionSuffix: MI->SectionSuffix);
301 // The feature mask will be placed after the function size.
302 Constant *CFM = IRB.getInt64(C: FeatureMask);
303 F.setMetadata(KindID: LLVMContext::MD_pcsections,
304 Node: MDB.createPCSections(Sections: {{Section, {CFM}}}));
305 }
306}
307
308bool isUARSafeCall(CallInst *CI) {
309 auto *F = CI->getCalledFunction();
310 // There are no intrinsic functions that leak arguments.
311 // If the called function does not return, the current function
312 // does not return as well, so no possibility of use-after-return.
313 // Sanitizer function also don't leak or don't return.
314 // It's safe to both pass pointers to local variables to them
315 // and to tail-call them.
316 return F && (F->isIntrinsic() || F->doesNotReturn() ||
317 F->getName().starts_with(Prefix: "__asan_") ||
318 F->getName().starts_with(Prefix: "__hwsan_") ||
319 F->getName().starts_with(Prefix: "__ubsan_") ||
320 F->getName().starts_with(Prefix: "__msan_") ||
321 F->getName().starts_with(Prefix: "__tsan_"));
322}
323
324bool hasUseAfterReturnUnsafeUses(Value &V) {
325 for (User *U : V.users()) {
326 if (auto *I = dyn_cast<Instruction>(Val: U)) {
327 if (I->isLifetimeStartOrEnd() || I->isDroppable())
328 continue;
329 if (auto *CI = dyn_cast<CallInst>(Val: U)) {
330 if (isUARSafeCall(CI))
331 continue;
332 }
333 if (isa<LoadInst>(Val: U))
334 continue;
335 if (auto *SI = dyn_cast<StoreInst>(Val: U)) {
336 // If storing TO the alloca, then the address isn't taken.
337 if (SI->getOperand(i_nocapture: 1) == &V)
338 continue;
339 }
340 if (auto *GEPI = dyn_cast<GetElementPtrInst>(Val: U)) {
341 if (!hasUseAfterReturnUnsafeUses(V&: *GEPI))
342 continue;
343 } else if (auto *BCI = dyn_cast<BitCastInst>(Val: U)) {
344 if (!hasUseAfterReturnUnsafeUses(V&: *BCI))
345 continue;
346 }
347 }
348 return true;
349 }
350 return false;
351}
352
353bool useAfterReturnUnsafe(Instruction &I) {
354 if (isa<AllocaInst>(Val: I))
355 return hasUseAfterReturnUnsafeUses(V&: I);
356 // Tail-called functions are not necessary intercepted
357 // at runtime because there is no call instruction.
358 // So conservatively mark the caller as requiring checking.
359 else if (auto *CI = dyn_cast<CallInst>(Val: &I))
360 return CI->isTailCall() && !isUARSafeCall(CI);
361 return false;
362}
363
364bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
365 if (!Addr)
366 return false;
367
368 Addr = Addr->stripInBoundsOffsets();
369 auto *GV = dyn_cast<GlobalVariable>(Val: Addr);
370 if (!GV)
371 return false;
372
373 // Some compiler-generated accesses are known racy, to avoid false positives
374 // in data-race analysis pretend they're atomic.
375 if (GV->hasSection()) {
376 const auto OF = Mod.getTargetTriple().getObjectFormat();
377 const auto ProfSec =
378 getInstrProfSectionName(IPSK: IPSK_cnts, OF, /*AddSegmentInfo=*/false);
379 if (GV->getSection().ends_with(Suffix: ProfSec))
380 return true;
381 }
382 if (GV->getName().starts_with(Prefix: "__llvm_gcov") ||
383 GV->getName().starts_with(Prefix: "__llvm_gcda"))
384 return true;
385
386 return false;
387}
388
389// Returns true if the memory at `Addr` may be shared with other threads.
390bool maybeSharedMutable(const Value *Addr) {
391 // By default assume memory may be shared.
392 if (!Addr)
393 return true;
394
395 const AllocaInst *AI = findAllocaForValue(V: Addr);
396 if (AI && !PointerMayBeCaptured(V: AI, /*ReturnCaptures=*/true))
397 return false; // Object is on stack but does not escape.
398
399 Addr = Addr->stripInBoundsOffsets();
400 if (auto *GV = dyn_cast<GlobalVariable>(Val: Addr)) {
401 if (GV->isConstant())
402 return false; // Shared, but not mutable.
403 }
404
405 return true;
406}
407
408bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
409 MDBuilder &MDB, uint64_t &FeatureMask) {
410 SmallVector<const MetadataInfo *, 1> InstMetadata;
411 bool RequiresCovered = false;
412
413 // Only call if at least 1 type of metadata is requested.
414 assert(Options.UAR || Options.Atomics);
415
416 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
417 if (useAfterReturnUnsafe(I))
418 FeatureMask |= kSanitizerBinaryMetadataUAR;
419 }
420
421 if (Options.Atomics) {
422 const Value *Addr = nullptr;
423 if (auto *SI = dyn_cast<StoreInst>(Val: &I))
424 Addr = SI->getPointerOperand();
425 else if (auto *LI = dyn_cast<LoadInst>(Val: &I))
426 Addr = LI->getPointerOperand();
427
428 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
429 auto SSID = getAtomicSyncScopeID(I: &I);
430 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
431 pretendAtomicAccess(Addr)) {
432 NumMetadataAtomics++;
433 InstMetadata.push_back(Elt: &MetadataInfo::Atomics);
434 }
435 FeatureMask |= kSanitizerBinaryMetadataAtomics;
436 RequiresCovered = true;
437 }
438 }
439
440 // Attach MD_pcsections to instruction.
441 if (!InstMetadata.empty()) {
442 MIS.insert_range(R&: InstMetadata);
443 SmallVector<MDBuilder::PCSection, 1> Sections;
444 for (const auto &MI : InstMetadata)
445 Sections.push_back(Elt: {getSectionName(SectionSuffix: MI->SectionSuffix), {}});
446 I.setMetadata(KindID: LLVMContext::MD_pcsections, Node: MDB.createPCSections(Sections));
447 }
448
449 return RequiresCovered;
450}
451
452GlobalVariable *
453SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
454 // Use ExternalWeak so that if all sections are discarded due to section
455 // garbage collection, the linker will not report undefined symbol errors.
456 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
457 GlobalVariable::ExternalWeakLinkage,
458 /*Initializer=*/nullptr, MarkerName);
459 Marker->setVisibility(GlobalValue::HiddenVisibility);
460 return Marker;
461}
462
463StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
464 // FIXME: Other TargetTriples.
465 // Request ULEB128 encoding for all integer constants.
466 return StringPool.save(S: SectionSuffix + VersionStr + "!C");
467}
468
469StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
470 // Twine only concatenates 2 strings; with >2 strings, concatenating them
471 // creates Twine temporaries, and returning the final Twine no longer works
472 // because we'd end up with a stack-use-after-return. So here we also use the
473 // StringPool to store the new string.
474 return StringPool.save(S: "__start_" + SectionSuffix + VersionStr);
475}
476
477StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
478 return StringPool.save(S: "__stop_" + SectionSuffix + VersionStr);
479}
480
481} // namespace
482
483SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
484 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
485 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
486
487PreservedAnalyses
488SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
489 std::unique_ptr<SpecialCaseList> Ignorelist;
490 if (!IgnorelistFiles.empty()) {
491 Ignorelist = SpecialCaseList::createOrDie(Paths: IgnorelistFiles,
492 FS&: *vfs::getRealFileSystem());
493 if (Ignorelist->inSection(Section: "metadata", Prefix: "src", Query: M.getSourceFileName()))
494 return PreservedAnalyses::all();
495 }
496
497 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
498 if (Pass.run())
499 return PreservedAnalyses::none();
500 return PreservedAnalyses::all();
501}
502