1//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//===----------------------------------------------------------------------===//
10
11#include "AArch64.h"
12#include "llvm/BinaryFormat/ELF.h"
13#include "llvm/IR/Attributes.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/GlobalVariable.h"
17#include "llvm/IR/IRBuilder.h"
18#include "llvm/IR/Module.h"
19#include "llvm/Pass.h"
20#include "llvm/Support/raw_ostream.h"
21
22#include <algorithm>
23#include <set>
24
25using namespace llvm;
26
27static const Align kTagGranuleSize = Align(16);
28
29static bool shouldTagGlobal(GlobalVariable &G) {
30 if (!G.isTagged())
31 return false;
32
33 assert(G.hasSanitizerMetadata() &&
34 "Missing sanitizer metadata, but symbol is apparently tagged.");
35 GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
36
37 // For now, don't instrument constant data, as it'll be in .rodata anyway. It
38 // may be worth instrumenting these in future to stop them from being used as
39 // gadgets.
40 if (G.getName().starts_with(Prefix: "llvm.") || G.isThreadLocal() || G.isConstant()) {
41 Meta.Memtag = false;
42 G.setSanitizerMetadata(Meta);
43 return false;
44 }
45
46 // Globals can be placed implicitly or explicitly in sections. There's two
47 // different types of globals that meet this criteria that cause problems:
48 // 1. Function pointers that are going into various init arrays (either
49 // explicitly through `__attribute__((section(<foo>)))` or implicitly
50 // through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
51 // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
52 // overaligned and overpadded, making iterating over them problematic, and
53 // each function pointer is individually tagged (so the iteration over
54 // them causes SIGSEGV/MTE[AS]ERR).
55 // 2. Global variables put into an explicit section, where the section's name
56 // is a valid C-style identifier. The linker emits a `__start_<name>` and
57 // `__stop_<na,e>` symbol for the section, so that you can iterate over
58 // globals within this section. Unfortunately, again, these globals would
59 // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
60 //
61 // To mitigate both these cases, and because specifying a section is rare
62 // outside of these two cases, disable MTE protection for globals in any
63 // section.
64 if (G.hasSection()) {
65 Meta.Memtag = false;
66 G.setSanitizerMetadata(Meta);
67 return false;
68 }
69
70 return true;
71}
72
73// Technically, due to ELF symbol interposition semantics, we can't change the
74// alignment or size of symbols. If we increase the alignment or size of a
75// symbol, the compiler may make optimisations based on this new alignment or
76// size. If the symbol is interposed, this optimisation could lead to
77// alignment-related or OOB read/write crashes.
78//
79// This is handled in the linker. When the linker sees multiple declarations of
80// a global variable, and some are tagged, and some are untagged, it resolves it
81// to be an untagged definition - but preserves the tag-granule-rounded size and
82// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
83// For cross-DSO, it's been a reasonable contract that if you're interposing a
84// sanitizer-instrumented global, then the interposer also needs to be
85// sanitizer-instrumented.
86//
87// FIXME: In theory, this can be fixed by splitting the size/alignment of
88// globals into two uses: an "output alignment" that's emitted to the ELF file,
89// and an "optimisation alignment" that's used for optimisation. Thus, we could
90// adjust the output alignment only, and still optimise based on the pessimistic
91// pre-tagging size/alignment.
92static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
93 Constant *Initializer = G->getInitializer();
94 uint64_t SizeInBytes =
95 M.getDataLayout().getTypeAllocSize(Ty: Initializer->getType());
96
97 uint64_t NewSize = alignTo(Size: SizeInBytes, A: kTagGranuleSize);
98 if (SizeInBytes != NewSize) {
99 // Pad the initializer out to the next multiple of 16 bytes.
100 llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
101 Constant *Padding = ConstantDataArray::get(Context&: M.getContext(), Elts&: Init);
102 Initializer = ConstantStruct::getAnon(V: {Initializer, Padding});
103 auto *NewGV = new GlobalVariable(
104 M, Initializer->getType(), G->isConstant(), G->getLinkage(),
105 Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
106 NewGV->copyAttributesFrom(Src: G);
107 NewGV->setComdat(G->getComdat());
108 NewGV->copyMetadata(Src: G, Offset: 0);
109
110 NewGV->takeName(V: G);
111 G->replaceAllUsesWith(V: NewGV);
112 G->eraseFromParent();
113 G = NewGV;
114 }
115
116 G->setAlignment(std::max(a: G->getAlign().valueOrOne(), b: kTagGranuleSize));
117
118 // Ensure that tagged globals don't get merged by ICF - as they should have
119 // different tags at runtime.
120 G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
121}
122
123namespace {
124class AArch64GlobalsTagging : public ModulePass {
125public:
126 static char ID;
127
128 explicit AArch64GlobalsTagging() : ModulePass(ID) {
129 initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
130 }
131
132 bool runOnModule(Module &M) override;
133
134 StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
135
136private:
137 std::set<GlobalVariable *> GlobalsToTag;
138};
139} // anonymous namespace
140
141char AArch64GlobalsTagging::ID = 0;
142
143bool AArch64GlobalsTagging::runOnModule(Module &M) {
144 // No mutating the globals in-place, or iterator invalidation occurs.
145 std::vector<GlobalVariable *> GlobalsToTag;
146 for (GlobalVariable &G : M.globals()) {
147 if (G.isDeclaration() || !shouldTagGlobal(G))
148 continue;
149 GlobalsToTag.push_back(x: &G);
150 }
151
152 for (GlobalVariable *G : GlobalsToTag) {
153 tagGlobalDefinition(M, G);
154 }
155
156 return true;
157}
158
159INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
160 "AArch64 Globals Tagging Pass", false, false)
161INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
162 "AArch64 Globals Tagging Pass", false, false)
163
164ModulePass *llvm::createAArch64GlobalsTaggingPass() {
165 return new AArch64GlobalsTagging();
166}
167