1 | //===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "AArch64.h" |
12 | #include "llvm/BinaryFormat/ELF.h" |
13 | #include "llvm/IR/Attributes.h" |
14 | #include "llvm/IR/Constants.h" |
15 | #include "llvm/IR/GlobalValue.h" |
16 | #include "llvm/IR/GlobalVariable.h" |
17 | #include "llvm/IR/IRBuilder.h" |
18 | #include "llvm/IR/Module.h" |
19 | #include "llvm/Pass.h" |
20 | #include "llvm/Support/raw_ostream.h" |
21 | |
22 | #include <algorithm> |
23 | #include <set> |
24 | |
25 | using namespace llvm; |
26 | |
27 | static const Align kTagGranuleSize = Align(16); |
28 | |
29 | static bool shouldTagGlobal(GlobalVariable &G) { |
30 | if (!G.isTagged()) |
31 | return false; |
32 | |
33 | assert(G.hasSanitizerMetadata() && |
34 | "Missing sanitizer metadata, but symbol is apparently tagged." ); |
35 | GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata(); |
36 | |
37 | // For now, don't instrument constant data, as it'll be in .rodata anyway. It |
38 | // may be worth instrumenting these in future to stop them from being used as |
39 | // gadgets. |
40 | if (G.getName().starts_with(Prefix: "llvm." ) || G.isThreadLocal() || G.isConstant()) { |
41 | Meta.Memtag = false; |
42 | G.setSanitizerMetadata(Meta); |
43 | return false; |
44 | } |
45 | |
46 | // Globals can be placed implicitly or explicitly in sections. There's two |
47 | // different types of globals that meet this criteria that cause problems: |
48 | // 1. Function pointers that are going into various init arrays (either |
49 | // explicitly through `__attribute__((section(<foo>)))` or implicitly |
50 | // through `__attribute__((constructor)))`, such as ".(pre)init(_array)", |
51 | // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up |
52 | // overaligned and overpadded, making iterating over them problematic, and |
53 | // each function pointer is individually tagged (so the iteration over |
54 | // them causes SIGSEGV/MTE[AS]ERR). |
55 | // 2. Global variables put into an explicit section, where the section's name |
56 | // is a valid C-style identifier. The linker emits a `__start_<name>` and |
57 | // `__stop_<na,e>` symbol for the section, so that you can iterate over |
58 | // globals within this section. Unfortunately, again, these globals would |
59 | // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR. |
60 | // |
61 | // To mitigate both these cases, and because specifying a section is rare |
62 | // outside of these two cases, disable MTE protection for globals in any |
63 | // section. |
64 | if (G.hasSection()) { |
65 | Meta.Memtag = false; |
66 | G.setSanitizerMetadata(Meta); |
67 | return false; |
68 | } |
69 | |
70 | return true; |
71 | } |
72 | |
73 | // Technically, due to ELF symbol interposition semantics, we can't change the |
74 | // alignment or size of symbols. If we increase the alignment or size of a |
75 | // symbol, the compiler may make optimisations based on this new alignment or |
76 | // size. If the symbol is interposed, this optimisation could lead to |
77 | // alignment-related or OOB read/write crashes. |
78 | // |
79 | // This is handled in the linker. When the linker sees multiple declarations of |
80 | // a global variable, and some are tagged, and some are untagged, it resolves it |
81 | // to be an untagged definition - but preserves the tag-granule-rounded size and |
82 | // tag-granule-alignment. This should prevent these kind of crashes intra-DSO. |
83 | // For cross-DSO, it's been a reasonable contract that if you're interposing a |
84 | // sanitizer-instrumented global, then the interposer also needs to be |
85 | // sanitizer-instrumented. |
86 | // |
87 | // FIXME: In theory, this can be fixed by splitting the size/alignment of |
88 | // globals into two uses: an "output alignment" that's emitted to the ELF file, |
89 | // and an "optimisation alignment" that's used for optimisation. Thus, we could |
90 | // adjust the output alignment only, and still optimise based on the pessimistic |
91 | // pre-tagging size/alignment. |
92 | static void tagGlobalDefinition(Module &M, GlobalVariable *G) { |
93 | Constant *Initializer = G->getInitializer(); |
94 | uint64_t SizeInBytes = |
95 | M.getDataLayout().getTypeAllocSize(Ty: Initializer->getType()); |
96 | |
97 | uint64_t NewSize = alignTo(Size: SizeInBytes, A: kTagGranuleSize); |
98 | if (SizeInBytes != NewSize) { |
99 | // Pad the initializer out to the next multiple of 16 bytes. |
100 | llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0); |
101 | Constant *Padding = ConstantDataArray::get(Context&: M.getContext(), Elts&: Init); |
102 | Initializer = ConstantStruct::getAnon(V: {Initializer, Padding}); |
103 | auto *NewGV = new GlobalVariable( |
104 | M, Initializer->getType(), G->isConstant(), G->getLinkage(), |
105 | Initializer, "" , G, G->getThreadLocalMode(), G->getAddressSpace()); |
106 | NewGV->copyAttributesFrom(Src: G); |
107 | NewGV->setComdat(G->getComdat()); |
108 | NewGV->copyMetadata(Src: G, Offset: 0); |
109 | |
110 | NewGV->takeName(V: G); |
111 | G->replaceAllUsesWith(V: NewGV); |
112 | G->eraseFromParent(); |
113 | G = NewGV; |
114 | } |
115 | |
116 | G->setAlignment(std::max(a: G->getAlign().valueOrOne(), b: kTagGranuleSize)); |
117 | |
118 | // Ensure that tagged globals don't get merged by ICF - as they should have |
119 | // different tags at runtime. |
120 | G->setUnnamedAddr(GlobalValue::UnnamedAddr::None); |
121 | } |
122 | |
123 | namespace { |
124 | class AArch64GlobalsTagging : public ModulePass { |
125 | public: |
126 | static char ID; |
127 | |
128 | explicit AArch64GlobalsTagging() : ModulePass(ID) { |
129 | initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry()); |
130 | } |
131 | |
132 | bool runOnModule(Module &M) override; |
133 | |
134 | StringRef getPassName() const override { return "AArch64 Globals Tagging" ; } |
135 | |
136 | private: |
137 | std::set<GlobalVariable *> GlobalsToTag; |
138 | }; |
139 | } // anonymous namespace |
140 | |
141 | char AArch64GlobalsTagging::ID = 0; |
142 | |
143 | bool AArch64GlobalsTagging::runOnModule(Module &M) { |
144 | // No mutating the globals in-place, or iterator invalidation occurs. |
145 | std::vector<GlobalVariable *> GlobalsToTag; |
146 | for (GlobalVariable &G : M.globals()) { |
147 | if (G.isDeclaration() || !shouldTagGlobal(G)) |
148 | continue; |
149 | GlobalsToTag.push_back(x: &G); |
150 | } |
151 | |
152 | for (GlobalVariable *G : GlobalsToTag) { |
153 | tagGlobalDefinition(M, G); |
154 | } |
155 | |
156 | return true; |
157 | } |
158 | |
159 | INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging" , |
160 | "AArch64 Globals Tagging Pass" , false, false) |
161 | INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging" , |
162 | "AArch64 Globals Tagging Pass" , false, false) |
163 | |
164 | ModulePass *llvm::createAArch64GlobalsTaggingPass() { |
165 | return new AArch64GlobalsTagging(); |
166 | } |
167 | |