1//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the custom lowering code required by the shadow-stack GC
10// strategy.
11//
12// This pass implements the code transformation described in this paper:
13// "Accurate Garbage Collection in an Uncooperative Environment"
14// Fergus Henderson, ISMM, 2002
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/ShadowStackGCLowering.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/Analysis/DomTreeUpdater.h"
22#include "llvm/CodeGen/GCMetadata.h"
23#include "llvm/CodeGen/Passes.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/GlobalValue.h"
32#include "llvm/IR/GlobalVariable.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/Value.h"
40#include "llvm/InitializePasses.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/Alignment.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/MathExtras.h"
45#include "llvm/Transforms/Utils/EscapeEnumerator.h"
46#include <cassert>
47#include <optional>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "shadow-stack-gc-lowering"
54
55namespace {
56
57class ShadowStackGCLoweringImpl {
58 /// RootChain - This is the global linked-list that contains the chain of GC
59 /// roots.
60 GlobalVariable *Head = nullptr;
61
62 StructType *FrameMapTy = nullptr;
63
64 /// Roots - GC roots in the current function. Each is a pair of the
65 /// intrinsic call and its corresponding alloca.
66 std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
67
68 /// RootOffsets - Byte offsets and sizes of each root within the frame.
69 /// Each element is a pair of (offset, size).
70 std::vector<std::pair<uint64_t, uint64_t>> RootOffsets;
71
72public:
73 ShadowStackGCLoweringImpl() = default;
74
75 bool doInitialization(Module &M);
76 bool runOnFunction(Function &F, DomTreeUpdater *DTU);
77
78private:
79 bool IsNullValue(Value *V);
80 Constant *GetFrameMap(Function &F, uint64_t FrameSizeInPtrs);
81 std::pair<uint64_t, Align> ComputeFrameLayout(Function &F);
82 void CollectRoots(Function &F);
83};
84
85class ShadowStackGCLowering : public FunctionPass {
86 ShadowStackGCLoweringImpl Impl;
87
88public:
89 static char ID;
90
91 ShadowStackGCLowering();
92
93 bool doInitialization(Module &M) override { return Impl.doInitialization(M); }
94 void getAnalysisUsage(AnalysisUsage &AU) const override {
95 AU.addPreserved<DominatorTreeWrapperPass>();
96 }
97 bool runOnFunction(Function &F) override {
98 std::optional<DomTreeUpdater> DTU;
99 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
100 DTU.emplace(args&: DTWP->getDomTree(), args: DomTreeUpdater::UpdateStrategy::Lazy);
101 return Impl.runOnFunction(F, DTU: DTU ? &*DTU : nullptr);
102 }
103};
104
105} // end anonymous namespace
106
107PreservedAnalyses ShadowStackGCLoweringPass::run(Module &M,
108 ModuleAnalysisManager &MAM) {
109 auto &Map = MAM.getResult<CollectorMetadataAnalysis>(IR&: M);
110 if (!Map.contains(GCName: "shadow-stack"))
111 return PreservedAnalyses::all();
112
113 ShadowStackGCLoweringImpl Impl;
114 bool Changed = Impl.doInitialization(M);
115 for (auto &F : M) {
116 auto &FAM =
117 MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
118 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(IR&: F);
119 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
120 Changed |= Impl.runOnFunction(F, DTU: DT ? &DTU : nullptr);
121 }
122
123 if (!Changed)
124 return PreservedAnalyses::all();
125 PreservedAnalyses PA;
126 PA.preserve<DominatorTreeAnalysis>();
127 return PA;
128}
129
130char ShadowStackGCLowering::ID = 0;
131char &llvm::ShadowStackGCLoweringID = ShadowStackGCLowering::ID;
132
133INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
134 "Shadow Stack GC Lowering", false, false)
135INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
136INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
137INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
138 "Shadow Stack GC Lowering", false, false)
139
140FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
141
142ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {}
143
144Constant *ShadowStackGCLoweringImpl::GetFrameMap(Function &F,
145 uint64_t FrameSizeInPtrs) {
146 // doInitialization creates the abstract type of this value.
147 Type *VoidPtr = PointerType::getUnqual(C&: F.getContext());
148
149 // Truncate the ShadowStackDescriptor if some metadata is null.
150 unsigned NumMeta = 0;
151 SmallVector<Constant *, 16> Metadata;
152 for (unsigned I = 0; I != Roots.size(); ++I) {
153 Constant *C = cast<Constant>(Val: Roots[I].first->getArgOperand(i: 1));
154 if (!C->isNullValue())
155 NumMeta = I + 1;
156 Metadata.push_back(Elt: C);
157 }
158 Metadata.resize(N: NumMeta);
159
160 Type *Int32Ty = Type::getInt32Ty(C&: F.getContext());
161
162 Constant *BaseElts[] = {
163 ConstantInt::get(Ty: Int32Ty, V: FrameSizeInPtrs, IsSigned: false),
164 ConstantInt::get(Ty: Int32Ty, V: NumMeta, IsSigned: false),
165 };
166
167 Constant *DescriptorElts[] = {
168 ConstantStruct::get(T: FrameMapTy, V: BaseElts),
169 ConstantArray::get(T: ArrayType::get(ElementType: VoidPtr, NumElements: NumMeta), V: Metadata)};
170
171 Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
172 StructType *STy = StructType::create(Elements: EltTys, Name: "gc_map." + utostr(X: NumMeta));
173
174 Constant *FrameMap = ConstantStruct::get(T: STy, V: DescriptorElts);
175
176 // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
177 // that, short of multithreaded LLVM, it should be safe; all that is
178 // necessary is that a simple Module::iterator loop not be invalidated.
179 // Appending to the GlobalVariable list is safe in that sense.
180 //
181 // All of the output passes emit globals last. The ExecutionEngine
182 // explicitly supports adding globals to the module after
183 // initialization.
184 //
185 // Still, if it isn't deemed acceptable, then this transformation needs
186 // to be a ModulePass (which means it cannot be in the 'llc' pipeline
187 // (which uses a FunctionPassManager (which segfaults (not asserts) if
188 // provided a ModulePass))).
189 return new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
190 GlobalVariable::InternalLinkage, FrameMap,
191 "__gc_" + F.getName());
192}
193
194std::pair<uint64_t, Align>
195ShadowStackGCLoweringImpl::ComputeFrameLayout(Function &F) {
196 // Compute the layout of the shadow stack frame using byte offsets.
197 // Layout: [Next ptr | Map ptr | Root 0 | Root 1 | ... | Root N]
198
199 const DataLayout &DL = F.getParent()->getDataLayout();
200 uint64_t PtrSize = DL.getPointerSize(AS: 0);
201 Align PtrAlign = DL.getPointerABIAlignment(AS: 0);
202
203 RootOffsets.clear();
204 Align MaxAlign = PtrAlign;
205
206 // Offset 0: Next pointer
207 // Offset PtrSize: Map pointer
208 uint64_t Offset = 2 * PtrSize;
209
210 // Compute offsets and sizes for each root
211 for (const std::pair<CallInst *, AllocaInst *> &Root : Roots) {
212 AllocaInst *AI = Root.second;
213 std::optional<TypeSize> RootSize = AI->getAllocationSize(DL);
214 if (!RootSize || !RootSize->isFixed())
215 reportFatalUsageError(
216 reason: "Intrinsic::gcroot requires a fixed size stack object");
217 uint64_t Size = RootSize->getFixedValue();
218 Align RootAlign = AI->getAlign();
219 MaxAlign = std::max(a: MaxAlign, b: RootAlign);
220
221 // Align the offset for this root
222 uint64_t AlignedOffset = alignTo(Size: Offset, A: RootAlign);
223
224 // Store both offset and size as a pair
225 RootOffsets.push_back(x: {AlignedOffset, Size});
226 Offset = AlignedOffset + Size;
227 }
228
229 // Final frame size, aligned to maximum alignment
230 uint64_t FrameSize = alignTo(Size: Offset, A: MaxAlign);
231 return {FrameSize, MaxAlign};
232}
233
234/// doInitialization - If this module uses the GC intrinsics, find them now. If
235/// not, exit fast.
236bool ShadowStackGCLoweringImpl::doInitialization(Module &M) {
237 bool Active = false;
238 for (Function &F : M) {
239 if (F.hasGC() && F.getGC() == "shadow-stack") {
240 Active = true;
241 break;
242 }
243 }
244 if (!Active)
245 return false;
246
247 // struct FrameMap {
248 // int32_t NumRoots; // Number of roots in stack frame.
249 // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
250 // void *Meta[]; // May be absent for roots without metadata.
251 // };
252 std::vector<Type *> EltTys;
253 // 32 bits is ok up to a 32GB stack frame. :)
254 EltTys.push_back(x: Type::getInt32Ty(C&: M.getContext()));
255 // Specifies length of variable length array.
256 EltTys.push_back(x: Type::getInt32Ty(C&: M.getContext()));
257 FrameMapTy = StructType::create(Elements: EltTys, Name: "gc_map");
258
259 // The shadow stack linked list uses opaque pointers.
260 // Each frame is a byte array with: [Next ptr | Map ptr | Roots...]
261 PointerType *StackEntryPtrTy = PointerType::getUnqual(C&: M.getContext());
262
263 // Get the root chain if it already exists.
264 Head = M.getGlobalVariable(Name: "llvm_gc_root_chain");
265 if (!Head) {
266 // If the root chain does not exist, insert a new one with linkonce
267 // linkage!
268 Head = new GlobalVariable(
269 M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
270 Constant::getNullValue(Ty: StackEntryPtrTy), "llvm_gc_root_chain");
271 } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
272 Head->setInitializer(Constant::getNullValue(Ty: StackEntryPtrTy));
273 Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
274 }
275
276 return true;
277}
278
279bool ShadowStackGCLoweringImpl::IsNullValue(Value *V) {
280 if (Constant *C = dyn_cast<Constant>(Val: V))
281 return C->isNullValue();
282 return false;
283}
284
285void ShadowStackGCLoweringImpl::CollectRoots(Function &F) {
286 assert(Roots.empty() && "Not cleaned up?");
287
288 SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
289
290 for (BasicBlock &BB : F)
291 for (Instruction &I : BB)
292 if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Val: &I))
293 if (Function *F = CI->getCalledFunction())
294 if (F->getIntrinsicID() == Intrinsic::gcroot) {
295 std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
296 x&: CI,
297 y: cast<AllocaInst>(Val: CI->getArgOperand(i: 0)->stripPointerCasts()));
298 if (IsNullValue(V: CI->getArgOperand(i: 1)))
299 Roots.push_back(x: Pair);
300 else
301 MetaRoots.push_back(Elt: Pair);
302 }
303
304 // Number roots with metadata (usually empty) at the beginning, so that the
305 // FrameMap::Meta array can be elided.
306 Roots.insert(position: Roots.begin(), first: MetaRoots.begin(), last: MetaRoots.end());
307}
308
309/// runOnFunction - Insert code to maintain the shadow stack.
310bool ShadowStackGCLoweringImpl::runOnFunction(Function &F,
311 DomTreeUpdater *DTU) {
312 // Quick exit for functions that do not use the shadow stack GC.
313 if (!F.hasGC() || F.getGC() != "shadow-stack")
314 return false;
315
316 LLVMContext &Context = F.getContext();
317 const DataLayout &DL = F.getParent()->getDataLayout();
318
319 // Find calls to llvm.gcroot.
320 CollectRoots(F);
321
322 // If there are no roots in this function, then there is no need to add a
323 // stack map entry for it.
324 if (Roots.empty())
325 return false;
326
327 // Compute frame layout using byte offsets first.
328 auto [FrameSize, FrameAlign] = ComputeFrameLayout(F);
329
330 // Build the constant map with frame size in pointer-sized units.
331 uint64_t PtrSize = DL.getPointerSize();
332 Value *FrameMap = GetFrameMap(F, FrameSizeInPtrs: FrameSize / PtrSize - 2);
333
334 // Build the shadow stack entry at the very start of the function.
335 BasicBlock::iterator IP = F.getEntryBlock().begin();
336 IRBuilder<> AtEntry(IP->getParent(), IP);
337 Type *Int8Ty = Type::getInt8Ty(C&: Context);
338 AllocaInst *StackEntry = AtEntry.CreateAlloca(
339 Ty: ArrayType::get(ElementType: Int8Ty, NumElements: FrameSize), ArraySize: nullptr, Name: "gc_frame");
340 StackEntry->setAlignment(FrameAlign);
341
342 AtEntry.SetInsertPointPastAllocas(&F);
343 IP = AtEntry.GetInsertPoint();
344
345 // Initialize the map pointer and load the current head of the shadow stack.
346 Instruction *CurrentHead =
347 AtEntry.CreateLoad(Ty: AtEntry.getPtrTy(), Ptr: Head, Name: "gc_currhead");
348
349 // Map pointer is at offset PtrSize (after the Next pointer)
350 Value *EntryMapPtr = AtEntry.CreatePtrAdd(
351 Ptr: StackEntry, Offset: AtEntry.getInt64(C: PtrSize), Name: "gc_frame.map");
352 AtEntry.CreateStore(Val: FrameMap, Ptr: EntryMapPtr);
353
354 // Zero out any padding between roots to ensure deterministic frame contents.
355 // This includes the region after the map pointer up to the first root.
356 uint64_t LastEnd = 2 * PtrSize; // End of Map pointer field
357 assert(RootOffsets.size() == Roots.size());
358 for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
359 auto [RootOffset, RootSize] = RootOffsets[I];
360
361 // Zero any padding before this root
362 if (RootOffset > LastEnd) {
363 Value *PaddingPtr =
364 AtEntry.CreatePtrAdd(Ptr: StackEntry, Offset: AtEntry.getInt64(C: LastEnd));
365 AtEntry.CreateMemSet(Ptr: PaddingPtr, Val: AtEntry.getInt8(C: 0), Size: RootOffset - LastEnd,
366 Align: Align(1));
367 }
368
369 // For each root, compute pointer using precomputed offset
370 Value *SlotPtr = AtEntry.CreatePtrAdd(
371 Ptr: StackEntry, Offset: AtEntry.getInt64(C: RootOffset), Name: "gc_root");
372
373 // And use it in lieu of the alloca.
374 AllocaInst *OriginalAlloca = Roots[I].second;
375 SlotPtr->takeName(V: OriginalAlloca);
376 OriginalAlloca->replaceAllUsesWith(V: SlotPtr);
377
378 LastEnd = RootOffset + RootSize;
379 }
380
381 // Zero any padding at the end of the frame
382 if (FrameSize > LastEnd) {
383 Value *PaddingPtr =
384 AtEntry.CreatePtrAdd(Ptr: StackEntry, Offset: AtEntry.getInt64(C: LastEnd));
385 AtEntry.CreateMemSet(Ptr: PaddingPtr, Val: AtEntry.getInt8(C: 0), Size: FrameSize - LastEnd,
386 Align: Align(1));
387 }
388
389 // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
390 // really necessary (the collector would never see the intermediate state at
391 // runtime), but it's nicer not to push the half-initialized entry onto the
392 // shadow stack.
393 while (isa<StoreInst>(Val: IP))
394 ++IP;
395 AtEntry.SetInsertPoint(TheBB: IP->getParent(), IP);
396
397 // Push the entry onto the shadow stack.
398 // Next pointer is at offset 0, so it's just the frame pointer
399 AtEntry.CreateStore(Val: CurrentHead, Ptr: StackEntry);
400 // The new head value is also the frame pointer (the linked list links to
401 // frame base)
402 AtEntry.CreateStore(Val: StackEntry, Ptr: Head);
403
404 // For each instruction that escapes...
405 EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, DTU);
406 while (IRBuilder<> *AtExit = EE.Next()) {
407 // Pop the entry from the shadow stack. Don't reuse CurrentHead from
408 // AtEntry, since that would make the value live for the entire function.
409 // Next pointer is at offset 0, so load from the frame base
410 Value *SavedHead =
411 AtExit->CreateLoad(Ty: AtExit->getPtrTy(), Ptr: StackEntry, Name: "gc_savedhead");
412 AtExit->CreateStore(Val: SavedHead, Ptr: Head);
413 }
414
415 // Delete the original allocas (which are no longer used) and the intrinsic
416 // calls (which are no longer valid). Doing this last avoids invalidating
417 // iterators.
418 for (std::pair<CallInst *, AllocaInst *> &Root : Roots) {
419 Root.first->eraseFromParent();
420 Root.second->eraseFromParent();
421 }
422
423 Roots.clear();
424 RootOffsets.clear();
425 return true;
426}
427