1 | //===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The StripSymbols transformation implements code stripping. Specifically, it |
10 | // can delete: |
11 | // |
12 | // * names for virtual registers |
13 | // * symbols for internal globals and functions |
14 | // * debug information |
15 | // |
16 | // Note that this transformation makes code much less readable, so it should |
17 | // only be used in situations where the 'strip' utility would be used, such as |
18 | // reducing code size or making it harder to reverse engineer code. |
19 | // |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "llvm/Transforms/IPO/StripSymbols.h" |
23 | #include "llvm/ADT/SmallPtrSet.h" |
24 | #include "llvm/IR/Constants.h" |
25 | #include "llvm/IR/DebugInfo.h" |
26 | #include "llvm/IR/DerivedTypes.h" |
27 | #include "llvm/IR/InstIterator.h" |
28 | #include "llvm/IR/Instructions.h" |
29 | #include "llvm/IR/Module.h" |
30 | #include "llvm/IR/PassManager.h" |
31 | #include "llvm/IR/TypeFinder.h" |
32 | #include "llvm/IR/ValueSymbolTable.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Transforms/IPO.h" |
35 | #include "llvm/Transforms/IPO/StripSymbols.h" |
36 | #include "llvm/Transforms/Utils/Local.h" |
37 | |
38 | using namespace llvm; |
39 | |
40 | static cl::opt<bool> |
41 | StripGlobalConstants("strip-global-constants" , cl::init(Val: false), cl::Hidden, |
42 | cl::desc("Removes debug compile units which reference " |
43 | "to non-existing global constants" )); |
44 | |
45 | /// OnlyUsedBy - Return true if V is only used by Usr. |
46 | static bool OnlyUsedBy(Value *V, Value *Usr) { |
47 | for (User *U : V->users()) |
48 | if (U != Usr) |
49 | return false; |
50 | |
51 | return true; |
52 | } |
53 | |
54 | static void RemoveDeadConstant(Constant *C) { |
55 | assert(C->use_empty() && "Constant is not dead!" ); |
56 | SmallPtrSet<Constant*, 4> Operands; |
57 | for (Value *Op : C->operands()) |
58 | if (OnlyUsedBy(V: Op, Usr: C)) |
59 | Operands.insert(Ptr: cast<Constant>(Val: Op)); |
60 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: C)) { |
61 | if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals. |
62 | GV->eraseFromParent(); |
63 | } else if (!isa<Function>(Val: C)) { |
64 | // FIXME: Why does the type of the constant matter here? |
65 | if (isa<StructType>(Val: C->getType()) || isa<ArrayType>(Val: C->getType()) || |
66 | isa<VectorType>(Val: C->getType())) |
67 | C->destroyConstant(); |
68 | } |
69 | |
70 | // If the constant referenced anything, see if we can delete it as well. |
71 | for (Constant *O : Operands) |
72 | RemoveDeadConstant(C: O); |
73 | } |
74 | |
75 | // Strip the symbol table of its names. |
76 | // |
77 | static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { |
78 | for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) { |
79 | Value *V = VI->getValue(); |
80 | ++VI; |
81 | if (!isa<GlobalValue>(Val: V) || cast<GlobalValue>(Val: V)->hasLocalLinkage()) { |
82 | if (!PreserveDbgInfo || !V->getName().starts_with(Prefix: "llvm.dbg" )) |
83 | // Set name to "", removing from symbol table! |
84 | V->setName("" ); |
85 | } |
86 | } |
87 | } |
88 | |
89 | // Strip any named types of their names. |
90 | static void StripTypeNames(Module &M, bool PreserveDbgInfo) { |
91 | TypeFinder StructTypes; |
92 | StructTypes.run(M, onlyNamed: false); |
93 | |
94 | for (StructType *STy : StructTypes) { |
95 | if (STy->isLiteral() || STy->getName().empty()) continue; |
96 | |
97 | if (PreserveDbgInfo && STy->getName().starts_with(Prefix: "llvm.dbg" )) |
98 | continue; |
99 | |
100 | STy->setName("" ); |
101 | } |
102 | } |
103 | |
104 | /// Find values that are marked as llvm.used. |
105 | static void findUsedValues(GlobalVariable *LLVMUsed, |
106 | SmallPtrSetImpl<const GlobalValue*> &UsedValues) { |
107 | if (!LLVMUsed) return; |
108 | UsedValues.insert(Ptr: LLVMUsed); |
109 | |
110 | ConstantArray *Inits = cast<ConstantArray>(Val: LLVMUsed->getInitializer()); |
111 | |
112 | for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) |
113 | if (GlobalValue *GV = |
114 | dyn_cast<GlobalValue>(Val: Inits->getOperand(i_nocapture: i)->stripPointerCasts())) |
115 | UsedValues.insert(Ptr: GV); |
116 | } |
117 | |
118 | /// StripSymbolNames - Strip symbol names. |
119 | static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { |
120 | |
121 | SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; |
122 | findUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.used" ), UsedValues&: llvmUsedValues); |
123 | findUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.compiler.used" ), UsedValues&: llvmUsedValues); |
124 | |
125 | for (GlobalVariable &GV : M.globals()) { |
126 | if (GV.hasLocalLinkage() && !llvmUsedValues.contains(Ptr: &GV)) |
127 | if (!PreserveDbgInfo || !GV.getName().starts_with(Prefix: "llvm.dbg" )) |
128 | GV.setName("" ); // Internal symbols can't participate in linkage |
129 | } |
130 | |
131 | for (Function &I : M) { |
132 | if (I.hasLocalLinkage() && !llvmUsedValues.contains(Ptr: &I)) |
133 | if (!PreserveDbgInfo || !I.getName().starts_with(Prefix: "llvm.dbg" )) |
134 | I.setName("" ); // Internal symbols can't participate in linkage |
135 | if (auto *Symtab = I.getValueSymbolTable()) |
136 | StripSymtab(ST&: *Symtab, PreserveDbgInfo); |
137 | } |
138 | |
139 | // Remove all names from types. |
140 | StripTypeNames(M, PreserveDbgInfo); |
141 | |
142 | return true; |
143 | } |
144 | |
145 | static bool stripDebugDeclareImpl(Module &M) { |
146 | |
147 | Function *Declare = M.getFunction(Name: "llvm.dbg.declare" ); |
148 | std::vector<Constant*> DeadConstants; |
149 | |
150 | if (Declare) { |
151 | while (!Declare->use_empty()) { |
152 | CallInst *CI = cast<CallInst>(Val: Declare->user_back()); |
153 | Value *Arg1 = CI->getArgOperand(i: 0); |
154 | Value *Arg2 = CI->getArgOperand(i: 1); |
155 | assert(CI->use_empty() && "llvm.dbg intrinsic should have void result" ); |
156 | CI->eraseFromParent(); |
157 | if (Arg1->use_empty()) { |
158 | if (Constant *C = dyn_cast<Constant>(Val: Arg1)) |
159 | DeadConstants.push_back(x: C); |
160 | else |
161 | RecursivelyDeleteTriviallyDeadInstructions(V: Arg1); |
162 | } |
163 | if (Arg2->use_empty()) |
164 | if (Constant *C = dyn_cast<Constant>(Val: Arg2)) |
165 | DeadConstants.push_back(x: C); |
166 | } |
167 | Declare->eraseFromParent(); |
168 | } |
169 | |
170 | while (!DeadConstants.empty()) { |
171 | Constant *C = DeadConstants.back(); |
172 | DeadConstants.pop_back(); |
173 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: C)) { |
174 | if (GV->hasLocalLinkage()) |
175 | RemoveDeadConstant(C: GV); |
176 | } else |
177 | RemoveDeadConstant(C); |
178 | } |
179 | |
180 | return true; |
181 | } |
182 | |
183 | static bool stripDeadDebugInfoImpl(Module &M) { |
184 | bool Changed = false; |
185 | |
186 | LLVMContext &C = M.getContext(); |
187 | |
188 | // Find all debug info in F. This is actually overkill in terms of what we |
189 | // want to do, but we want to try and be as resilient as possible in the face |
190 | // of potential debug info changes by using the formal interfaces given to us |
191 | // as much as possible. |
192 | DebugInfoFinder F; |
193 | F.processModule(M); |
194 | |
195 | // For each compile unit, find the live set of global variables/functions and |
196 | // replace the current list of potentially dead global variables/functions |
197 | // with the live list. |
198 | SmallVector<Metadata *, 64> LiveGlobalVariables; |
199 | DenseSet<DIGlobalVariableExpression *> VisitedSet; |
200 | |
201 | std::set<DIGlobalVariableExpression *> LiveGVs; |
202 | for (GlobalVariable &GV : M.globals()) { |
203 | SmallVector<DIGlobalVariableExpression *, 1> GVEs; |
204 | GV.getDebugInfo(GVs&: GVEs); |
205 | for (auto *GVE : GVEs) |
206 | LiveGVs.insert(x: GVE); |
207 | } |
208 | |
209 | std::set<DICompileUnit *> LiveCUs; |
210 | DebugInfoFinder LiveCUFinder; |
211 | for (const Function &F : M.functions()) { |
212 | if (auto *SP = cast_or_null<DISubprogram>(Val: F.getSubprogram())) |
213 | LiveCUFinder.processSubprogram(SP); |
214 | for (const Instruction &I : instructions(F)) |
215 | LiveCUFinder.processInstruction(M, I); |
216 | } |
217 | auto FoundCUs = LiveCUFinder.compile_units(); |
218 | LiveCUs.insert(first: FoundCUs.begin(), last: FoundCUs.end()); |
219 | |
220 | bool HasDeadCUs = false; |
221 | for (DICompileUnit *DIC : F.compile_units()) { |
222 | // Create our live global variable list. |
223 | bool GlobalVariableChange = false; |
224 | for (auto *DIG : DIC->getGlobalVariables()) { |
225 | if (DIG->getExpression() && DIG->getExpression()->isConstant() && |
226 | !StripGlobalConstants) |
227 | LiveGVs.insert(x: DIG); |
228 | |
229 | // Make sure we only visit each global variable only once. |
230 | if (!VisitedSet.insert(V: DIG).second) |
231 | continue; |
232 | |
233 | // If a global variable references DIG, the global variable is live. |
234 | if (LiveGVs.count(x: DIG)) |
235 | LiveGlobalVariables.push_back(Elt: DIG); |
236 | else |
237 | GlobalVariableChange = true; |
238 | } |
239 | |
240 | if (!LiveGlobalVariables.empty()) |
241 | LiveCUs.insert(x: DIC); |
242 | else if (!LiveCUs.count(x: DIC)) |
243 | HasDeadCUs = true; |
244 | |
245 | // If we found dead global variables, replace the current global |
246 | // variable list with our new live global variable list. |
247 | if (GlobalVariableChange) { |
248 | DIC->replaceGlobalVariables(N: MDTuple::get(Context&: C, MDs: LiveGlobalVariables)); |
249 | Changed = true; |
250 | } |
251 | |
252 | // Reset lists for the next iteration. |
253 | LiveGlobalVariables.clear(); |
254 | } |
255 | |
256 | if (HasDeadCUs) { |
257 | // Delete the old node and replace it with a new one |
258 | NamedMDNode *NMD = M.getOrInsertNamedMetadata(Name: "llvm.dbg.cu" ); |
259 | NMD->clearOperands(); |
260 | if (!LiveCUs.empty()) { |
261 | for (DICompileUnit *CU : LiveCUs) |
262 | NMD->addOperand(M: CU); |
263 | } |
264 | Changed = true; |
265 | } |
266 | |
267 | return Changed; |
268 | } |
269 | |
270 | PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) { |
271 | StripDebugInfo(M); |
272 | StripSymbolNames(M, PreserveDbgInfo: false); |
273 | PreservedAnalyses PA; |
274 | PA.preserveSet<CFGAnalyses>(); |
275 | return PA; |
276 | } |
277 | |
278 | PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M, |
279 | ModuleAnalysisManager &AM) { |
280 | StripSymbolNames(M, PreserveDbgInfo: true); |
281 | PreservedAnalyses PA; |
282 | PA.preserveSet<CFGAnalyses>(); |
283 | return PA; |
284 | } |
285 | |
286 | PreservedAnalyses StripDebugDeclarePass::run(Module &M, |
287 | ModuleAnalysisManager &AM) { |
288 | stripDebugDeclareImpl(M); |
289 | PreservedAnalyses PA; |
290 | PA.preserveSet<CFGAnalyses>(); |
291 | return PA; |
292 | } |
293 | |
294 | PreservedAnalyses StripDeadDebugInfoPass::run(Module &M, |
295 | ModuleAnalysisManager &AM) { |
296 | stripDeadDebugInfoImpl(M); |
297 | PreservedAnalyses PA; |
298 | PA.preserveSet<CFGAnalyses>(); |
299 | return PA; |
300 | } |
301 | |