1 | //===-- Internalize.cpp - Mark functions internal -------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass loops over all of the functions and variables in the input module. |
10 | // If the function or variable does not need to be preserved according to the |
11 | // client supplied callback, it is marked as internal. |
12 | // |
13 | // This transformation would not be legal in a regular compilation, but it gets |
14 | // extra information from the linker about what is safe. |
15 | // |
16 | // For example, a function with external linkage may be internalized only |
17 | // when we are told that it is used solely from within this module. |
18 | // |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #include "llvm/Transforms/IPO/Internalize.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringSet.h" |
24 | #include "llvm/Analysis/CallGraph.h" |
25 | #include "llvm/IR/Module.h" |
26 | #include "llvm/Support/CommandLine.h" |
27 | #include "llvm/Support/Debug.h" |
28 | #include "llvm/Support/GlobPattern.h" |
29 | #include "llvm/Support/LineIterator.h" |
30 | #include "llvm/Support/MemoryBuffer.h" |
31 | #include "llvm/Support/raw_ostream.h" |
32 | #include "llvm/TargetParser/Triple.h" |
33 | #include "llvm/Transforms/IPO.h" |
34 | using namespace llvm; |
35 | |
36 | #define DEBUG_TYPE "internalize" |
37 | |
38 | STATISTIC(NumAliases, "Number of aliases internalized" ); |
39 | STATISTIC(NumFunctions, "Number of functions internalized" ); |
40 | STATISTIC(NumGlobals, "Number of global vars internalized" ); |
41 | |
42 | // APIFile - A file which contains a list of symbol glob patterns that should |
43 | // not be marked external. |
44 | static cl::opt<std::string> |
45 | APIFile("internalize-public-api-file" , cl::value_desc("filename" ), |
46 | cl::desc("A file containing list of symbol names to preserve" )); |
47 | |
48 | // APIList - A list of symbol glob patterns that should not be marked internal. |
49 | static cl::list<std::string> |
50 | APIList("internalize-public-api-list" , cl::value_desc("list" ), |
51 | cl::desc("A list of symbol names to preserve" ), cl::CommaSeparated); |
52 | |
53 | namespace { |
54 | // Helper to load an API list to preserve from file and expose it as a functor |
55 | // for internalization. |
56 | class PreserveAPIList { |
57 | public: |
58 | PreserveAPIList() { |
59 | if (!APIFile.empty()) |
60 | LoadFile(Filename: APIFile); |
61 | for (StringRef Pattern : APIList) |
62 | addGlob(Pattern); |
63 | } |
64 | |
65 | bool operator()(const GlobalValue &GV) { |
66 | return llvm::any_of( |
67 | Range&: ExternalNames, P: [&](GlobPattern &GP) { return GP.match(S: GV.getName()); }); |
68 | } |
69 | |
70 | private: |
71 | // Contains the set of symbols loaded from file |
72 | SmallVector<GlobPattern> ExternalNames; |
73 | |
74 | void addGlob(StringRef Pattern) { |
75 | auto GlobOrErr = GlobPattern::create(Pat: Pattern); |
76 | if (!GlobOrErr) { |
77 | errs() << "WARNING: when loading pattern: '" |
78 | << toString(E: GlobOrErr.takeError()) << "' ignoring" ; |
79 | return; |
80 | } |
81 | ExternalNames.emplace_back(Args: std::move(*GlobOrErr)); |
82 | } |
83 | |
84 | void LoadFile(StringRef Filename) { |
85 | // Load the APIFile... |
86 | ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = |
87 | MemoryBuffer::getFile(Filename); |
88 | if (!BufOrErr) { |
89 | errs() << "WARNING: Internalize couldn't load file '" << Filename |
90 | << "'! Continuing as if it's empty.\n" ; |
91 | return; // Just continue as if the file were empty |
92 | } |
93 | Buf = std::move(*BufOrErr); |
94 | for (line_iterator I(*Buf, true), E; I != E; ++I) |
95 | addGlob(Pattern: *I); |
96 | } |
97 | |
98 | std::shared_ptr<MemoryBuffer> Buf; |
99 | }; |
100 | } // end anonymous namespace |
101 | |
102 | bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) { |
103 | // Function must be defined here |
104 | if (GV.isDeclaration()) |
105 | return true; |
106 | |
107 | // Available externally is really just a "declaration with a body". |
108 | if (GV.hasAvailableExternallyLinkage()) |
109 | return true; |
110 | |
111 | // Assume that dllexported symbols are referenced elsewhere |
112 | if (GV.hasDLLExportStorageClass()) |
113 | return true; |
114 | |
115 | // As the name suggests, externally initialized variables need preserving as |
116 | // they would be initialized elsewhere externally. |
117 | if (const auto *G = dyn_cast<GlobalVariable>(Val: &GV)) |
118 | if (G->isExternallyInitialized()) |
119 | return true; |
120 | |
121 | // Already local, has nothing to do. |
122 | if (GV.hasLocalLinkage()) |
123 | return false; |
124 | |
125 | // Check some special cases |
126 | if (AlwaysPreserved.count(Key: GV.getName())) |
127 | return true; |
128 | |
129 | return MustPreserveGV(GV); |
130 | } |
131 | |
132 | bool InternalizePass::maybeInternalize( |
133 | GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) { |
134 | if (Comdat *C = GV.getComdat()) { |
135 | // For GlobalAlias, C is the aliasee object's comdat which may have been |
136 | // redirected. So ComdatMap may not contain C. |
137 | if (ComdatMap.lookup(Val: C).External) |
138 | return false; |
139 | |
140 | if (auto *GO = dyn_cast<GlobalObject>(Val: &GV)) { |
141 | // If a comdat with one member is not externally visible, we can drop it. |
142 | // Otherwise, the comdat can be used to establish dependencies among the |
143 | // group of sections. Thus we have to keep the comdat but switch it to |
144 | // nodeduplicate. |
145 | // Note: nodeduplicate is not necessary for COFF. wasm doesn't support |
146 | // nodeduplicate. |
147 | ComdatInfo &Info = ComdatMap.find(Val: C)->second; |
148 | if (Info.Size == 1) |
149 | GO->setComdat(nullptr); |
150 | else if (!IsWasm) |
151 | C->setSelectionKind(Comdat::NoDeduplicate); |
152 | } |
153 | |
154 | if (GV.hasLocalLinkage()) |
155 | return false; |
156 | } else { |
157 | if (GV.hasLocalLinkage()) |
158 | return false; |
159 | |
160 | if (shouldPreserveGV(GV)) |
161 | return false; |
162 | } |
163 | |
164 | GV.setVisibility(GlobalValue::DefaultVisibility); |
165 | GV.setLinkage(GlobalValue::InternalLinkage); |
166 | return true; |
167 | } |
168 | |
169 | // If GV is part of a comdat and is externally visible, update the comdat size |
170 | // and keep track of its comdat so that we don't internalize any of its members. |
171 | void InternalizePass::checkComdat( |
172 | GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) { |
173 | Comdat *C = GV.getComdat(); |
174 | if (!C) |
175 | return; |
176 | |
177 | ComdatInfo &Info = ComdatMap[C]; |
178 | ++Info.Size; |
179 | if (shouldPreserveGV(GV)) |
180 | Info.External = true; |
181 | } |
182 | |
183 | bool InternalizePass::internalizeModule(Module &M) { |
184 | bool Changed = false; |
185 | |
186 | SmallVector<GlobalValue *, 4> Used; |
187 | collectUsedGlobalVariables(M, Vec&: Used, CompilerUsed: false); |
188 | |
189 | // Collect comdat size and visiblity information for the module. |
190 | DenseMap<const Comdat *, ComdatInfo> ComdatMap; |
191 | if (!M.getComdatSymbolTable().empty()) { |
192 | for (Function &F : M) |
193 | checkComdat(GV&: F, ComdatMap); |
194 | for (GlobalVariable &GV : M.globals()) |
195 | checkComdat(GV, ComdatMap); |
196 | for (GlobalAlias &GA : M.aliases()) |
197 | checkComdat(GV&: GA, ComdatMap); |
198 | } |
199 | |
200 | // We must assume that globals in llvm.used have a reference that not even |
201 | // the linker can see, so we don't internalize them. |
202 | // For llvm.compiler.used the situation is a bit fuzzy. The assembler and |
203 | // linker can drop those symbols. If this pass is running as part of LTO, |
204 | // one might think that it could just drop llvm.compiler.used. The problem |
205 | // is that even in LTO llvm doesn't see every reference. For example, |
206 | // we don't see references from function local inline assembly. To be |
207 | // conservative, we internalize symbols in llvm.compiler.used, but we |
208 | // keep llvm.compiler.used so that the symbol is not deleted by llvm. |
209 | for (GlobalValue *V : Used) { |
210 | AlwaysPreserved.insert(key: V->getName()); |
211 | } |
212 | |
213 | // Never internalize the llvm.used symbol. It is used to implement |
214 | // attribute((used)). |
215 | // FIXME: Shouldn't this just filter on llvm.metadata section?? |
216 | AlwaysPreserved.insert(key: "llvm.used" ); |
217 | AlwaysPreserved.insert(key: "llvm.compiler.used" ); |
218 | |
219 | // Never internalize anchors used by the machine module info, else the info |
220 | // won't find them. (see MachineModuleInfo.) |
221 | AlwaysPreserved.insert(key: "llvm.global_ctors" ); |
222 | AlwaysPreserved.insert(key: "llvm.global_dtors" ); |
223 | AlwaysPreserved.insert(key: "llvm.global.annotations" ); |
224 | |
225 | // Never internalize symbols code-gen inserts. |
226 | // FIXME: We should probably add this (and the __stack_chk_guard) via some |
227 | // type of call-back in CodeGen. |
228 | AlwaysPreserved.insert(key: "__stack_chk_fail" ); |
229 | if (M.getTargetTriple().isOSAIX()) |
230 | AlwaysPreserved.insert(key: "__ssp_canary_word" ); |
231 | else |
232 | AlwaysPreserved.insert(key: "__stack_chk_guard" ); |
233 | |
234 | // Preserve the RPC interface for GPU host callbacks when internalizing. |
235 | if (M.getTargetTriple().isNVPTX()) |
236 | AlwaysPreserved.insert(key: "__llvm_rpc_client" ); |
237 | |
238 | // Mark all functions not in the api as internal. |
239 | IsWasm = M.getTargetTriple().isOSBinFormatWasm(); |
240 | for (Function &I : M) { |
241 | if (!maybeInternalize(GV&: I, ComdatMap)) |
242 | continue; |
243 | Changed = true; |
244 | |
245 | ++NumFunctions; |
246 | LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n" ); |
247 | } |
248 | |
249 | // Mark all global variables with initializers that are not in the api as |
250 | // internal as well. |
251 | for (auto &GV : M.globals()) { |
252 | if (!maybeInternalize(GV, ComdatMap)) |
253 | continue; |
254 | Changed = true; |
255 | |
256 | ++NumGlobals; |
257 | LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n" ); |
258 | } |
259 | |
260 | // Mark all aliases that are not in the api as internal as well. |
261 | for (auto &GA : M.aliases()) { |
262 | if (!maybeInternalize(GV&: GA, ComdatMap)) |
263 | continue; |
264 | Changed = true; |
265 | |
266 | ++NumAliases; |
267 | LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n" ); |
268 | } |
269 | |
270 | return Changed; |
271 | } |
272 | |
273 | InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} |
274 | |
275 | PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { |
276 | if (!internalizeModule(M)) |
277 | return PreservedAnalyses::all(); |
278 | |
279 | return PreservedAnalyses::none(); |
280 | } |
281 | |