1 | //===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // SymbolRewriter is an LLVM pass which can rewrite symbols transparently within |
10 | // existing code. It is implemented as a compiler pass and is configured via a |
11 | // YAML configuration file. |
12 | // |
13 | // The YAML configuration file format is as follows: |
14 | // |
15 | // RewriteMapFile := RewriteDescriptors |
16 | // RewriteDescriptors := RewriteDescriptor | RewriteDescriptors |
17 | // RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}' |
18 | // RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields |
19 | // RewriteDescriptorField := FieldIdentifier ':' FieldValue ',' |
20 | // RewriteDescriptorType := Identifier |
21 | // FieldIdentifier := Identifier |
22 | // FieldValue := Identifier |
23 | // Identifier := [0-9a-zA-Z]+ |
24 | // |
25 | // Currently, the following descriptor types are supported: |
26 | // |
27 | // - function: (function rewriting) |
28 | // + Source (original name of the function) |
29 | // + Target (explicit transformation) |
30 | // + Transform (pattern transformation) |
31 | // + Naked (boolean, whether the function is undecorated) |
32 | // - global variable: (external linkage global variable rewriting) |
33 | // + Source (original name of externally visible variable) |
34 | // + Target (explicit transformation) |
35 | // + Transform (pattern transformation) |
36 | // - global alias: (global alias rewriting) |
37 | // + Source (original name of the aliased name) |
38 | // + Target (explicit transformation) |
39 | // + Transform (pattern transformation) |
40 | // |
41 | // Note that source and exactly one of [Target, Transform] must be provided |
42 | // |
43 | // New rewrite descriptors can be created. Adding a new rewrite descriptor |
44 | // involves: |
45 | // |
46 | // a) extending the rewrite descriptor kind enumeration |
47 | // (<anonymous>::RewriteDescriptor::RewriteDescriptorType) |
48 | // b) implementing the new descriptor |
49 | // (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor) |
50 | // c) extending the rewrite map parser |
51 | // (<anonymous>::RewriteMapParser::parseEntry) |
52 | // |
53 | // Specify to rewrite the symbols using the `-rewrite-symbols` option, and |
54 | // specify the map file to use for the rewriting via the `-rewrite-map-file` |
55 | // option. |
56 | // |
57 | //===----------------------------------------------------------------------===// |
58 | |
59 | #include "llvm/Transforms/Utils/SymbolRewriter.h" |
60 | #include "llvm/ADT/SmallString.h" |
61 | #include "llvm/ADT/StringRef.h" |
62 | #include "llvm/ADT/ilist.h" |
63 | #include "llvm/ADT/iterator_range.h" |
64 | #include "llvm/IR/Comdat.h" |
65 | #include "llvm/IR/Function.h" |
66 | #include "llvm/IR/GlobalAlias.h" |
67 | #include "llvm/IR/GlobalObject.h" |
68 | #include "llvm/IR/GlobalVariable.h" |
69 | #include "llvm/IR/Module.h" |
70 | #include "llvm/IR/Value.h" |
71 | #include "llvm/Support/Casting.h" |
72 | #include "llvm/Support/CommandLine.h" |
73 | #include "llvm/Support/ErrorHandling.h" |
74 | #include "llvm/Support/ErrorOr.h" |
75 | #include "llvm/Support/MemoryBuffer.h" |
76 | #include "llvm/Support/Regex.h" |
77 | #include "llvm/Support/SourceMgr.h" |
78 | #include "llvm/Support/YAMLParser.h" |
79 | #include <memory> |
80 | #include <string> |
81 | #include <vector> |
82 | |
83 | using namespace llvm; |
84 | using namespace SymbolRewriter; |
85 | |
86 | #define DEBUG_TYPE "symbol-rewriter" |
87 | |
88 | static cl::list<std::string> RewriteMapFiles("rewrite-map-file" , |
89 | cl::desc("Symbol Rewrite Map" ), |
90 | cl::value_desc("filename" ), |
91 | cl::Hidden); |
92 | |
93 | static void rewriteComdat(Module &M, GlobalObject *GO, |
94 | const std::string &Source, |
95 | const std::string &Target) { |
96 | if (Comdat *CD = GO->getComdat()) { |
97 | auto &Comdats = M.getComdatSymbolTable(); |
98 | |
99 | Comdat *C = M.getOrInsertComdat(Name: Target); |
100 | C->setSelectionKind(CD->getSelectionKind()); |
101 | GO->setComdat(C); |
102 | |
103 | Comdats.erase(I: Comdats.find(Key: Source)); |
104 | } |
105 | } |
106 | |
107 | namespace { |
108 | |
109 | template <RewriteDescriptor::Type DT, typename ValueType, |
110 | ValueType *(Module::*Get)(StringRef) const> |
111 | class ExplicitRewriteDescriptor : public RewriteDescriptor { |
112 | public: |
113 | const std::string Source; |
114 | const std::string Target; |
115 | |
116 | ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked) |
117 | : RewriteDescriptor(DT), |
118 | Source(std::string(Naked ? StringRef("\01" + S.str()) : S)), |
119 | Target(std::string(T)) {} |
120 | |
121 | bool performOnModule(Module &M) override; |
122 | |
123 | static bool classof(const RewriteDescriptor *RD) { |
124 | return RD->getType() == DT; |
125 | } |
126 | }; |
127 | |
128 | } // end anonymous namespace |
129 | |
130 | template <RewriteDescriptor::Type DT, typename ValueType, |
131 | ValueType *(Module::*Get)(StringRef) const> |
132 | bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { |
133 | bool Changed = false; |
134 | if (ValueType *S = (M.*Get)(Source)) { |
135 | if (GlobalObject *GO = dyn_cast<GlobalObject>(S)) |
136 | rewriteComdat(M, GO, Source, Target); |
137 | |
138 | if (Value *T = (M.*Get)(Target)) |
139 | S->setValueName(T->getValueName()); |
140 | else |
141 | S->setName(Target); |
142 | |
143 | Changed = true; |
144 | } |
145 | return Changed; |
146 | } |
147 | |
148 | namespace { |
149 | |
150 | template <RewriteDescriptor::Type DT, typename ValueType, |
151 | ValueType *(Module::*Get)(StringRef) const, |
152 | iterator_range<typename iplist<ValueType>::iterator> |
153 | (Module::*Iterator)()> |
154 | class PatternRewriteDescriptor : public RewriteDescriptor { |
155 | public: |
156 | const std::string Pattern; |
157 | const std::string Transform; |
158 | |
159 | PatternRewriteDescriptor(StringRef P, StringRef T) |
160 | : RewriteDescriptor(DT), Pattern(std::string(P)), |
161 | Transform(std::string(T)) {} |
162 | |
163 | bool performOnModule(Module &M) override; |
164 | |
165 | static bool classof(const RewriteDescriptor *RD) { |
166 | return RD->getType() == DT; |
167 | } |
168 | }; |
169 | |
170 | } // end anonymous namespace |
171 | |
172 | template <RewriteDescriptor::Type DT, typename ValueType, |
173 | ValueType *(Module::*Get)(StringRef) const, |
174 | iterator_range<typename iplist<ValueType>::iterator> |
175 | (Module::*Iterator)()> |
176 | bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>:: |
177 | performOnModule(Module &M) { |
178 | bool Changed = false; |
179 | for (auto &C : (M.*Iterator)()) { |
180 | std::string Error; |
181 | |
182 | std::string Name = Regex(Pattern).sub(Repl: Transform, String: C.getName(), Error: &Error); |
183 | if (!Error.empty()) |
184 | report_fatal_error(Twine("unable to transforn " ) + C.getName() + " in " + |
185 | M.getModuleIdentifier() + ": " + Error); |
186 | |
187 | if (C.getName() == Name) |
188 | continue; |
189 | |
190 | if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) |
191 | rewriteComdat(M, GO, Source: std::string(C.getName()), Target: Name); |
192 | |
193 | if (Value *V = (M.*Get)(Name)) |
194 | C.setValueName(V->getValueName()); |
195 | else |
196 | C.setName(Name); |
197 | |
198 | Changed = true; |
199 | } |
200 | return Changed; |
201 | } |
202 | |
203 | namespace { |
204 | |
205 | /// Represents a rewrite for an explicitly named (function) symbol. Both the |
206 | /// source function name and target function name of the transformation are |
207 | /// explicitly spelt out. |
208 | using ExplicitRewriteFunctionDescriptor = |
209 | ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function, |
210 | &Module::getFunction>; |
211 | |
212 | /// Represents a rewrite for an explicitly named (global variable) symbol. Both |
213 | /// the source variable name and target variable name are spelt out. This |
214 | /// applies only to module level variables. |
215 | using ExplicitRewriteGlobalVariableDescriptor = |
216 | ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, |
217 | GlobalVariable, &Module::getGlobalVariable>; |
218 | |
219 | /// Represents a rewrite for an explicitly named global alias. Both the source |
220 | /// and target name are explicitly spelt out. |
221 | using ExplicitRewriteNamedAliasDescriptor = |
222 | ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, |
223 | &Module::getNamedAlias>; |
224 | |
225 | /// Represents a rewrite for a regular expression based pattern for functions. |
226 | /// A pattern for the function name is provided and a transformation for that |
227 | /// pattern to determine the target function name create the rewrite rule. |
228 | using PatternRewriteFunctionDescriptor = |
229 | PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function, |
230 | &Module::getFunction, &Module::functions>; |
231 | |
232 | /// Represents a rewrite for a global variable based upon a matching pattern. |
233 | /// Each global variable matching the provided pattern will be transformed as |
234 | /// described in the transformation pattern for the target. Applies only to |
235 | /// module level variables. |
236 | using PatternRewriteGlobalVariableDescriptor = |
237 | PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, |
238 | GlobalVariable, &Module::getGlobalVariable, |
239 | &Module::globals>; |
240 | |
241 | /// PatternRewriteNamedAliasDescriptor - represents a rewrite for global |
242 | /// aliases which match a given pattern. The provided transformation will be |
243 | /// applied to each of the matching names. |
244 | using PatternRewriteNamedAliasDescriptor = |
245 | PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, |
246 | &Module::getNamedAlias, &Module::aliases>; |
247 | |
248 | } // end anonymous namespace |
249 | |
250 | bool RewriteMapParser::parse(const std::string &MapFile, |
251 | RewriteDescriptorList *DL) { |
252 | ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping = |
253 | MemoryBuffer::getFile(Filename: MapFile); |
254 | |
255 | if (!Mapping) |
256 | report_fatal_error(reason: Twine("unable to read rewrite map '" ) + MapFile + |
257 | "': " + Mapping.getError().message()); |
258 | |
259 | if (!parse(MapFile&: *Mapping, DL)) |
260 | report_fatal_error(reason: Twine("unable to parse rewrite map '" ) + MapFile + "'" ); |
261 | |
262 | return true; |
263 | } |
264 | |
265 | bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile, |
266 | RewriteDescriptorList *DL) { |
267 | SourceMgr SM; |
268 | yaml::Stream YS(MapFile->getBuffer(), SM); |
269 | |
270 | for (auto &Document : YS) { |
271 | yaml::MappingNode *DescriptorList; |
272 | |
273 | // ignore empty documents |
274 | if (isa<yaml::NullNode>(Val: Document.getRoot())) |
275 | continue; |
276 | |
277 | DescriptorList = dyn_cast<yaml::MappingNode>(Val: Document.getRoot()); |
278 | if (!DescriptorList) { |
279 | YS.printError(N: Document.getRoot(), Msg: "DescriptorList node must be a map" ); |
280 | return false; |
281 | } |
282 | |
283 | for (auto &Descriptor : *DescriptorList) |
284 | if (!parseEntry(Stream&: YS, Entry&: Descriptor, DL)) |
285 | return false; |
286 | } |
287 | |
288 | return true; |
289 | } |
290 | |
291 | bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry, |
292 | RewriteDescriptorList *DL) { |
293 | yaml::ScalarNode *Key; |
294 | yaml::MappingNode *Value; |
295 | SmallString<32> KeyStorage; |
296 | StringRef RewriteType; |
297 | |
298 | Key = dyn_cast<yaml::ScalarNode>(Val: Entry.getKey()); |
299 | if (!Key) { |
300 | YS.printError(N: Entry.getKey(), Msg: "rewrite type must be a scalar" ); |
301 | return false; |
302 | } |
303 | |
304 | Value = dyn_cast<yaml::MappingNode>(Val: Entry.getValue()); |
305 | if (!Value) { |
306 | YS.printError(N: Entry.getValue(), Msg: "rewrite descriptor must be a map" ); |
307 | return false; |
308 | } |
309 | |
310 | RewriteType = Key->getValue(Storage&: KeyStorage); |
311 | if (RewriteType == "function" ) |
312 | return parseRewriteFunctionDescriptor(Stream&: YS, Key, Value, DL); |
313 | else if (RewriteType == "global variable" ) |
314 | return parseRewriteGlobalVariableDescriptor(Stream&: YS, Key, Value, DL); |
315 | else if (RewriteType == "global alias" ) |
316 | return parseRewriteGlobalAliasDescriptor(YS, K: Key, V: Value, DL); |
317 | |
318 | YS.printError(N: Entry.getKey(), Msg: "unknown rewrite type" ); |
319 | return false; |
320 | } |
321 | |
322 | bool RewriteMapParser:: |
323 | parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, |
324 | yaml::MappingNode *Descriptor, |
325 | RewriteDescriptorList *DL) { |
326 | bool Naked = false; |
327 | std::string Source; |
328 | std::string Target; |
329 | std::string Transform; |
330 | |
331 | for (auto &Field : *Descriptor) { |
332 | yaml::ScalarNode *Key; |
333 | yaml::ScalarNode *Value; |
334 | SmallString<32> KeyStorage; |
335 | SmallString<32> ValueStorage; |
336 | StringRef KeyValue; |
337 | |
338 | Key = dyn_cast<yaml::ScalarNode>(Val: Field.getKey()); |
339 | if (!Key) { |
340 | YS.printError(N: Field.getKey(), Msg: "descriptor key must be a scalar" ); |
341 | return false; |
342 | } |
343 | |
344 | Value = dyn_cast<yaml::ScalarNode>(Val: Field.getValue()); |
345 | if (!Value) { |
346 | YS.printError(N: Field.getValue(), Msg: "descriptor value must be a scalar" ); |
347 | return false; |
348 | } |
349 | |
350 | KeyValue = Key->getValue(Storage&: KeyStorage); |
351 | if (KeyValue == "source" ) { |
352 | std::string Error; |
353 | |
354 | Source = std::string(Value->getValue(Storage&: ValueStorage)); |
355 | if (!Regex(Source).isValid(Error)) { |
356 | YS.printError(N: Field.getKey(), Msg: "invalid regex: " + Error); |
357 | return false; |
358 | } |
359 | } else if (KeyValue == "target" ) { |
360 | Target = std::string(Value->getValue(Storage&: ValueStorage)); |
361 | } else if (KeyValue == "transform" ) { |
362 | Transform = std::string(Value->getValue(Storage&: ValueStorage)); |
363 | } else if (KeyValue == "naked" ) { |
364 | std::string Undecorated; |
365 | |
366 | Undecorated = std::string(Value->getValue(Storage&: ValueStorage)); |
367 | Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1" ; |
368 | } else { |
369 | YS.printError(N: Field.getKey(), Msg: "unknown key for function" ); |
370 | return false; |
371 | } |
372 | } |
373 | |
374 | if (Transform.empty() == Target.empty()) { |
375 | YS.printError(N: Descriptor, |
376 | Msg: "exactly one of transform or target must be specified" ); |
377 | return false; |
378 | } |
379 | |
380 | // TODO see if there is a more elegant solution to selecting the rewrite |
381 | // descriptor type |
382 | if (!Target.empty()) |
383 | DL->push_back(x: std::make_unique<ExplicitRewriteFunctionDescriptor>( |
384 | args&: Source, args&: Target, args&: Naked)); |
385 | else |
386 | DL->push_back( |
387 | x: std::make_unique<PatternRewriteFunctionDescriptor>(args&: Source, args&: Transform)); |
388 | |
389 | return true; |
390 | } |
391 | |
392 | bool RewriteMapParser:: |
393 | parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, |
394 | yaml::MappingNode *Descriptor, |
395 | RewriteDescriptorList *DL) { |
396 | std::string Source; |
397 | std::string Target; |
398 | std::string Transform; |
399 | |
400 | for (auto &Field : *Descriptor) { |
401 | yaml::ScalarNode *Key; |
402 | yaml::ScalarNode *Value; |
403 | SmallString<32> KeyStorage; |
404 | SmallString<32> ValueStorage; |
405 | StringRef KeyValue; |
406 | |
407 | Key = dyn_cast<yaml::ScalarNode>(Val: Field.getKey()); |
408 | if (!Key) { |
409 | YS.printError(N: Field.getKey(), Msg: "descriptor Key must be a scalar" ); |
410 | return false; |
411 | } |
412 | |
413 | Value = dyn_cast<yaml::ScalarNode>(Val: Field.getValue()); |
414 | if (!Value) { |
415 | YS.printError(N: Field.getValue(), Msg: "descriptor value must be a scalar" ); |
416 | return false; |
417 | } |
418 | |
419 | KeyValue = Key->getValue(Storage&: KeyStorage); |
420 | if (KeyValue == "source" ) { |
421 | std::string Error; |
422 | |
423 | Source = std::string(Value->getValue(Storage&: ValueStorage)); |
424 | if (!Regex(Source).isValid(Error)) { |
425 | YS.printError(N: Field.getKey(), Msg: "invalid regex: " + Error); |
426 | return false; |
427 | } |
428 | } else if (KeyValue == "target" ) { |
429 | Target = std::string(Value->getValue(Storage&: ValueStorage)); |
430 | } else if (KeyValue == "transform" ) { |
431 | Transform = std::string(Value->getValue(Storage&: ValueStorage)); |
432 | } else { |
433 | YS.printError(N: Field.getKey(), Msg: "unknown Key for Global Variable" ); |
434 | return false; |
435 | } |
436 | } |
437 | |
438 | if (Transform.empty() == Target.empty()) { |
439 | YS.printError(N: Descriptor, |
440 | Msg: "exactly one of transform or target must be specified" ); |
441 | return false; |
442 | } |
443 | |
444 | if (!Target.empty()) |
445 | DL->push_back(x: std::make_unique<ExplicitRewriteGlobalVariableDescriptor>( |
446 | args&: Source, args&: Target, |
447 | /*Naked*/ args: false)); |
448 | else |
449 | DL->push_back(x: std::make_unique<PatternRewriteGlobalVariableDescriptor>( |
450 | args&: Source, args&: Transform)); |
451 | |
452 | return true; |
453 | } |
454 | |
455 | bool RewriteMapParser:: |
456 | parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, |
457 | yaml::MappingNode *Descriptor, |
458 | RewriteDescriptorList *DL) { |
459 | std::string Source; |
460 | std::string Target; |
461 | std::string Transform; |
462 | |
463 | for (auto &Field : *Descriptor) { |
464 | yaml::ScalarNode *Key; |
465 | yaml::ScalarNode *Value; |
466 | SmallString<32> KeyStorage; |
467 | SmallString<32> ValueStorage; |
468 | StringRef KeyValue; |
469 | |
470 | Key = dyn_cast<yaml::ScalarNode>(Val: Field.getKey()); |
471 | if (!Key) { |
472 | YS.printError(N: Field.getKey(), Msg: "descriptor key must be a scalar" ); |
473 | return false; |
474 | } |
475 | |
476 | Value = dyn_cast<yaml::ScalarNode>(Val: Field.getValue()); |
477 | if (!Value) { |
478 | YS.printError(N: Field.getValue(), Msg: "descriptor value must be a scalar" ); |
479 | return false; |
480 | } |
481 | |
482 | KeyValue = Key->getValue(Storage&: KeyStorage); |
483 | if (KeyValue == "source" ) { |
484 | std::string Error; |
485 | |
486 | Source = std::string(Value->getValue(Storage&: ValueStorage)); |
487 | if (!Regex(Source).isValid(Error)) { |
488 | YS.printError(N: Field.getKey(), Msg: "invalid regex: " + Error); |
489 | return false; |
490 | } |
491 | } else if (KeyValue == "target" ) { |
492 | Target = std::string(Value->getValue(Storage&: ValueStorage)); |
493 | } else if (KeyValue == "transform" ) { |
494 | Transform = std::string(Value->getValue(Storage&: ValueStorage)); |
495 | } else { |
496 | YS.printError(N: Field.getKey(), Msg: "unknown key for Global Alias" ); |
497 | return false; |
498 | } |
499 | } |
500 | |
501 | if (Transform.empty() == Target.empty()) { |
502 | YS.printError(N: Descriptor, |
503 | Msg: "exactly one of transform or target must be specified" ); |
504 | return false; |
505 | } |
506 | |
507 | if (!Target.empty()) |
508 | DL->push_back(x: std::make_unique<ExplicitRewriteNamedAliasDescriptor>( |
509 | args&: Source, args&: Target, |
510 | /*Naked*/ args: false)); |
511 | else |
512 | DL->push_back(x: std::make_unique<PatternRewriteNamedAliasDescriptor>( |
513 | args&: Source, args&: Transform)); |
514 | |
515 | return true; |
516 | } |
517 | |
518 | PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) { |
519 | if (!runImpl(M)) |
520 | return PreservedAnalyses::all(); |
521 | |
522 | return PreservedAnalyses::none(); |
523 | } |
524 | |
525 | bool RewriteSymbolPass::runImpl(Module &M) { |
526 | bool Changed; |
527 | |
528 | Changed = false; |
529 | for (auto &Descriptor : Descriptors) |
530 | Changed |= Descriptor->performOnModule(M); |
531 | |
532 | return Changed; |
533 | } |
534 | |
535 | void RewriteSymbolPass::loadAndParseMapFiles() { |
536 | const std::vector<std::string> MapFiles(RewriteMapFiles); |
537 | SymbolRewriter::RewriteMapParser Parser; |
538 | |
539 | for (const auto &MapFile : MapFiles) |
540 | Parser.parse(MapFile, DL: &Descriptors); |
541 | } |
542 | |