//===- verify-uselistorder.cpp - Verify use-list order round-trips --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Verify that use-list order can be serialized correctly.  After reading the
// provided IR, this tool shuffles the use-lists and then writes and reads to a
// separate Module whose use-list orders are compared to the original.  The
// unshuffled order and the reverse of every order are checked the same way.
//
// The shuffles are deterministic, but guarantee that use-lists will change.
// The algorithm per iteration is as follows:
//
//  1. Seed the random number generator.  The seed is different for each
//     shuffle.  Shuffle 0 uses default+0, shuffle 1 uses default+1, and so on.
//
//  2. Visit every Value in a deterministic order.
//
//  3. Assign a random number to each Use in the Value's use-list in order.
//
//  4. If the numbers are already in order, reassign numbers until they aren't.
//
//  5. Sort the use-list using Value::sortUseList(), which is a stable sort.
//
//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/DenseSet.h"
31#include "llvm/AsmParser/Parser.h"
32#include "llvm/Bitcode/BitcodeReader.h"
33#include "llvm/Bitcode/BitcodeWriter.h"
34#include "llvm/IR/LLVMContext.h"
35#include "llvm/IR/Module.h"
36#include "llvm/IR/UseListOrder.h"
37#include "llvm/IR/Verifier.h"
38#include "llvm/IRReader/IRReader.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/FileSystem.h"
43#include "llvm/Support/FileUtilities.h"
44#include "llvm/Support/InitLLVM.h"
45#include "llvm/Support/MemoryBuffer.h"
46#include "llvm/Support/SourceMgr.h"
47#include "llvm/Support/SystemUtils.h"
48#include "llvm/Support/raw_ostream.h"
49#include <random>
50#include <vector>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "uselistorder"
55
56static cl::OptionCategory Cat("verify-uselistorder Options");
57
58static cl::opt<std::string> InputFilename(cl::Positional,
59 cl::desc("<input bitcode file>"),
60 cl::init(Val: "-"),
61 cl::value_desc("filename"));
62
63static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temp files"),
64 cl::cat(Cat));
65
66static cl::opt<unsigned>
67 NumShuffles("num-shuffles",
68 cl::desc("Number of times to shuffle and verify use-lists"),
69 cl::init(Val: 1), cl::cat(Cat));
70
71extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
72
73namespace {
74
/// A temporary file used for one write/read round trip of a module.
///
/// The members returning \c bool report failure by returning \c true after
/// printing a diagnostic to errs(); the readers return null on failure.
struct TempFile {
  /// Path of the temporary file; filled in by init().
  std::string Filename;
  /// Bound to \c Filename by init(), which asks it to delete the file unless
  /// -save-temps was given.
  FileRemover Remover;
  /// Create the temporary file, using \p Ext as its suffix.
  bool init(const std::string &Ext);
  /// Write \p M to the file as bitcode, preserving use-list order.
  bool writeBitcode(const Module &M) const;
  /// Write \p M to the file as textual IR, preserving use-list order.
  bool writeAssembly(const Module &M) const;
  /// Parse the file as bitcode into a new module owned by \p Context.
  std::unique_ptr<Module> readBitcode(LLVMContext &Context) const;
  /// Parse the file as textual IR into a new module owned by \p Context.
  std::unique_ptr<Module> readAssembly(LLVMContext &Context) const;
};
84
85struct ValueMapping {
86 DenseMap<const Value *, unsigned> IDs;
87 std::vector<const Value *> Values;
88
89 /// Construct a value mapping for module.
90 ///
91 /// Creates mapping from every value in \c M to an ID. This mapping includes
92 /// un-referencable values.
93 ///
94 /// Every \a Value that gets serialized in some way should be represented
95 /// here. The order needs to be deterministic, but it's unnecessary to match
96 /// the value-ids in the bitcode writer.
97 ///
98 /// All constants that are referenced by other values are included in the
99 /// mapping, but others -- which wouldn't be serialized -- are not.
100 ValueMapping(const Module &M);
101
102 /// Map a value.
103 ///
104 /// Maps a value. If it's a constant, maps all of its operands first.
105 void map(const Value *V);
106 unsigned lookup(const Value *V) const { return IDs.lookup(Val: V); }
107};
108
109} // end namespace
110
111bool TempFile::init(const std::string &Ext) {
112 SmallVector<char, 64> Vector;
113 LLVM_DEBUG(dbgs() << " - create-temp-file\n");
114 if (auto EC = sys::fs::createTemporaryFile(Prefix: "uselistorder", Suffix: Ext, ResultPath&: Vector)) {
115 errs() << "verify-uselistorder: error: " << EC.message() << "\n";
116 return true;
117 }
118 assert(!Vector.empty());
119
120 Filename.assign(first: Vector.data(), last: Vector.data() + Vector.size());
121 Remover.setFile(filename: Filename, deleteIt: !SaveTemps);
122 if (SaveTemps)
123 outs() << " - filename = " << Filename << "\n";
124 return false;
125}
126
127bool TempFile::writeBitcode(const Module &M) const {
128 LLVM_DEBUG(dbgs() << " - write bitcode\n");
129 std::error_code EC;
130 raw_fd_ostream OS(Filename, EC, sys::fs::OF_None);
131 if (EC) {
132 errs() << "verify-uselistorder: error: " << EC.message() << "\n";
133 return true;
134 }
135
136 WriteBitcodeToFile(M, Out&: OS, /* ShouldPreserveUseListOrder */ true);
137 return false;
138}
139
140bool TempFile::writeAssembly(const Module &M) const {
141 LLVM_DEBUG(dbgs() << " - write assembly\n");
142 std::error_code EC;
143 raw_fd_ostream OS(Filename, EC, sys::fs::OF_TextWithCRLF);
144 if (EC) {
145 errs() << "verify-uselistorder: error: " << EC.message() << "\n";
146 return true;
147 }
148
149 M.print(OS, AAW: nullptr, /* ShouldPreserveUseListOrder */ true);
150 return false;
151}
152
153std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
154 LLVM_DEBUG(dbgs() << " - read bitcode\n");
155 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOr =
156 MemoryBuffer::getFile(Filename);
157 if (!BufferOr) {
158 errs() << "verify-uselistorder: error: " << BufferOr.getError().message()
159 << "\n";
160 return nullptr;
161 }
162
163 MemoryBuffer *Buffer = BufferOr.get().get();
164 Expected<std::unique_ptr<Module>> ModuleOr =
165 parseBitcodeFile(Buffer: Buffer->getMemBufferRef(), Context);
166 if (!ModuleOr) {
167 logAllUnhandledErrors(E: ModuleOr.takeError(), OS&: errs(),
168 ErrorBanner: "verify-uselistorder: error: ");
169 return nullptr;
170 }
171
172 return std::move(ModuleOr.get());
173}
174
175std::unique_ptr<Module> TempFile::readAssembly(LLVMContext &Context) const {
176 LLVM_DEBUG(dbgs() << " - read assembly\n");
177 SMDiagnostic Err;
178 std::unique_ptr<Module> M = parseAssemblyFile(Filename, Err, Context);
179 if (!M)
180 Err.print(ProgName: "verify-uselistorder", S&: errs());
181 return M;
182}
183
184ValueMapping::ValueMapping(const Module &M) {
185 // Every value should be mapped, including things like void instructions and
186 // basic blocks that are kept out of the ValueEnumerator.
187 //
188 // The current mapping order makes it easier to debug the tables. It happens
189 // to be similar to the ID mapping when writing ValueEnumerator, but they
190 // aren't (and needn't be) in sync.
191
192 // Globals.
193 for (const GlobalVariable &G : M.globals())
194 map(V: &G);
195 for (const GlobalAlias &A : M.aliases())
196 map(V: &A);
197 for (const GlobalIFunc &IF : M.ifuncs())
198 map(V: &IF);
199 for (const Function &F : M)
200 map(V: &F);
201
202 // Constants used by globals.
203 for (const GlobalVariable &G : M.globals())
204 if (G.hasInitializer())
205 map(V: G.getInitializer());
206 for (const GlobalAlias &A : M.aliases())
207 map(V: A.getAliasee());
208 for (const GlobalIFunc &IF : M.ifuncs())
209 map(V: IF.getResolver());
210 for (const Function &F : M)
211 for (Value *Op : F.operands())
212 map(V: Op);
213
214 // Function bodies.
215 for (const Function &F : M) {
216 for (const Argument &A : F.args())
217 map(V: &A);
218 for (const BasicBlock &BB : F)
219 map(V: &BB);
220 for (const BasicBlock &BB : F)
221 for (const Instruction &I : BB)
222 map(V: &I);
223
224 // Constants used by instructions.
225 for (const BasicBlock &BB : F) {
226 for (const Instruction &I : BB) {
227 for (const DbgVariableRecord &DVR :
228 filterDbgVars(R: I.getDbgRecordRange())) {
229 for (Value *Op : DVR.location_ops())
230 map(V: Op);
231 if (DVR.isDbgAssign())
232 map(V: DVR.getAddress());
233 }
234 for (const Value *Op : I.operands()) {
235 // Look through a metadata wrapper.
236 if (const auto *MAV = dyn_cast<MetadataAsValue>(Val: Op))
237 if (const auto *VAM = dyn_cast<ValueAsMetadata>(Val: MAV->getMetadata()))
238 Op = VAM->getValue();
239
240 if ((isa<Constant>(Val: Op) && !isa<GlobalValue>(Val: *Op)) ||
241 isa<InlineAsm>(Val: Op))
242 map(V: Op);
243 }
244 }
245 }
246 }
247}
248
249void ValueMapping::map(const Value *V) {
250 if (IDs.lookup(Val: V))
251 return;
252
253 if (auto *C = dyn_cast<Constant>(Val: V))
254 if (!isa<GlobalValue>(Val: C))
255 for (const Value *Op : C->operands())
256 map(V: Op);
257
258 Values.push_back(x: V);
259 IDs[V] = Values.size();
260}
261
262#ifndef NDEBUG
263static void dumpMapping(const ValueMapping &VM) {
264 dbgs() << "value-mapping (size = " << VM.Values.size() << "):\n";
265 for (unsigned I = 0, E = VM.Values.size(); I != E; ++I) {
266 dbgs() << " - id = " << I << ", value = ";
267 VM.Values[I]->dump();
268 }
269}
270
271static void debugValue(const ValueMapping &M, unsigned I, StringRef Desc) {
272 const Value *V = M.Values[I];
273 dbgs() << " - " << Desc << " value = ";
274 V->dump();
275 for (const Use &U : V->uses()) {
276 dbgs() << " => use: op = " << U.getOperandNo()
277 << ", user-id = " << M.IDs.lookup(U.getUser()) << ", user = ";
278 U.getUser()->dump();
279 }
280}
281
282static void debugUserMismatch(const ValueMapping &L, const ValueMapping &R,
283 unsigned I) {
284 dbgs() << " - fail: user mismatch: ID = " << I << "\n";
285 debugValue(L, I, "LHS");
286 debugValue(R, I, "RHS");
287
288 dbgs() << "\nlhs-";
289 dumpMapping(L);
290 dbgs() << "\nrhs-";
291 dumpMapping(R);
292}
293
294static void debugSizeMismatch(const ValueMapping &L, const ValueMapping &R) {
295 dbgs() << " - fail: map size: " << L.Values.size()
296 << " != " << R.Values.size() << "\n";
297 dbgs() << "\nlhs-";
298 dumpMapping(L);
299 dbgs() << "\nrhs-";
300 dumpMapping(R);
301}
302#endif
303
304static bool matches(const ValueMapping &LM, const ValueMapping &RM) {
305 LLVM_DEBUG(dbgs() << "compare value maps\n");
306 if (LM.Values.size() != RM.Values.size()) {
307 LLVM_DEBUG(debugSizeMismatch(LM, RM));
308 return false;
309 }
310
311 // This mapping doesn't include dangling constant users, since those don't
312 // get serialized. However, checking if users are constant and calling
313 // isConstantUsed() on every one is very expensive. Instead, just check if
314 // the user is mapped.
315 auto skipUnmappedUsers =
316 [&](Value::const_use_iterator &U, Value::const_use_iterator E,
317 const ValueMapping &M) {
318 while (U != E && !M.lookup(V: U->getUser()))
319 ++U;
320 };
321
322 // Iterate through all values, and check that both mappings have the same
323 // users.
324 for (unsigned I = 0, E = LM.Values.size(); I != E; ++I) {
325 const Value *L = LM.Values[I];
326 const Value *R = RM.Values[I];
327 auto LU = L->use_begin(), LE = L->use_end();
328 auto RU = R->use_begin(), RE = R->use_end();
329 skipUnmappedUsers(LU, LE, LM);
330 skipUnmappedUsers(RU, RE, RM);
331
332 while (LU != LE) {
333 if (RU == RE) {
334 LLVM_DEBUG(debugUserMismatch(LM, RM, I));
335 return false;
336 }
337 if (LM.lookup(V: LU->getUser()) != RM.lookup(V: RU->getUser())) {
338 LLVM_DEBUG(debugUserMismatch(LM, RM, I));
339 return false;
340 }
341 if (LU->getOperandNo() != RU->getOperandNo()) {
342 LLVM_DEBUG(debugUserMismatch(LM, RM, I));
343 return false;
344 }
345 skipUnmappedUsers(++LU, LE, LM);
346 skipUnmappedUsers(++RU, RE, RM);
347 }
348 if (RU != RE) {
349 LLVM_DEBUG(debugUserMismatch(LM, RM, I));
350 return false;
351 }
352 }
353
354 return true;
355}
356
357static void verifyAfterRoundTrip(const Module &M,
358 std::unique_ptr<Module> OtherM) {
359 if (!OtherM)
360 report_fatal_error(reason: "parsing failed");
361 if (verifyModule(M: *OtherM, OS: &errs()))
362 report_fatal_error(reason: "verification failed");
363 if (!matches(LM: ValueMapping(M), RM: ValueMapping(*OtherM)))
364 report_fatal_error(reason: "use-list order changed");
365}
366
367static void verifyBitcodeUseListOrder(const Module &M) {
368 TempFile F;
369 if (F.init(Ext: "bc"))
370 report_fatal_error(reason: "failed to initialize bitcode file");
371
372 if (F.writeBitcode(M))
373 report_fatal_error(reason: "failed to write bitcode");
374
375 LLVMContext Context;
376 verifyAfterRoundTrip(M, OtherM: F.readBitcode(Context));
377}
378
379static void verifyAssemblyUseListOrder(const Module &M) {
380 TempFile F;
381 if (F.init(Ext: "ll"))
382 report_fatal_error(reason: "failed to initialize assembly file");
383
384 if (F.writeAssembly(M))
385 report_fatal_error(reason: "failed to write assembly");
386
387 LLVMContext Context;
388 verifyAfterRoundTrip(M, OtherM: F.readAssembly(Context));
389}
390
391static void verifyUseListOrder(const Module &M) {
392 outs() << "verify bitcode\n";
393 verifyBitcodeUseListOrder(M);
394 outs() << "verify assembly\n";
395 verifyAssemblyUseListOrder(M);
396}
397
398static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen,
399 DenseSet<Value *> &Seen) {
400 if (!Seen.insert(V).second)
401 return;
402
403 if (auto *C = dyn_cast<Constant>(Val: V))
404 if (!isa<GlobalValue>(Val: C))
405 for (Value *Op : C->operands())
406 shuffleValueUseLists(V: Op, Gen, Seen);
407
408 if (V->use_empty() || std::next(x: V->use_begin()) == V->use_end())
409 // Nothing to shuffle for 0 or 1 users.
410 return;
411
412 // Generate random numbers between 10 and 99, which will line up nicely in
413 // debug output. We're not worried about collisions here.
414 LLVM_DEBUG(dbgs() << "V = "; V->dump());
415 std::uniform_int_distribution<short> Dist(10, 99);
416 SmallDenseMap<const Use *, short, 16> Order;
417 auto compareUses =
418 [&Order](const Use &L, const Use &R) { return Order[&L] < Order[&R]; };
419 do {
420 for (const Use &U : V->uses()) {
421 auto I = Dist(Gen);
422 Order[&U] = I;
423 LLVM_DEBUG(dbgs() << " - order: " << I << ", op = " << U.getOperandNo()
424 << ", U = ";
425 U.getUser()->dump());
426 }
427 } while (std::is_sorted(first: V->use_begin(), last: V->use_end(), comp: compareUses));
428
429 LLVM_DEBUG(dbgs() << " => shuffle\n");
430 V->sortUseList(Cmp: compareUses);
431
432 LLVM_DEBUG({
433 for (const Use &U : V->uses()) {
434 dbgs() << " - order: " << Order.lookup(&U)
435 << ", op = " << U.getOperandNo() << ", U = ";
436 U.getUser()->dump();
437 }
438 });
439}
440
441static void reverseValueUseLists(Value *V, DenseSet<Value *> &Seen) {
442 if (!Seen.insert(V).second)
443 return;
444
445 if (auto *C = dyn_cast<Constant>(Val: V))
446 if (!isa<GlobalValue>(Val: C))
447 for (Value *Op : C->operands())
448 reverseValueUseLists(V: Op, Seen);
449
450 if (V->use_empty() || std::next(x: V->use_begin()) == V->use_end())
451 // Nothing to shuffle for 0 or 1 users.
452 return;
453
454 LLVM_DEBUG({
455 dbgs() << "V = ";
456 V->dump();
457 for (const Use &U : V->uses()) {
458 dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
459 U.getUser()->dump();
460 }
461 dbgs() << " => reverse\n";
462 });
463
464 V->reverseUseList();
465
466 LLVM_DEBUG({
467 for (const Use &U : V->uses()) {
468 dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
469 U.getUser()->dump();
470 }
471 });
472}
473
474template <class Changer>
475static void changeUseLists(Module &M, Changer changeValueUseList) {
476 // Visit every value that would be serialized to an IR file.
477 //
478 // Globals.
479 for (GlobalVariable &G : M.globals())
480 changeValueUseList(&G);
481 for (GlobalAlias &A : M.aliases())
482 changeValueUseList(&A);
483 for (GlobalIFunc &IF : M.ifuncs())
484 changeValueUseList(&IF);
485 for (Function &F : M)
486 changeValueUseList(&F);
487
488 // Constants used by globals.
489 for (GlobalVariable &G : M.globals())
490 if (G.hasInitializer())
491 changeValueUseList(G.getInitializer());
492 for (GlobalAlias &A : M.aliases())
493 changeValueUseList(A.getAliasee());
494 for (GlobalIFunc &IF : M.ifuncs())
495 changeValueUseList(IF.getResolver());
496 for (Function &F : M)
497 for (Value *Op : F.operands())
498 changeValueUseList(Op);
499
500 // Function bodies.
501 for (Function &F : M) {
502 for (Argument &A : F.args())
503 changeValueUseList(&A);
504 for (BasicBlock &BB : F)
505 changeValueUseList(&BB);
506 for (BasicBlock &BB : F)
507 for (Instruction &I : BB)
508 changeValueUseList(&I);
509
510 // Constants used by instructions.
511 for (BasicBlock &BB : F)
512 for (Instruction &I : BB)
513 for (Value *Op : I.operands()) {
514 // Look through a metadata wrapper.
515 if (auto *MAV = dyn_cast<MetadataAsValue>(Val: Op))
516 if (auto *VAM = dyn_cast<ValueAsMetadata>(Val: MAV->getMetadata()))
517 Op = VAM->getValue();
518 if ((isa<Constant>(Val: Op) && !isa<GlobalValue>(Val: *Op)) ||
519 isa<InlineAsm>(Val: Op))
520 changeValueUseList(Op);
521 }
522 }
523
524 if (verifyModule(M, OS: &errs()))
525 report_fatal_error(reason: "verification failed");
526}
527
528static void shuffleUseLists(Module &M, unsigned SeedOffset) {
529 std::minstd_rand0 Gen(std::minstd_rand0::default_seed + SeedOffset);
530 DenseSet<Value *> Seen;
531 changeUseLists(M, changeValueUseList: [&](Value *V) { shuffleValueUseLists(V, Gen, Seen); });
532 LLVM_DEBUG(dbgs() << "\n");
533}
534
535static void reverseUseLists(Module &M) {
536 DenseSet<Value *> Seen;
537 changeUseLists(M, changeValueUseList: [&](Value *V) { reverseValueUseLists(V, Seen); });
538 LLVM_DEBUG(dbgs() << "\n");
539}
540
541int main(int argc, char **argv) {
542 PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE;
543 InitLLVM X(argc, argv);
544
545 // Enable debug stream buffering.
546 EnableDebugBuffering = true;
547
548 cl::HideUnrelatedOptions(Category&: Cat);
549 cl::ParseCommandLineOptions(argc, argv,
550 Overview: "llvm tool to verify use-list order\n");
551
552 LLVMContext Context;
553 SMDiagnostic Err;
554
555 // Load the input module...
556 std::unique_ptr<Module> M = parseIRFile(Filename: InputFilename, Err, Context);
557
558 if (!M) {
559 Err.print(ProgName: argv[0], S&: errs());
560 return 1;
561 }
562 if (verifyModule(M: *M, OS: &errs())) {
563 errs() << argv[0] << ": " << InputFilename
564 << ": error: input module is broken!\n";
565 return 1;
566 }
567
568 // Verify the use lists now and after reversing them.
569 outs() << "*** verify-uselistorder ***\n";
570 verifyUseListOrder(M: *M);
571 outs() << "reverse\n";
572 reverseUseLists(M&: *M);
573 verifyUseListOrder(M: *M);
574
575 for (unsigned I = 0, E = NumShuffles; I != E; ++I) {
576 outs() << "\n";
577
578 // Shuffle with a different (deterministic) seed each time.
579 outs() << "shuffle (" << I + 1 << " of " << E << ")\n";
580 shuffleUseLists(M&: *M, SeedOffset: I);
581
582 // Verify again before and after reversing.
583 verifyUseListOrder(M: *M);
584 outs() << "reverse\n";
585 reverseUseLists(M&: *M);
586 verifyUseListOrder(M: *M);
587 }
588
589 return 0;
590}
591