//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation for the Delta Debugging Algorithm:
// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
// into chunks and tries to reduce the number of chunks that are interesting.
//
//===----------------------------------------------------------------------===//
14 | |
#include "Delta.h"
#include "DeltaPass.h"
#include "ReducerWorkItem.h"
#include "TestRunner.h"
#include "Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/WithColor.h"
#include <climits> // for INT_MAX
32 | |
33 | using namespace llvm; |
34 | |
35 | extern cl::OptionCategory LLVMReduceOptions; |
36 | |
37 | static cl::opt<bool> AbortOnInvalidReduction( |
38 | "abort-on-invalid-reduction" , |
39 | cl::desc("Abort if any reduction results in invalid IR" ), |
40 | cl::cat(LLVMReduceOptions)); |
41 | |
42 | static cl::opt<bool> SkipVerifyAfterCountingChunks( |
43 | "skip-verify-interesting-after-counting-chunks" , |
44 | cl::desc("Do not validate testcase is interesting after counting chunks " |
45 | "(may speed up reduction)" ), |
46 | cl::cat(LLVMReduceOptions)); |
47 | |
48 | static cl::opt<unsigned int> StartingGranularityLevel( |
49 | "starting-granularity-level" , |
50 | cl::desc("Number of times to divide chunks prior to first test" ), |
51 | cl::cat(LLVMReduceOptions)); |
52 | |
53 | #ifdef LLVM_ENABLE_THREADS |
54 | static cl::opt<unsigned> NumJobs( |
55 | "j" , |
56 | cl::desc("Maximum number of threads to use to process chunks. Set to 1 to " |
57 | "disable parallelism." ), |
58 | cl::init(Val: 1), cl::cat(LLVMReduceOptions)); |
59 | #else |
60 | unsigned NumJobs = 1; |
61 | #endif |
62 | |
63 | static StringLiteral SeparatorLine = |
64 | "--------------------------------------------------------------------------" |
65 | "------\n" ; |
66 | |
67 | /// Splits Chunks in half and prints them. |
68 | /// If unable to split (when chunk size is 1) returns false. |
69 | static bool increaseGranularity(std::vector<Chunk> &Chunks) { |
70 | if (Verbose) |
71 | errs() << "Increasing granularity..." ; |
72 | std::vector<Chunk> NewChunks; |
73 | bool SplitAny = false; |
74 | |
75 | for (Chunk C : Chunks) { |
76 | if (C.End - C.Begin == 0) |
77 | NewChunks.push_back(x: C); |
78 | else { |
79 | int Half = (C.Begin + C.End) / 2; |
80 | NewChunks.push_back(x: {.Begin: C.Begin, .End: Half}); |
81 | NewChunks.push_back(x: {.Begin: Half + 1, .End: C.End}); |
82 | SplitAny = true; |
83 | } |
84 | } |
85 | if (SplitAny) { |
86 | Chunks = NewChunks; |
87 | if (Verbose) { |
88 | errs() << "Success! " << NewChunks.size() << " New Chunks:\n" ; |
89 | for (auto C : Chunks) { |
90 | errs() << '\t'; |
91 | C.print(); |
92 | errs() << '\n'; |
93 | } |
94 | } |
95 | } |
96 | return SplitAny; |
97 | } |
98 | |
99 | // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the |
100 | // modified module if the chunk resulted in a reduction. |
101 | static std::unique_ptr<ReducerWorkItem> |
102 | CheckChunk(const Chunk ChunkToCheckForUninterestingness, |
103 | std::unique_ptr<ReducerWorkItem> Clone, const TestRunner &Test, |
104 | ReductionFunc , |
105 | const DenseSet<Chunk> &UninterestingChunks, |
106 | const std::vector<Chunk> &ChunksStillConsideredInteresting) { |
107 | // Take all of ChunksStillConsideredInteresting chunks, except those we've |
108 | // already deemed uninteresting (UninterestingChunks) but didn't remove |
109 | // from ChunksStillConsideredInteresting yet, and additionally ignore |
110 | // ChunkToCheckForUninterestingness chunk. |
111 | std::vector<Chunk> CurrentChunks; |
112 | CurrentChunks.reserve(n: ChunksStillConsideredInteresting.size() - |
113 | UninterestingChunks.size() - 1); |
114 | copy_if(Range: ChunksStillConsideredInteresting, Out: std::back_inserter(x&: CurrentChunks), |
115 | P: [&](const Chunk &C) { |
116 | return C != ChunkToCheckForUninterestingness && |
117 | !UninterestingChunks.count(V: C); |
118 | }); |
119 | |
120 | // Generate Module with only Targets inside Current Chunks |
121 | Oracle O(CurrentChunks); |
122 | ExtractChunksFromModule(O, *Clone); |
123 | |
124 | // Some reductions may result in invalid IR. Skip such reductions. |
125 | if (Clone->verify(OS: &errs())) { |
126 | if (AbortOnInvalidReduction) { |
127 | errs() << "Invalid reduction, aborting.\n" ; |
128 | Clone->print(ROS&: errs()); |
129 | exit(status: 1); |
130 | } |
131 | if (Verbose) { |
132 | errs() << " **** WARNING | reduction resulted in invalid module, " |
133 | "skipping\n" ; |
134 | } |
135 | return nullptr; |
136 | } |
137 | |
138 | if (Verbose) { |
139 | errs() << "Ignoring: " ; |
140 | ChunkToCheckForUninterestingness.print(); |
141 | for (const Chunk &C : UninterestingChunks) |
142 | C.print(); |
143 | errs() << "\n" ; |
144 | } |
145 | |
146 | if (!Clone->isReduced(Test)) { |
147 | // Program became non-reduced, so this chunk appears to be interesting. |
148 | if (Verbose) |
149 | errs() << "\n" ; |
150 | return nullptr; |
151 | } |
152 | return Clone; |
153 | } |
154 | |
155 | static SmallString<0> ProcessChunkFromSerializedBitcode( |
156 | const Chunk ChunkToCheckForUninterestingness, const TestRunner &Test, |
157 | ReductionFunc , |
158 | const DenseSet<Chunk> &UninterestingChunks, |
159 | ArrayRef<Chunk> ChunksStillConsideredInteresting, StringRef OriginalBC, |
160 | std::atomic<bool> &AnyReduced) { |
161 | LLVMContext Ctx; |
162 | auto CloneMMM = std::make_unique<ReducerWorkItem>(); |
163 | MemoryBufferRef Data(OriginalBC, "<bc file>" ); |
164 | CloneMMM->readBitcode(Data, Ctx, ToolName: Test.getToolName()); |
165 | |
166 | SmallString<0> Result; |
167 | if (std::unique_ptr<ReducerWorkItem> ChunkResult = |
168 | CheckChunk(ChunkToCheckForUninterestingness, Clone: std::move(CloneMMM), |
169 | Test, ExtractChunksFromModule, UninterestingChunks, |
170 | ChunksStillConsideredInteresting)) { |
171 | raw_svector_ostream BCOS(Result); |
172 | ChunkResult->writeBitcode(OutStream&: BCOS); |
173 | // Communicate that the task reduced a chunk. |
174 | AnyReduced = true; |
175 | } |
176 | return Result; |
177 | } |
178 | |
179 | using SharedTaskQueue = std::deque<std::shared_future<SmallString<0>>>; |
180 | |
181 | /// Runs the Delta Debugging algorithm, splits the code into chunks and |
182 | /// reduces the amount of chunks that are considered interesting by the |
183 | /// given test. The number of chunks is determined by a preliminary run of the |
184 | /// reduction pass where no change must be made to the module. |
185 | void llvm::runDeltaPass(TestRunner &Test, const DeltaPass &Pass) { |
186 | assert(!Test.getProgram().verify(&errs()) && |
187 | "input module is broken before making changes" ); |
188 | errs() << "*** " << Pass.Desc << " (" << Pass.Name << ")...\n" ; |
189 | |
190 | int Targets; |
191 | { |
192 | // Count the number of chunks by counting the number of calls to |
193 | // Oracle::shouldKeep() but always returning true so no changes are |
194 | // made. |
195 | std::vector<Chunk> AllChunks = {{.Begin: 0, INT_MAX}}; |
196 | Oracle Counter(AllChunks); |
197 | Pass.Func(Counter, Test.getProgram()); |
198 | Targets = Counter.count(); |
199 | |
200 | assert(!Test.getProgram().verify(&errs()) && |
201 | "input module is broken after counting chunks" ); |
202 | |
203 | if (!SkipVerifyAfterCountingChunks && !Test.getProgram().isReduced(Test)) { |
204 | WithColor::warning() |
205 | << "input module no longer interesting after counting chunks\n" ; |
206 | WithColor::note() << "the interestingness test may be flaky, or there " |
207 | "may be an llvm-reduce bug\n" ; |
208 | WithColor::note() |
209 | << "use -skip-verify-interesting-after-counting-chunks to " |
210 | "suppress this warning\n" ; |
211 | } |
212 | |
213 | #ifndef NDEBUG |
214 | { |
215 | // Make sure that the number of chunks does not change as we reduce. |
216 | std::vector<Chunk> NoChunks = {{0, INT_MAX}}; |
217 | Oracle NoChunksCounter(NoChunks); |
218 | std::unique_ptr<ReducerWorkItem> Clone = |
219 | Test.getProgram().clone(Test.getTargetMachine()); |
220 | Pass.Func(NoChunksCounter, *Clone); |
221 | assert(Targets == NoChunksCounter.count() && |
222 | "number of chunks changes when reducing" ); |
223 | } |
224 | #endif |
225 | } |
226 | if (!Targets) { |
227 | if (Verbose) |
228 | errs() << "\nNothing to reduce\n" ; |
229 | errs() << SeparatorLine; |
230 | return; |
231 | } |
232 | |
233 | std::vector<Chunk> ChunksStillConsideredInteresting = {{.Begin: 0, .End: Targets - 1}}; |
234 | std::unique_ptr<ReducerWorkItem> ReducedProgram; |
235 | |
236 | for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) { |
237 | increaseGranularity(Chunks&: ChunksStillConsideredInteresting); |
238 | } |
239 | |
240 | std::atomic<bool> AnyReduced; |
241 | std::unique_ptr<ThreadPoolInterface> ChunkThreadPoolPtr; |
242 | if (NumJobs > 1) |
243 | ChunkThreadPoolPtr = |
244 | std::make_unique<DefaultThreadPool>(args: hardware_concurrency(ThreadCount: NumJobs)); |
245 | |
246 | SmallString<0> OriginalBC; |
247 | DenseSet<Chunk> UninterestingChunks; |
248 | UninterestingChunks.reserve(Size: Targets); |
249 | |
250 | bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity; |
251 | do { |
252 | FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false; |
253 | |
254 | UninterestingChunks.clear(); |
255 | |
256 | // When running with more than one thread, serialize the original bitcode |
257 | // to OriginalBC. |
258 | if (NumJobs > 1) { |
259 | OriginalBC.clear(); |
260 | raw_svector_ostream BCOS(OriginalBC); |
261 | Test.getProgram().writeBitcode(OutStream&: BCOS); |
262 | } |
263 | |
264 | SharedTaskQueue TaskQueue; |
265 | for (auto I = ChunksStillConsideredInteresting.rbegin(), |
266 | E = ChunksStillConsideredInteresting.rend(); |
267 | I != E; ++I) { |
268 | std::unique_ptr<ReducerWorkItem> Result = nullptr; |
269 | unsigned WorkLeft = std::distance(first: I, last: E); |
270 | |
271 | // Run in parallel mode, if the user requested more than one thread and |
272 | // there are at least a few chunks to process. |
273 | if (NumJobs > 1 && WorkLeft > 1) { |
274 | unsigned NumInitialTasks = std::min(a: WorkLeft, b: unsigned(NumJobs)); |
275 | unsigned NumChunksProcessed = 0; |
276 | |
277 | ThreadPoolInterface &ChunkThreadPool = *ChunkThreadPoolPtr; |
278 | assert(TaskQueue.empty()); |
279 | |
280 | AnyReduced = false; |
281 | // Queue jobs to process NumInitialTasks chunks in parallel using |
282 | // ChunkThreadPool. When the tasks are added to the pool, parse the |
283 | // original module from OriginalBC with a fresh LLVMContext object. This |
284 | // ensures that the cloned module of each task uses an independent |
285 | // LLVMContext object. If a task reduces the input, serialize the result |
286 | // back in the corresponding Result element. |
287 | for (unsigned J = 0; J < NumInitialTasks; ++J) { |
288 | Chunk ChunkToCheck = *(I + J); |
289 | TaskQueue.emplace_back(args: ChunkThreadPool.async( |
290 | F&: ProcessChunkFromSerializedBitcode, ArgList&: ChunkToCheck, ArgList: std::ref(t&: Test), |
291 | ArgList: Pass.Func, ArgList&: UninterestingChunks, ArgList&: ChunksStillConsideredInteresting, |
292 | ArgList&: OriginalBC, ArgList: std::ref(t&: AnyReduced))); |
293 | } |
294 | |
295 | // Start processing results of the queued tasks. We wait for the first |
296 | // task in the queue to finish. If it reduced a chunk, we parse the |
297 | // result and exit the loop. |
298 | // Otherwise we will try to schedule a new task, if |
299 | // * no other pending job reduced a chunk and |
300 | // * we have not reached the end of the chunk. |
301 | while (!TaskQueue.empty()) { |
302 | auto &Future = TaskQueue.front(); |
303 | Future.wait(); |
304 | |
305 | NumChunksProcessed++; |
306 | SmallString<0> Res = Future.get(); |
307 | TaskQueue.pop_front(); |
308 | if (Res.empty()) { |
309 | unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size(); |
310 | if (!AnyReduced && I + NumScheduledTasks != E) { |
311 | Chunk ChunkToCheck = *(I + NumScheduledTasks); |
312 | TaskQueue.emplace_back(args: ChunkThreadPool.async( |
313 | F&: ProcessChunkFromSerializedBitcode, ArgList&: ChunkToCheck, |
314 | ArgList: std::ref(t&: Test), ArgList: Pass.Func, ArgList&: UninterestingChunks, |
315 | ArgList&: ChunksStillConsideredInteresting, ArgList&: OriginalBC, |
316 | ArgList: std::ref(t&: AnyReduced))); |
317 | } |
318 | continue; |
319 | } |
320 | |
321 | Result = std::make_unique<ReducerWorkItem>(); |
322 | MemoryBufferRef Data(StringRef(Res), "<bc file>" ); |
323 | Result->readBitcode(Data, Ctx&: Test.getProgram().M->getContext(), |
324 | ToolName: Test.getToolName()); |
325 | break; |
326 | } |
327 | |
328 | // If we broke out of the loop, we still need to wait for everything to |
329 | // avoid race access to the chunk set. |
330 | // |
331 | // TODO: Create a way to kill remaining items we're ignoring; they could |
332 | // take a long time. |
333 | ChunkThreadPoolPtr->wait(); |
334 | TaskQueue.clear(); |
335 | |
336 | // Forward I to the last chunk processed in parallel. |
337 | I += NumChunksProcessed - 1; |
338 | } else { |
339 | Result = CheckChunk( |
340 | ChunkToCheckForUninterestingness: *I, Clone: Test.getProgram().clone(TM: Test.getTargetMachine()), Test, |
341 | ExtractChunksFromModule: Pass.Func, UninterestingChunks, ChunksStillConsideredInteresting); |
342 | } |
343 | |
344 | if (!Result) |
345 | continue; |
346 | |
347 | const Chunk ChunkToCheckForUninterestingness = *I; |
348 | FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true; |
349 | UninterestingChunks.insert(V: ChunkToCheckForUninterestingness); |
350 | ReducedProgram = std::move(Result); |
351 | } |
352 | // Delete uninteresting chunks |
353 | erase_if(C&: ChunksStillConsideredInteresting, |
354 | P: [&UninterestingChunks](const Chunk &C) { |
355 | return UninterestingChunks.count(V: C); |
356 | }); |
357 | } while (!ChunksStillConsideredInteresting.empty() && |
358 | (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity || |
359 | increaseGranularity(Chunks&: ChunksStillConsideredInteresting))); |
360 | |
361 | // If we reduced the testcase replace it |
362 | if (ReducedProgram) { |
363 | Test.setProgram(std::move(ReducedProgram)); |
364 | // FIXME: Report meaningful progress info |
365 | Test.writeOutput(Message: " **** SUCCESS | Saved new best reduction to " ); |
366 | } |
367 | if (Verbose) |
368 | errs() << "Couldn't increase anymore.\n" ; |
369 | errs() << SeparatorLine; |
370 | } |
371 | |