| 1 | //===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the implementation for the Delta Debugging Algorithm: |
| 10 | // it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) |
| 11 | // into chunks and tries to reduce the number of chunks that are interesting. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "Delta.h" |
| 16 | #include "DeltaPass.h" |
| 17 | #include "ReducerWorkItem.h" |
| 18 | #include "TestRunner.h" |
| 19 | #include "Utils.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/Bitcode/BitcodeReader.h" |
| 22 | #include "llvm/Bitcode/BitcodeWriter.h" |
| 23 | #include "llvm/CodeGen/MachineFunction.h" |
| 24 | #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS |
| 25 | #include "llvm/IR/Module.h" |
| 26 | #include "llvm/IR/Verifier.h" |
| 27 | #include "llvm/MC/TargetRegistry.h" |
| 28 | #include "llvm/Support/CommandLine.h" |
| 29 | #include "llvm/Support/MemoryBufferRef.h" |
| 30 | #include "llvm/Support/ThreadPool.h" |
| 31 | #include "llvm/Support/WithColor.h" |
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
| 35 | extern cl::OptionCategory LLVMReduceOptions; |
| 36 | |
| 37 | static cl::opt<bool> AbortOnInvalidReduction( |
| 38 | "abort-on-invalid-reduction" , |
| 39 | cl::desc("Abort if any reduction results in invalid IR" ), |
| 40 | cl::cat(LLVMReduceOptions)); |
| 41 | |
| 42 | static cl::opt<bool> SkipVerifyAfterCountingChunks( |
| 43 | "skip-verify-interesting-after-counting-chunks" , |
| 44 | cl::desc("Do not validate testcase is interesting after counting chunks " |
| 45 | "(may speed up reduction)" ), |
| 46 | cl::cat(LLVMReduceOptions)); |
| 47 | |
| 48 | static cl::opt<unsigned int> StartingGranularityLevel( |
| 49 | "starting-granularity-level" , |
| 50 | cl::desc("Number of times to divide chunks prior to first test" ), |
| 51 | cl::cat(LLVMReduceOptions)); |
| 52 | |
| 53 | #ifdef LLVM_ENABLE_THREADS |
| 54 | static cl::opt<unsigned> NumJobs( |
| 55 | "j" , |
| 56 | cl::desc("Maximum number of threads to use to process chunks. Set to 1 to " |
| 57 | "disable parallelism." ), |
| 58 | cl::init(Val: 1), cl::cat(LLVMReduceOptions)); |
| 59 | #else |
| 60 | unsigned NumJobs = 1; |
| 61 | #endif |
| 62 | |
| 63 | static StringLiteral SeparatorLine = |
| 64 | "--------------------------------------------------------------------------" |
| 65 | "------\n" ; |
| 66 | |
| 67 | /// Splits Chunks in half and prints them. |
| 68 | /// If unable to split (when chunk size is 1) returns false. |
| 69 | static bool increaseGranularity(std::vector<Chunk> &Chunks) { |
| 70 | if (Verbose) |
| 71 | errs() << "Increasing granularity..." ; |
| 72 | std::vector<Chunk> NewChunks; |
| 73 | bool SplitAny = false; |
| 74 | |
| 75 | for (Chunk C : Chunks) { |
| 76 | if (C.End - C.Begin == 0) |
| 77 | NewChunks.push_back(x: C); |
| 78 | else { |
| 79 | int Half = (C.Begin + C.End) / 2; |
| 80 | NewChunks.push_back(x: {.Begin: C.Begin, .End: Half}); |
| 81 | NewChunks.push_back(x: {.Begin: Half + 1, .End: C.End}); |
| 82 | SplitAny = true; |
| 83 | } |
| 84 | } |
| 85 | if (SplitAny) { |
| 86 | Chunks = NewChunks; |
| 87 | if (Verbose) { |
| 88 | errs() << "Success! " << NewChunks.size() << " New Chunks:\n" ; |
| 89 | for (auto C : Chunks) { |
| 90 | errs() << '\t'; |
| 91 | C.print(); |
| 92 | errs() << '\n'; |
| 93 | } |
| 94 | } |
| 95 | } |
| 96 | return SplitAny; |
| 97 | } |
| 98 | |
| 99 | // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the |
| 100 | // modified module if the chunk resulted in a reduction. |
| 101 | static std::unique_ptr<ReducerWorkItem> |
| 102 | CheckChunk(const Chunk ChunkToCheckForUninterestingness, |
| 103 | std::unique_ptr<ReducerWorkItem> Clone, const TestRunner &Test, |
| 104 | ReductionFunc , |
| 105 | const DenseSet<Chunk> &UninterestingChunks, |
| 106 | const std::vector<Chunk> &ChunksStillConsideredInteresting) { |
| 107 | // Take all of ChunksStillConsideredInteresting chunks, except those we've |
| 108 | // already deemed uninteresting (UninterestingChunks) but didn't remove |
| 109 | // from ChunksStillConsideredInteresting yet, and additionally ignore |
| 110 | // ChunkToCheckForUninterestingness chunk. |
| 111 | std::vector<Chunk> CurrentChunks; |
| 112 | CurrentChunks.reserve(n: ChunksStillConsideredInteresting.size() - |
| 113 | UninterestingChunks.size() - 1); |
| 114 | copy_if(Range: ChunksStillConsideredInteresting, Out: std::back_inserter(x&: CurrentChunks), |
| 115 | P: [&](const Chunk &C) { |
| 116 | return C != ChunkToCheckForUninterestingness && |
| 117 | !UninterestingChunks.count(V: C); |
| 118 | }); |
| 119 | |
| 120 | // Generate Module with only Targets inside Current Chunks |
| 121 | Oracle O(CurrentChunks); |
| 122 | ExtractChunksFromModule(O, *Clone); |
| 123 | |
| 124 | // Some reductions may result in invalid IR. Skip such reductions. |
| 125 | if (Clone->verify(OS: &errs())) { |
| 126 | if (AbortOnInvalidReduction) { |
| 127 | errs() << "Invalid reduction, aborting.\n" ; |
| 128 | Clone->print(ROS&: errs()); |
| 129 | exit(status: 1); |
| 130 | } |
| 131 | if (Verbose) { |
| 132 | errs() << " **** WARNING | reduction resulted in invalid module, " |
| 133 | "skipping\n" ; |
| 134 | } |
| 135 | return nullptr; |
| 136 | } |
| 137 | |
| 138 | if (Verbose) { |
| 139 | errs() << "Ignoring: " ; |
| 140 | ChunkToCheckForUninterestingness.print(); |
| 141 | for (const Chunk &C : UninterestingChunks) |
| 142 | C.print(); |
| 143 | errs() << "\n" ; |
| 144 | } |
| 145 | |
| 146 | if (!Clone->isReduced(Test)) { |
| 147 | // Program became non-reduced, so this chunk appears to be interesting. |
| 148 | if (Verbose) |
| 149 | errs() << "\n" ; |
| 150 | return nullptr; |
| 151 | } |
| 152 | return Clone; |
| 153 | } |
| 154 | |
| 155 | static SmallString<0> ProcessChunkFromSerializedBitcode( |
| 156 | const Chunk ChunkToCheckForUninterestingness, const TestRunner &Test, |
| 157 | ReductionFunc , |
| 158 | const DenseSet<Chunk> &UninterestingChunks, |
| 159 | ArrayRef<Chunk> ChunksStillConsideredInteresting, StringRef OriginalBC, |
| 160 | std::atomic<bool> &AnyReduced) { |
| 161 | LLVMContext Ctx; |
| 162 | auto CloneMMM = std::make_unique<ReducerWorkItem>(); |
| 163 | MemoryBufferRef Data(OriginalBC, "<bc file>" ); |
| 164 | CloneMMM->readBitcode(Data, Ctx, ToolName: Test.getToolName()); |
| 165 | |
| 166 | SmallString<0> Result; |
| 167 | if (std::unique_ptr<ReducerWorkItem> ChunkResult = |
| 168 | CheckChunk(ChunkToCheckForUninterestingness, Clone: std::move(CloneMMM), |
| 169 | Test, ExtractChunksFromModule, UninterestingChunks, |
| 170 | ChunksStillConsideredInteresting)) { |
| 171 | raw_svector_ostream BCOS(Result); |
| 172 | ChunkResult->writeBitcode(OutStream&: BCOS); |
| 173 | // Communicate that the task reduced a chunk. |
| 174 | AnyReduced = true; |
| 175 | } |
| 176 | return Result; |
| 177 | } |
| 178 | |
| 179 | using SharedTaskQueue = std::deque<std::shared_future<SmallString<0>>>; |
| 180 | |
| 181 | /// Runs the Delta Debugging algorithm, splits the code into chunks and |
| 182 | /// reduces the amount of chunks that are considered interesting by the |
| 183 | /// given test. The number of chunks is determined by a preliminary run of the |
| 184 | /// reduction pass where no change must be made to the module. |
| 185 | void llvm::runDeltaPass(TestRunner &Test, const DeltaPass &Pass) { |
| 186 | assert(!Test.getProgram().verify(&errs()) && |
| 187 | "input module is broken before making changes" ); |
| 188 | errs() << "*** " << Pass.Desc << " (" << Pass.Name << ")...\n" ; |
| 189 | |
| 190 | int Targets; |
| 191 | { |
| 192 | // Count the number of chunks by counting the number of calls to |
| 193 | // Oracle::shouldKeep() but always returning true so no changes are |
| 194 | // made. |
| 195 | std::vector<Chunk> AllChunks = {{.Begin: 0, INT_MAX}}; |
| 196 | Oracle Counter(AllChunks); |
| 197 | Pass.Func(Counter, Test.getProgram()); |
| 198 | Targets = Counter.count(); |
| 199 | |
| 200 | assert(!Test.getProgram().verify(&errs()) && |
| 201 | "input module is broken after counting chunks" ); |
| 202 | |
| 203 | if (!SkipVerifyAfterCountingChunks && !Test.getProgram().isReduced(Test)) { |
| 204 | WithColor::warning() |
| 205 | << "input module no longer interesting after counting chunks\n" ; |
| 206 | WithColor::note() << "the interestingness test may be flaky, or there " |
| 207 | "may be an llvm-reduce bug\n" ; |
| 208 | WithColor::note() |
| 209 | << "use -skip-verify-interesting-after-counting-chunks to " |
| 210 | "suppress this warning\n" ; |
| 211 | } |
| 212 | |
| 213 | #ifndef NDEBUG |
| 214 | { |
| 215 | // Make sure that the number of chunks does not change as we reduce. |
| 216 | std::vector<Chunk> NoChunks = {{0, INT_MAX}}; |
| 217 | Oracle NoChunksCounter(NoChunks); |
| 218 | std::unique_ptr<ReducerWorkItem> Clone = |
| 219 | Test.getProgram().clone(Test.getTargetMachine()); |
| 220 | Pass.Func(NoChunksCounter, *Clone); |
| 221 | assert(Targets == NoChunksCounter.count() && |
| 222 | "number of chunks changes when reducing" ); |
| 223 | } |
| 224 | #endif |
| 225 | } |
| 226 | if (!Targets) { |
| 227 | if (Verbose) |
| 228 | errs() << "\nNothing to reduce\n" ; |
| 229 | errs() << SeparatorLine; |
| 230 | return; |
| 231 | } |
| 232 | |
| 233 | std::vector<Chunk> ChunksStillConsideredInteresting = {{.Begin: 0, .End: Targets - 1}}; |
| 234 | std::unique_ptr<ReducerWorkItem> ReducedProgram; |
| 235 | |
| 236 | for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) { |
| 237 | increaseGranularity(Chunks&: ChunksStillConsideredInteresting); |
| 238 | } |
| 239 | |
| 240 | std::atomic<bool> AnyReduced; |
| 241 | std::unique_ptr<ThreadPoolInterface> ChunkThreadPoolPtr; |
| 242 | if (NumJobs > 1) |
| 243 | ChunkThreadPoolPtr = |
| 244 | std::make_unique<DefaultThreadPool>(args: hardware_concurrency(ThreadCount: NumJobs)); |
| 245 | |
| 246 | SmallString<0> OriginalBC; |
| 247 | DenseSet<Chunk> UninterestingChunks; |
| 248 | UninterestingChunks.reserve(Size: Targets); |
| 249 | |
| 250 | bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity; |
| 251 | do { |
| 252 | FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false; |
| 253 | |
| 254 | UninterestingChunks.clear(); |
| 255 | |
| 256 | // When running with more than one thread, serialize the original bitcode |
| 257 | // to OriginalBC. |
| 258 | if (NumJobs > 1) { |
| 259 | OriginalBC.clear(); |
| 260 | raw_svector_ostream BCOS(OriginalBC); |
| 261 | Test.getProgram().writeBitcode(OutStream&: BCOS); |
| 262 | } |
| 263 | |
| 264 | SharedTaskQueue TaskQueue; |
| 265 | for (auto I = ChunksStillConsideredInteresting.rbegin(), |
| 266 | E = ChunksStillConsideredInteresting.rend(); |
| 267 | I != E; ++I) { |
| 268 | std::unique_ptr<ReducerWorkItem> Result = nullptr; |
| 269 | unsigned WorkLeft = std::distance(first: I, last: E); |
| 270 | |
| 271 | // Run in parallel mode, if the user requested more than one thread and |
| 272 | // there are at least a few chunks to process. |
| 273 | if (NumJobs > 1 && WorkLeft > 1) { |
| 274 | unsigned NumInitialTasks = std::min(a: WorkLeft, b: unsigned(NumJobs)); |
| 275 | unsigned NumChunksProcessed = 0; |
| 276 | |
| 277 | ThreadPoolInterface &ChunkThreadPool = *ChunkThreadPoolPtr; |
| 278 | assert(TaskQueue.empty()); |
| 279 | |
| 280 | AnyReduced = false; |
| 281 | // Queue jobs to process NumInitialTasks chunks in parallel using |
| 282 | // ChunkThreadPool. When the tasks are added to the pool, parse the |
| 283 | // original module from OriginalBC with a fresh LLVMContext object. This |
| 284 | // ensures that the cloned module of each task uses an independent |
| 285 | // LLVMContext object. If a task reduces the input, serialize the result |
| 286 | // back in the corresponding Result element. |
| 287 | for (unsigned J = 0; J < NumInitialTasks; ++J) { |
| 288 | Chunk ChunkToCheck = *(I + J); |
| 289 | TaskQueue.emplace_back(args: ChunkThreadPool.async( |
| 290 | F&: ProcessChunkFromSerializedBitcode, ArgList&: ChunkToCheck, ArgList: std::ref(t&: Test), |
| 291 | ArgList: Pass.Func, ArgList&: UninterestingChunks, ArgList&: ChunksStillConsideredInteresting, |
| 292 | ArgList&: OriginalBC, ArgList: std::ref(t&: AnyReduced))); |
| 293 | } |
| 294 | |
| 295 | // Start processing results of the queued tasks. We wait for the first |
| 296 | // task in the queue to finish. If it reduced a chunk, we parse the |
| 297 | // result and exit the loop. |
| 298 | // Otherwise we will try to schedule a new task, if |
| 299 | // * no other pending job reduced a chunk and |
| 300 | // * we have not reached the end of the chunk. |
| 301 | while (!TaskQueue.empty()) { |
| 302 | auto &Future = TaskQueue.front(); |
| 303 | Future.wait(); |
| 304 | |
| 305 | NumChunksProcessed++; |
| 306 | SmallString<0> Res = Future.get(); |
| 307 | TaskQueue.pop_front(); |
| 308 | if (Res.empty()) { |
| 309 | unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size(); |
| 310 | if (!AnyReduced && I + NumScheduledTasks != E) { |
| 311 | Chunk ChunkToCheck = *(I + NumScheduledTasks); |
| 312 | TaskQueue.emplace_back(args: ChunkThreadPool.async( |
| 313 | F&: ProcessChunkFromSerializedBitcode, ArgList&: ChunkToCheck, |
| 314 | ArgList: std::ref(t&: Test), ArgList: Pass.Func, ArgList&: UninterestingChunks, |
| 315 | ArgList&: ChunksStillConsideredInteresting, ArgList&: OriginalBC, |
| 316 | ArgList: std::ref(t&: AnyReduced))); |
| 317 | } |
| 318 | continue; |
| 319 | } |
| 320 | |
| 321 | Result = std::make_unique<ReducerWorkItem>(); |
| 322 | MemoryBufferRef Data(StringRef(Res), "<bc file>" ); |
| 323 | Result->readBitcode(Data, Ctx&: Test.getProgram().M->getContext(), |
| 324 | ToolName: Test.getToolName()); |
| 325 | break; |
| 326 | } |
| 327 | |
| 328 | // If we broke out of the loop, we still need to wait for everything to |
| 329 | // avoid race access to the chunk set. |
| 330 | // |
| 331 | // TODO: Create a way to kill remaining items we're ignoring; they could |
| 332 | // take a long time. |
| 333 | ChunkThreadPoolPtr->wait(); |
| 334 | TaskQueue.clear(); |
| 335 | |
| 336 | // Forward I to the last chunk processed in parallel. |
| 337 | I += NumChunksProcessed - 1; |
| 338 | } else { |
| 339 | Result = CheckChunk( |
| 340 | ChunkToCheckForUninterestingness: *I, Clone: Test.getProgram().clone(TM: Test.getTargetMachine()), Test, |
| 341 | ExtractChunksFromModule: Pass.Func, UninterestingChunks, ChunksStillConsideredInteresting); |
| 342 | } |
| 343 | |
| 344 | if (!Result) |
| 345 | continue; |
| 346 | |
| 347 | const Chunk ChunkToCheckForUninterestingness = *I; |
| 348 | FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true; |
| 349 | UninterestingChunks.insert(V: ChunkToCheckForUninterestingness); |
| 350 | ReducedProgram = std::move(Result); |
| 351 | } |
| 352 | // Delete uninteresting chunks |
| 353 | erase_if(C&: ChunksStillConsideredInteresting, |
| 354 | P: [&UninterestingChunks](const Chunk &C) { |
| 355 | return UninterestingChunks.count(V: C); |
| 356 | }); |
| 357 | } while (!ChunksStillConsideredInteresting.empty() && |
| 358 | (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity || |
| 359 | increaseGranularity(Chunks&: ChunksStillConsideredInteresting))); |
| 360 | |
| 361 | // If we reduced the testcase replace it |
| 362 | if (ReducedProgram) { |
| 363 | Test.setProgram(std::move(ReducedProgram)); |
| 364 | // FIXME: Report meaningful progress info |
| 365 | Test.writeOutput(Message: " **** SUCCESS | Saved new best reduction to " ); |
| 366 | } |
| 367 | if (Verbose) |
| 368 | errs() << "Couldn't increase anymore.\n" ; |
| 369 | errs() << SeparatorLine; |
| 370 | } |
| 371 | |