| 1 | //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This program finds similar sections of a Module, and exports them as a JSON |
| 10 | // file. |
| 11 | // |
| 12 | // To find similarities contained across multiple modules, please use llvm-link |
| 13 | // first to merge the modules. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
| 18 | #include "llvm/IRReader/IRReader.h" |
| 19 | #include "llvm/Support/CommandLine.h" |
| 20 | #include "llvm/Support/FileSystem.h" |
| 21 | #include "llvm/Support/InitLLVM.h" |
| 22 | #include "llvm/Support/JSON.h" |
| 23 | #include "llvm/Support/SourceMgr.h" |
| 24 | #include "llvm/Support/ToolOutputFile.h" |
| 25 | |
| 26 | using namespace llvm; |
| 27 | using namespace IRSimilarity; |
| 28 | |
| 29 | static cl::opt<std::string> OutputFilename("o" , cl::desc("Output Filename" ), |
| 30 | cl::init(Val: "-" ), |
| 31 | cl::value_desc("filename" )); |
| 32 | |
| 33 | static cl::opt<std::string> InputSourceFile(cl::Positional, |
| 34 | cl::desc("<Source file>" ), |
| 35 | cl::init(Val: "-" ), |
| 36 | cl::value_desc("filename" )); |
| 37 | |
| 38 | /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. |
| 39 | /// |
| 40 | /// \param I - The Instruction to find the instruction number for. |
| 41 | /// \param LLVMInstNum - The mapping of Instructions to their location in the |
| 42 | /// module represented by an unsigned integer. |
| 43 | /// \returns The instruction number for \p I if it exists. |
| 44 | std::optional<unsigned> |
| 45 | getPositionInModule(const Instruction *I, |
| 46 | const DenseMap<Instruction *, unsigned> &LLVMInstNum) { |
| 47 | assert(I && "Instruction is nullptr!" ); |
| 48 | DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(Val: I); |
| 49 | if (It == LLVMInstNum.end()) |
| 50 | return std::nullopt; |
| 51 | return It->second; |
| 52 | } |
| 53 | |
| 54 | /// Exports the given SimilarityGroups to a JSON file at \p FilePath. |
| 55 | /// |
| 56 | /// \param FilePath - The path to the output location. |
| 57 | /// \param SimSections - The similarity groups to process. |
| 58 | /// \param LLVMInstNum - The mapping of Instructions to their location in the |
| 59 | /// module represented by an unsigned integer. |
| 60 | /// \returns A nonzero error code if there was a failure creating the file. |
| 61 | std::error_code |
| 62 | exportToFile(const StringRef FilePath, |
| 63 | const SimilarityGroupList &SimSections, |
| 64 | const DenseMap<Instruction *, unsigned> &LLVMInstNum) { |
| 65 | std::error_code EC; |
| 66 | std::unique_ptr<ToolOutputFile> Out( |
| 67 | new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); |
| 68 | if (EC) |
| 69 | return EC; |
| 70 | |
| 71 | json::OStream J(Out->os(), 1); |
| 72 | J.objectBegin(); |
| 73 | |
| 74 | unsigned SimOption = 1; |
| 75 | // Process each list of SimilarityGroups organized by the Module. |
| 76 | for (const SimilarityGroup &G : SimSections) { |
| 77 | std::string SimOptionStr = std::to_string(val: SimOption); |
| 78 | J.attributeBegin(Key: SimOptionStr); |
| 79 | J.arrayBegin(); |
| 80 | // For each file there is a list of the range where the similarity |
| 81 | // exists. |
| 82 | for (const IRSimilarityCandidate &C : G) { |
| 83 | std::optional<unsigned> Start = |
| 84 | getPositionInModule(I: (*C.front()).Inst, LLVMInstNum); |
| 85 | std::optional<unsigned> End = |
| 86 | getPositionInModule(I: (*C.back()).Inst, LLVMInstNum); |
| 87 | |
| 88 | assert(Start && |
| 89 | "Could not find instruction number for first instruction" ); |
| 90 | assert(End && "Could not find instruction number for last instruction" ); |
| 91 | |
| 92 | J.object(Contents: [&] { |
| 93 | J.attribute(Key: "start" , Contents: *Start); |
| 94 | J.attribute(Key: "end" , Contents: *End); |
| 95 | }); |
| 96 | } |
| 97 | J.arrayEnd(); |
| 98 | J.attributeEnd(); |
| 99 | SimOption++; |
| 100 | } |
| 101 | J.objectEnd(); |
| 102 | |
| 103 | Out->keep(); |
| 104 | |
| 105 | return EC; |
| 106 | } |
| 107 | |
| 108 | int main(int argc, const char *argv[]) { |
| 109 | InitLLVM X(argc, argv); |
| 110 | |
| 111 | cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM IR Similarity Visualizer\n" ); |
| 112 | |
| 113 | LLVMContext CurrContext; |
| 114 | SMDiagnostic Err; |
| 115 | std::unique_ptr<Module> ModuleToAnalyze = |
| 116 | parseIRFile(Filename: InputSourceFile, Err, Context&: CurrContext); |
| 117 | |
| 118 | if (!ModuleToAnalyze) { |
| 119 | Err.print(ProgName: argv[0], S&: errs()); |
| 120 | return 1; |
| 121 | } |
| 122 | |
| 123 | // Mapping from an Instruction pointer to its occurrence in a sequential |
| 124 | // list of all the Instructions in a Module. |
| 125 | DenseMap<Instruction *, unsigned> LLVMInstNum; |
| 126 | |
| 127 | // We give each instruction a number, which gives us a start and end value |
| 128 | // for the beginning and end of each IRSimilarityCandidate. |
| 129 | unsigned InstructionNumber = 1; |
| 130 | for (Function &F : *ModuleToAnalyze) |
| 131 | for (BasicBlock &BB : F) |
| 132 | for (Instruction &I : BB.instructionsWithoutDebug()) |
| 133 | LLVMInstNum[&I]= InstructionNumber++; |
| 134 | |
| 135 | // The similarity identifier we will use to find the similar sections. |
| 136 | IRSimilarityIdentifier SimIdent; |
| 137 | SimilarityGroupList SimilaritySections = |
| 138 | SimIdent.findSimilarity(M&: *ModuleToAnalyze); |
| 139 | |
| 140 | std::error_code E = |
| 141 | exportToFile(FilePath: OutputFilename, SimSections: SimilaritySections, LLVMInstNum); |
| 142 | if (E) { |
| 143 | errs() << argv[0] << ": " << E.message() << '\n'; |
| 144 | return 2; |
| 145 | } |
| 146 | |
| 147 | return 0; |
| 148 | } |
| 149 | |