1 | //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This program finds similar sections of a Module, and exports them as a JSON |
10 | // file. |
11 | // |
12 | // To find similarities contained across multiple modules, please use llvm-link |
13 | // first to merge the modules. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
18 | #include "llvm/IRReader/IRReader.h" |
19 | #include "llvm/Support/CommandLine.h" |
20 | #include "llvm/Support/FileSystem.h" |
21 | #include "llvm/Support/InitLLVM.h" |
22 | #include "llvm/Support/JSON.h" |
23 | #include "llvm/Support/SourceMgr.h" |
24 | #include "llvm/Support/ToolOutputFile.h" |
25 | |
26 | using namespace llvm; |
27 | using namespace IRSimilarity; |
28 | |
29 | static cl::opt<std::string> OutputFilename("o" , cl::desc("Output Filename" ), |
30 | cl::init(Val: "-" ), |
31 | cl::value_desc("filename" )); |
32 | |
33 | static cl::opt<std::string> InputSourceFile(cl::Positional, |
34 | cl::desc("<Source file>" ), |
35 | cl::init(Val: "-" ), |
36 | cl::value_desc("filename" )); |
37 | |
38 | /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. |
39 | /// |
40 | /// \param I - The Instruction to find the instruction number for. |
41 | /// \param LLVMInstNum - The mapping of Instructions to their location in the |
42 | /// module represented by an unsigned integer. |
43 | /// \returns The instruction number for \p I if it exists. |
44 | std::optional<unsigned> |
45 | getPositionInModule(const Instruction *I, |
46 | const DenseMap<Instruction *, unsigned> &LLVMInstNum) { |
47 | assert(I && "Instruction is nullptr!" ); |
48 | DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(Val: I); |
49 | if (It == LLVMInstNum.end()) |
50 | return std::nullopt; |
51 | return It->second; |
52 | } |
53 | |
54 | /// Exports the given SimilarityGroups to a JSON file at \p FilePath. |
55 | /// |
56 | /// \param FilePath - The path to the output location. |
57 | /// \param SimSections - The similarity groups to process. |
58 | /// \param LLVMInstNum - The mapping of Instructions to their location in the |
59 | /// module represented by an unsigned integer. |
60 | /// \returns A nonzero error code if there was a failure creating the file. |
61 | std::error_code |
62 | exportToFile(const StringRef FilePath, |
63 | const SimilarityGroupList &SimSections, |
64 | const DenseMap<Instruction *, unsigned> &LLVMInstNum) { |
65 | std::error_code EC; |
66 | std::unique_ptr<ToolOutputFile> Out( |
67 | new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); |
68 | if (EC) |
69 | return EC; |
70 | |
71 | json::OStream J(Out->os(), 1); |
72 | J.objectBegin(); |
73 | |
74 | unsigned SimOption = 1; |
75 | // Process each list of SimilarityGroups organized by the Module. |
76 | for (const SimilarityGroup &G : SimSections) { |
77 | std::string SimOptionStr = std::to_string(val: SimOption); |
78 | J.attributeBegin(Key: SimOptionStr); |
79 | J.arrayBegin(); |
80 | // For each file there is a list of the range where the similarity |
81 | // exists. |
82 | for (const IRSimilarityCandidate &C : G) { |
83 | std::optional<unsigned> Start = |
84 | getPositionInModule(I: (*C.front()).Inst, LLVMInstNum); |
85 | std::optional<unsigned> End = |
86 | getPositionInModule(I: (*C.back()).Inst, LLVMInstNum); |
87 | |
88 | assert(Start && |
89 | "Could not find instruction number for first instruction" ); |
90 | assert(End && "Could not find instruction number for last instruction" ); |
91 | |
92 | J.object(Contents: [&] { |
93 | J.attribute(Key: "start" , Contents: *Start); |
94 | J.attribute(Key: "end" , Contents: *End); |
95 | }); |
96 | } |
97 | J.arrayEnd(); |
98 | J.attributeEnd(); |
99 | SimOption++; |
100 | } |
101 | J.objectEnd(); |
102 | |
103 | Out->keep(); |
104 | |
105 | return EC; |
106 | } |
107 | |
108 | int main(int argc, const char *argv[]) { |
109 | InitLLVM X(argc, argv); |
110 | |
111 | cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM IR Similarity Visualizer\n" ); |
112 | |
113 | LLVMContext CurrContext; |
114 | SMDiagnostic Err; |
115 | std::unique_ptr<Module> ModuleToAnalyze = |
116 | parseIRFile(Filename: InputSourceFile, Err, Context&: CurrContext); |
117 | |
118 | if (!ModuleToAnalyze) { |
119 | Err.print(ProgName: argv[0], S&: errs()); |
120 | return 1; |
121 | } |
122 | |
123 | // Mapping from an Instruction pointer to its occurrence in a sequential |
124 | // list of all the Instructions in a Module. |
125 | DenseMap<Instruction *, unsigned> LLVMInstNum; |
126 | |
127 | // We give each instruction a number, which gives us a start and end value |
128 | // for the beginning and end of each IRSimilarityCandidate. |
129 | unsigned InstructionNumber = 1; |
130 | for (Function &F : *ModuleToAnalyze) |
131 | for (BasicBlock &BB : F) |
132 | for (Instruction &I : BB.instructionsWithoutDebug()) |
133 | LLVMInstNum[&I]= InstructionNumber++; |
134 | |
135 | // The similarity identifier we will use to find the similar sections. |
136 | IRSimilarityIdentifier SimIdent; |
137 | SimilarityGroupList SimilaritySections = |
138 | SimIdent.findSimilarity(M&: *ModuleToAnalyze); |
139 | |
140 | std::error_code E = |
141 | exportToFile(FilePath: OutputFilename, SimSections: SimilaritySections, LLVMInstNum); |
142 | if (E) { |
143 | errs() << argv[0] << ": " << E.message() << '\n'; |
144 | return 2; |
145 | } |
146 | |
147 | return 0; |
148 | } |
149 | |