1//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This program finds similar sections of a Module, and exports them as a JSON
10// file.
11//
12// To find similarities contained across multiple modules, please use llvm-link
13// first to merge the modules.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Analysis/IRSimilarityIdentifier.h"
18#include "llvm/IRReader/IRReader.h"
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Support/FileSystem.h"
21#include "llvm/Support/InitLLVM.h"
22#include "llvm/Support/JSON.h"
23#include "llvm/Support/SourceMgr.h"
24#include "llvm/Support/ToolOutputFile.h"
25
26using namespace llvm;
27using namespace IRSimilarity;
28
29static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30 cl::init(Val: "-"),
31 cl::value_desc("filename"));
32
33static cl::opt<std::string> InputSourceFile(cl::Positional,
34 cl::desc("<Source file>"),
35 cl::init(Val: "-"),
36 cl::value_desc("filename"));
37
38/// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39///
40/// \param I - The Instruction to find the instruction number for.
41/// \param LLVMInstNum - The mapping of Instructions to their location in the
42/// module represented by an unsigned integer.
43/// \returns The instruction number for \p I if it exists.
44std::optional<unsigned>
45getPositionInModule(const Instruction *I,
46 const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47 assert(I && "Instruction is nullptr!");
48 DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(Val: I);
49 if (It == LLVMInstNum.end())
50 return std::nullopt;
51 return It->second;
52}
53
54/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55///
56/// \param FilePath - The path to the output location.
57/// \param SimSections - The similarity groups to process.
58/// \param LLVMInstNum - The mapping of Instructions to their location in the
59/// module represented by an unsigned integer.
60/// \returns A nonzero error code if there was a failure creating the file.
61std::error_code
62exportToFile(const StringRef FilePath,
63 const SimilarityGroupList &SimSections,
64 const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65 std::error_code EC;
66 std::unique_ptr<ToolOutputFile> Out(
67 new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68 if (EC)
69 return EC;
70
71 json::OStream J(Out->os(), 1);
72 J.objectBegin();
73
74 unsigned SimOption = 1;
75 // Process each list of SimilarityGroups organized by the Module.
76 for (const SimilarityGroup &G : SimSections) {
77 std::string SimOptionStr = std::to_string(val: SimOption);
78 J.attributeBegin(Key: SimOptionStr);
79 J.arrayBegin();
80 // For each file there is a list of the range where the similarity
81 // exists.
82 for (const IRSimilarityCandidate &C : G) {
83 std::optional<unsigned> Start =
84 getPositionInModule(I: (*C.front()).Inst, LLVMInstNum);
85 std::optional<unsigned> End =
86 getPositionInModule(I: (*C.back()).Inst, LLVMInstNum);
87
88 assert(Start &&
89 "Could not find instruction number for first instruction");
90 assert(End && "Could not find instruction number for last instruction");
91
92 J.object(Contents: [&] {
93 J.attribute(Key: "start", Contents: *Start);
94 J.attribute(Key: "end", Contents: *End);
95 });
96 }
97 J.arrayEnd();
98 J.attributeEnd();
99 SimOption++;
100 }
101 J.objectEnd();
102
103 Out->keep();
104
105 return EC;
106}
107
108int main(int argc, const char *argv[]) {
109 InitLLVM X(argc, argv);
110
111 cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM IR Similarity Visualizer\n");
112
113 LLVMContext CurrContext;
114 SMDiagnostic Err;
115 std::unique_ptr<Module> ModuleToAnalyze =
116 parseIRFile(Filename: InputSourceFile, Err, Context&: CurrContext);
117
118 if (!ModuleToAnalyze) {
119 Err.print(ProgName: argv[0], S&: errs());
120 return 1;
121 }
122
123 // Mapping from an Instruction pointer to its occurrence in a sequential
124 // list of all the Instructions in a Module.
125 DenseMap<Instruction *, unsigned> LLVMInstNum;
126
127 // We give each instruction a number, which gives us a start and end value
128 // for the beginning and end of each IRSimilarityCandidate.
129 unsigned InstructionNumber = 1;
130 for (Function &F : *ModuleToAnalyze)
131 for (BasicBlock &BB : F)
132 for (Instruction &I : BB.instructionsWithoutDebug())
133 LLVMInstNum[&I]= InstructionNumber++;
134
135 // The similarity identifier we will use to find the similar sections.
136 IRSimilarityIdentifier SimIdent;
137 SimilarityGroupList SimilaritySections =
138 SimIdent.findSimilarity(M&: *ModuleToAnalyze);
139
140 std::error_code E =
141 exportToFile(FilePath: OutputFilename, SimSections: SimilaritySections, LLVMInstNum);
142 if (E) {
143 errs() << argv[0] << ": " << E.message() << '\n';
144 return 2;
145 }
146
147 return 0;
148}
149