1//===------------- llubi.cpp - LLVM UB-aware Interpreter --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This utility provides a UB-aware interpreter for programs in LLVM bitcode.
10// It is not built on top of the existing ExecutionEngine interface, but instead
11// implements its own value representation, state tracking and interpreter loop.
12//
13//===----------------------------------------------------------------------===//
14
15#include "lib/Context.h"
16#include "llvm/Config/llvm-config.h"
17#include "llvm/IR/LLVMContext.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/Type.h"
20#include "llvm/IRReader/IRReader.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Format.h"
23#include "llvm/Support/InitLLVM.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/SourceMgr.h"
26#include "llvm/Support/WithColor.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31static cl::opt<std::string> InputFile(cl::desc("<input bitcode>"),
32 cl::Positional, cl::init(Val: "-"));
33
34static cl::list<std::string> InputArgv(cl::ConsumeAfter,
35 cl::desc("<program arguments>..."));
36
37static cl::opt<std::string>
38 EntryFunc("entry-function",
39 cl::desc("Specify the entry function (default = 'main') "
40 "of the executable"),
41 cl::value_desc("function"), cl::init(Val: "main"));
42
43static cl::opt<std::string>
44 FakeArgv0("fake-argv0",
45 cl::desc("Override the 'argv[0]' value passed into the executing"
46 " program"),
47 cl::value_desc("executable"));
48
49static cl::opt<bool>
50 Verbose("verbose", cl::desc("Print results for each instruction executed."),
51 cl::init(Val: false));
52
53cl::OptionCategory InterpreterCategory("Interpreter Options");
54
55static cl::opt<unsigned> MaxMem(
56 "max-mem",
57 cl::desc("Max amount of memory (in bytes) that can be allocated by the"
58 " program, including stack, heap, and global variables."
59 " Set to 0 to disable the limit."),
60 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
61
62static cl::opt<unsigned>
63 MaxSteps("max-steps",
64 cl::desc("Max number of instructions executed."
65 " Set to 0 to disable the limit."),
66 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
67
68static cl::opt<unsigned> MaxStackDepth(
69 "max-stack-depth",
70 cl::desc("Max stack depth (default = 256). Set to 0 to disable the limit."),
71 cl::value_desc("N"), cl::init(Val: 256), cl::cat(InterpreterCategory));
72
73static cl::opt<unsigned>
74 VScale("vscale", cl::desc("The value of llvm.vscale (default = 4)"),
75 cl::value_desc("N"), cl::init(Val: 4), cl::cat(InterpreterCategory));
76
77class VerboseEventHandler : public ubi::EventHandler {
78public:
79 bool onInstructionExecuted(Instruction &I,
80 const ubi::AnyValue &Result) override {
81 if (Result.isNone()) {
82 errs() << I << '\n';
83 } else {
84 errs() << I << " => " << Result << '\n';
85 }
86
87 return true;
88 }
89
90 void onImmediateUB(StringRef Msg) override {
91 errs() << "Immediate UB detected: " << Msg << '\n';
92 }
93
94 void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; }
95
96 bool onBBJump(Instruction &I, BasicBlock &To) override {
97 errs() << I << " jump to ";
98 To.printAsOperand(O&: errs(), /*PrintType=*/false);
99 errs() << '\n';
100 return true;
101 }
102
103 bool onFunctionEntry(Function &F, ArrayRef<ubi::AnyValue> Args,
104 CallBase *CallSite) override {
105 errs() << "Entering function: " << F.getName() << '\n';
106 size_t ArgSize = F.arg_size();
107 for (auto &&[Idx, Arg] : enumerate(First&: Args)) {
108 if (Idx >= ArgSize)
109 errs() << " vaarg[" << (Idx - ArgSize) << "] = " << Arg << '\n';
110 else
111 errs() << " " << *F.getArg(i: Idx) << " = " << Arg << '\n';
112 }
113 return true;
114 }
115
116 bool onFunctionExit(Function &F, const ubi::AnyValue &RetVal) override {
117 errs() << "Exiting function: " << F.getName() << '\n';
118 return true;
119 }
120
121 void onUnrecognizedInstruction(Instruction &I) override {
122 errs() << "Unrecognized instruction: " << I << '\n';
123 }
124};
125
126int main(int argc, char **argv) {
127 InitLLVM X(argc, argv);
128
129 cl::ParseCommandLineOptions(argc, argv, Overview: "llvm ub-aware interpreter\n");
130
131 if (EntryFunc.empty()) {
132 WithColor::error() << "--entry-function name cannot be empty\n";
133 return 1;
134 }
135
136 LLVMContext Context;
137
138 // Load the bitcode...
139 SMDiagnostic Err;
140 std::unique_ptr<Module> Owner = parseIRFile(Filename: InputFile, Err, Context);
141 Module *Mod = Owner.get();
142 if (!Mod) {
143 Err.print(ProgName: argv[0], S&: errs());
144 return 1;
145 }
146
147 // If the user specifically requested an argv[0] to pass into the program,
148 // do it now.
149 if (!FakeArgv0.empty()) {
150 InputFile = static_cast<std::string>(FakeArgv0);
151 } else {
152 // Otherwise, if there is a .bc suffix on the executable strip it off, it
153 // might confuse the program.
154 if (StringRef(InputFile).ends_with(Suffix: ".bc"))
155 InputFile.erase(pos: InputFile.length() - 3);
156 }
157
158 // Add the module's name to the start of the vector of arguments to main().
159 InputArgv.insert(pos: InputArgv.begin(), value: InputFile);
160
161 // Initialize the execution context and set parameters.
162 ubi::Context Ctx(*Mod);
163 Ctx.setMemoryLimit(MaxMem);
164 Ctx.setVScale(VScale);
165 Ctx.setMaxSteps(MaxSteps);
166 Ctx.setMaxStackDepth(MaxStackDepth);
167
168 if (!Ctx.initGlobalValues()) {
169 WithColor::error() << "Failed to initialize global values (e.g., the "
170 "memory limit may be too low).\n";
171 return 1;
172 }
173
174 // Call the main function from M as if its signature were:
175 // int main (int argc, char **argv)
176 // using the contents of Args to determine argc & argv
177 Function *EntryFn = Mod->getFunction(Name: EntryFunc);
178 if (!EntryFn) {
179 WithColor::error() << '\'' << EntryFunc
180 << "\' function not found in module.\n";
181 return 1;
182 }
183 TargetLibraryInfo TLI(Ctx.getTLIImpl());
184 Type *IntTy = IntegerType::get(C&: Ctx.getContext(), NumBits: TLI.getIntSize());
185 auto *MainFuncTy = FunctionType::get(
186 Result: IntTy, Params: {IntTy, PointerType::getUnqual(C&: Ctx.getContext())}, isVarArg: false);
187 SmallVector<ubi::AnyValue> Args;
188 if (EntryFn->getFunctionType() == MainFuncTy) {
189 Args.push_back(
190 Elt: Ctx.getConstantValue(C: ConstantInt::get(Ty: IntTy, V: InputArgv.size())));
191
192 uint32_t PtrSize = Ctx.getDataLayout().getPointerSize();
193 uint64_t PtrsSize = PtrSize * (InputArgv.size() + 1);
194 auto ArgvPtrsMem = Ctx.allocate(Size: PtrsSize, Align: 8, Name: "argv",
195 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed);
196 if (!ArgvPtrsMem) {
197 WithColor::error() << "Failed to allocate memory for argv pointers.\n";
198 return 1;
199 }
200 for (const auto &[Idx, Arg] : enumerate(First&: InputArgv)) {
201 uint64_t Size = Arg.length() + 1;
202 auto ArgvStrMem = Ctx.allocate(Size, Align: 8, Name: "argv_str",
203 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed);
204 if (!ArgvStrMem) {
205 WithColor::error() << "Failed to allocate memory for argv strings.\n";
206 return 1;
207 }
208 ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(Obj: ArgvStrMem);
209 ArgvStrMem->writeRawBytes(Offset: 0, Data: Arg.c_str(), Length: Arg.length());
210 ArgvPtrsMem->writePointer(Offset: Idx * PtrSize, Ptr: ArgPtr, DL: Ctx.getDataLayout());
211 }
212 Args.push_back(Elt: Ctx.deriveFromMemoryObject(Obj: ArgvPtrsMem));
213 } else if (!EntryFn->arg_empty()) {
214 // If the signature does not match (e.g., llvm-reduce change the signature
215 // of main), it will pass null values for all arguments.
216 WithColor::warning()
217 << "The signature of function '" << EntryFunc
218 << "' does not match 'int main(int, char**)', passing null values for "
219 "all arguments.\n";
220 Args.reserve(N: EntryFn->arg_size());
221 for (Argument &Arg : EntryFn->args())
222 Args.push_back(Elt: ubi::AnyValue::getNullValue(Ctx, Ty: Arg.getType()));
223 }
224
225 ubi::EventHandler NoopHandler;
226 VerboseEventHandler VerboseHandler;
227 ubi::AnyValue RetVal;
228 if (!Ctx.runFunction(F&: *EntryFn, Args, RetVal,
229 Handler&: Verbose ? VerboseHandler : NoopHandler)) {
230 WithColor::error() << "Execution of function '" << EntryFunc
231 << "' failed.\n";
232 return 1;
233 }
234
235 // If the function returns an integer, return that as the exit code.
236 if (EntryFn->getReturnType()->isIntegerTy()) {
237 assert(!RetVal.isNone() && "Expected a return value from entry function");
238 if (RetVal.isPoison()) {
239 WithColor::error() << "Execution of function '" << EntryFunc
240 << "' resulted in poison return value.\n";
241 return 1;
242 }
243 APInt Result = RetVal.asInteger();
244 return (int)Result.extractBitsAsZExtValue(
245 numBits: std::min(a: Result.getBitWidth(), b: 8U), bitPosition: 0);
246 }
247 return 0;
248}
249