//===------------- llubi.cpp - LLVM UB-aware Interpreter --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This utility provides a UB-aware interpreter for programs in LLVM bitcode.
// It is not built on top of the existing ExecutionEngine interface, but instead
// implements its own value representation, state tracking and interpreter loop.
//
//===----------------------------------------------------------------------===//
14
15#include "lib/Context.h"
16#include "llvm/Config/llvm-config.h"
17#include "llvm/IR/LLVMContext.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/Type.h"
20#include "llvm/IRReader/IRReader.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Format.h"
23#include "llvm/Support/InitLLVM.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/SourceMgr.h"
26#include "llvm/Support/WithColor.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31static cl::opt<std::string> InputFile(cl::desc("<input bitcode>"),
32 cl::Positional, cl::init(Val: "-"));
33
34static cl::list<std::string> InputArgv(cl::ConsumeAfter,
35 cl::desc("<program arguments>..."));
36
37static cl::opt<std::string>
38 EntryFunc("entry-function",
39 cl::desc("Specify the entry function (default = 'main') "
40 "of the executable"),
41 cl::value_desc("function"), cl::init(Val: "main"));
42
43static cl::opt<std::string>
44 FakeArgv0("fake-argv0",
45 cl::desc("Override the 'argv[0]' value passed into the executing"
46 " program"),
47 cl::value_desc("executable"));
48
49static cl::opt<bool>
50 Verbose("verbose", cl::desc("Print results for each instruction executed."),
51 cl::init(Val: false));
52
53cl::OptionCategory InterpreterCategory("Interpreter Options");
54
55static cl::opt<unsigned> MaxMem(
56 "max-mem",
57 cl::desc("Max amount of memory (in bytes) that can be allocated by the"
58 " program, including stack, heap, and global variables."
59 " Set to 0 to disable the limit."),
60 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
61
62static cl::opt<unsigned>
63 MaxSteps("max-steps",
64 cl::desc("Max number of instructions executed."
65 " Set to 0 to disable the limit."),
66 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
67
68static cl::opt<unsigned> MaxStackDepth(
69 "max-stack-depth",
70 cl::desc("Max stack depth (default = 256). Set to 0 to disable the limit."),
71 cl::value_desc("N"), cl::init(Val: 256), cl::cat(InterpreterCategory));
72
73static cl::opt<unsigned>
74 VScale("vscale", cl::desc("The value of llvm.vscale (default = 4)"),
75 cl::value_desc("N"), cl::init(Val: 4), cl::cat(InterpreterCategory));
76
77static cl::opt<unsigned>
78 Seed("seed",
79 cl::desc("Random seed for non-deterministic behavior (default = 0)"),
80 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
81
82cl::opt<ubi::UndefValueBehavior> UndefBehavior(
83 "", cl::desc("Choose undef value behavior:"),
84 cl::values(clEnumVal(ubi::UndefValueBehavior::NonDeterministic,
85 "Each load of an uninitialized byte yields a freshly "
86 "random value."),
87 clEnumVal(ubi::UndefValueBehavior::Zero,
88 "All uses of an uninitialized byte yield zero.")));
89
90class VerboseEventHandler : public ubi::EventHandler {
91public:
92 bool onInstructionExecuted(Instruction &I,
93 const ubi::AnyValue &Result) override {
94 if (Result.isNone()) {
95 errs() << I << '\n';
96 } else {
97 errs() << I << " => " << Result << '\n';
98 }
99
100 return true;
101 }
102
103 void onImmediateUB(StringRef Msg) override {
104 errs() << "Immediate UB detected: " << Msg << '\n';
105 }
106
107 void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; }
108
109 bool onBBJump(Instruction &I, BasicBlock &To) override {
110 errs() << I << " jump to ";
111 To.printAsOperand(O&: errs(), /*PrintType=*/false);
112 errs() << '\n';
113 return true;
114 }
115
116 bool onFunctionEntry(Function &F, ArrayRef<ubi::AnyValue> Args,
117 CallBase *CallSite) override {
118 errs() << "Entering function: " << F.getName() << '\n';
119 size_t ArgSize = F.arg_size();
120 for (auto &&[Idx, Arg] : enumerate(First&: Args)) {
121 if (Idx >= ArgSize)
122 errs() << " vaarg[" << (Idx - ArgSize) << "] = " << Arg << '\n';
123 else
124 errs() << " " << *F.getArg(i: Idx) << " = " << Arg << '\n';
125 }
126 return true;
127 }
128
129 bool onFunctionExit(Function &F, const ubi::AnyValue &RetVal) override {
130 errs() << "Exiting function: " << F.getName() << '\n';
131 return true;
132 }
133
134 void onUnrecognizedInstruction(Instruction &I) override {
135 errs() << "Unrecognized instruction: " << I << '\n';
136 }
137};
138
139int main(int argc, char **argv) {
140 InitLLVM X(argc, argv);
141
142 cl::ParseCommandLineOptions(argc, argv, Overview: "llvm ub-aware interpreter\n");
143
144 if (EntryFunc.empty()) {
145 WithColor::error() << "--entry-function name cannot be empty\n";
146 return 1;
147 }
148
149 LLVMContext Context;
150
151 // Load the bitcode...
152 SMDiagnostic Err;
153 std::unique_ptr<Module> Owner = parseIRFile(Filename: InputFile, Err, Context);
154 Module *Mod = Owner.get();
155 if (!Mod) {
156 Err.print(ProgName: argv[0], S&: errs());
157 return 1;
158 }
159
160 // If the user specifically requested an argv[0] to pass into the program,
161 // do it now.
162 if (!FakeArgv0.empty()) {
163 InputFile = static_cast<std::string>(FakeArgv0);
164 } else {
165 // Otherwise, if there is a .bc suffix on the executable strip it off, it
166 // might confuse the program.
167 if (StringRef(InputFile).ends_with(Suffix: ".bc"))
168 InputFile.erase(pos: InputFile.length() - 3);
169 }
170
171 // Add the module's name to the start of the vector of arguments to main().
172 InputArgv.insert(pos: InputArgv.begin(), value: InputFile);
173
174 // Initialize the execution context and set parameters.
175 ubi::Context Ctx(*Mod);
176 Ctx.setMemoryLimit(MaxMem);
177 Ctx.setVScale(VScale);
178 Ctx.setMaxSteps(MaxSteps);
179 Ctx.setMaxStackDepth(MaxStackDepth);
180 Ctx.setUndefValueBehavior(UndefBehavior);
181 Ctx.reseed(Seed);
182
183 if (!Ctx.initGlobalValues()) {
184 WithColor::error() << "Failed to initialize global values (e.g., the "
185 "memory limit may be too low).\n";
186 return 1;
187 }
188
189 // Call the main function from M as if its signature were:
190 // int main (int argc, char **argv)
191 // using the contents of Args to determine argc & argv
192 Function *EntryFn = Mod->getFunction(Name: EntryFunc);
193 if (!EntryFn) {
194 WithColor::error() << '\'' << EntryFunc
195 << "\' function not found in module.\n";
196 return 1;
197 }
198 TargetLibraryInfo TLI(Ctx.getTLIImpl());
199 Type *IntTy = IntegerType::get(C&: Ctx.getContext(), NumBits: TLI.getIntSize());
200 Type *PtrTy = PointerType::getUnqual(C&: Ctx.getContext());
201 auto *MainFuncTy = FunctionType::get(Result: IntTy, Params: {IntTy, PtrTy}, isVarArg: false);
202 SmallVector<ubi::AnyValue> Args;
203 if (EntryFn->getFunctionType() == MainFuncTy) {
204 Args.push_back(
205 Elt: Ctx.getConstantValue(C: ConstantInt::get(Ty: IntTy, V: InputArgv.size())));
206
207 uint32_t PtrSize = Ctx.getDataLayout().getPointerSize();
208 uint64_t PtrsSize = PtrSize * (InputArgv.size() + 1);
209 auto ArgvPtrsMem = Ctx.allocate(Size: PtrsSize, Align: 8, Name: "argv",
210 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed);
211 if (!ArgvPtrsMem) {
212 WithColor::error() << "Failed to allocate memory for argv pointers.\n";
213 return 1;
214 }
215 for (const auto &[Idx, Arg] : enumerate(First&: InputArgv)) {
216 uint64_t Size = Arg.length() + 1;
217 auto ArgvStrMem = Ctx.allocate(Size, Align: 8, Name: "argv_str",
218 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed);
219 if (!ArgvStrMem) {
220 WithColor::error() << "Failed to allocate memory for argv strings.\n";
221 return 1;
222 }
223 ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(Obj: ArgvStrMem);
224 Ctx.storeRawBytes(MO&: *ArgvStrMem, Offset: 0, Data: Arg.c_str(), Size: Arg.length());
225 Ctx.store(MO&: *ArgvPtrsMem, Offset: Idx * PtrSize, Val: ArgPtr, ValTy: PtrTy);
226 }
227 Args.push_back(Elt: Ctx.deriveFromMemoryObject(Obj: ArgvPtrsMem));
228 } else if (!EntryFn->arg_empty()) {
229 // If the signature does not match (e.g., llvm-reduce change the signature
230 // of main), it will pass null values for all arguments.
231 WithColor::warning()
232 << "The signature of function '" << EntryFunc
233 << "' does not match 'int main(int, char**)', passing null values for "
234 "all arguments.\n";
235 Args.reserve(N: EntryFn->arg_size());
236 for (Argument &Arg : EntryFn->args())
237 Args.push_back(Elt: ubi::AnyValue::getNullValue(Ctx, Ty: Arg.getType()));
238 }
239
240 ubi::EventHandler NoopHandler;
241 VerboseEventHandler VerboseHandler;
242 ubi::AnyValue RetVal;
243 if (!Ctx.runFunction(F&: *EntryFn, Args, RetVal,
244 Handler&: Verbose ? VerboseHandler : NoopHandler)) {
245 WithColor::error() << "Execution of function '" << EntryFunc
246 << "' failed.\n";
247 return 1;
248 }
249
250 // If the function returns an integer, return that as the exit code.
251 if (EntryFn->getReturnType()->isIntegerTy()) {
252 assert(!RetVal.isNone() && "Expected a return value from entry function");
253 if (RetVal.isPoison()) {
254 WithColor::error() << "Execution of function '" << EntryFunc
255 << "' resulted in poison return value.\n";
256 return 1;
257 }
258 APInt Result = RetVal.asInteger();
259 return (int)Result.extractBitsAsZExtValue(
260 numBits: std::min(a: Result.getBitWidth(), b: 8U), bitPosition: 0);
261 }
262 return 0;
263}
264