1//===------------- llubi.cpp - LLVM UB-aware Interpreter --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This utility provides a UB-aware interpreter for programs in LLVM bitcode.
10// It is not built on top of the existing ExecutionEngine interface, but instead
11// implements its own value representation, state tracking and interpreter loop.
12//
13//===----------------------------------------------------------------------===//
14
15#include "lib/Context.h"
16#include "llvm/Config/llvm-config.h"
17#include "llvm/IR/LLVMContext.h"
18#include "llvm/IR/Module.h"
19#include "llvm/IR/Type.h"
20#include "llvm/IR/Verifier.h"
21#include "llvm/IRReader/IRReader.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/Format.h"
24#include "llvm/Support/InitLLVM.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/SourceMgr.h"
27#include "llvm/Support/WithColor.h"
28#include "llvm/Support/raw_ostream.h"
29
30using namespace llvm;
31
32static cl::opt<std::string> InputFile(cl::desc("<input bitcode>"),
33 cl::Positional, cl::init(Val: "-"));
34
35static cl::list<std::string> InputArgv(cl::ConsumeAfter,
36 cl::desc("<program arguments>..."));
37
38static cl::opt<std::string>
39 EntryFunc("entry-function",
40 cl::desc("Specify the entry function (default = 'main') "
41 "of the executable"),
42 cl::value_desc("function"), cl::init(Val: "main"));
43
44static cl::opt<std::string>
45 FakeArgv0("fake-argv0",
46 cl::desc("Override the 'argv[0]' value passed into the executing"
47 " program"),
48 cl::value_desc("executable"));
49
50static cl::opt<bool>
51 Verbose("verbose", cl::desc("Print results for each instruction executed."),
52 cl::init(Val: false));
53
54cl::OptionCategory InterpreterCategory("Interpreter Options");
55
56static cl::opt<unsigned> MaxMem(
57 "max-mem",
58 cl::desc("Max amount of memory (in bytes) that can be allocated by the"
59 " program, including stack, heap, and global variables."
60 " Set to 0 to disable the limit."),
61 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
62
63static cl::opt<unsigned>
64 MaxSteps("max-steps",
65 cl::desc("Max number of instructions executed."
66 " Set to 0 to disable the limit."),
67 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
68
69static cl::opt<unsigned> MaxStackDepth(
70 "max-stack-depth",
71 cl::desc("Max stack depth (default = 256). Set to 0 to disable the limit."),
72 cl::value_desc("N"), cl::init(Val: 256), cl::cat(InterpreterCategory));
73
74static cl::opt<unsigned>
75 VScale("vscale", cl::desc("The value of llvm.vscale (default = 4)"),
76 cl::value_desc("N"), cl::init(Val: 4), cl::cat(InterpreterCategory));
77
78static cl::opt<unsigned>
79 Seed("seed",
80 cl::desc("Random seed for non-deterministic behavior (default = 0)"),
81 cl::value_desc("N"), cl::init(Val: 0), cl::cat(InterpreterCategory));
82
83static cl::opt<bool>
84 Deterministic("deterministic",
85 cl::desc("Disable interpreter-introduced non-determinism."),
86 cl::init(Val: false), cl::cat(InterpreterCategory));
87
88static cl::opt<bool> FuseFMulAdd("fuse-fmuladd",
89 cl::desc("Fuse llvm.fmuladd.* intrinsic"),
90 cl::init(Val: true), cl::cat(InterpreterCategory));
91
92static cl::opt<bool> NoVerify("disable-verify",
93 cl::desc("Do not run the IR verifier"),
94 cl::init(Val: false), cl::cat(InterpreterCategory));
95
96cl::opt<ubi::UndefValueBehavior> UndefBehavior(
97 "", cl::desc("Choose undef value behavior:"),
98 cl::values(clEnumVal(ubi::UndefValueBehavior::NonDeterministic,
99 "Each load of an uninitialized byte yields a freshly "
100 "random value."),
101 clEnumVal(ubi::UndefValueBehavior::Zero,
102 "All uses of an uninitialized byte yield zero.")));
103
104cl::opt<ubi::NaNPropagationBehavior> NaNPropagationBehavior(
105 "", cl::desc("Choose NaN propagation behavior:"),
106 cl::values(
107 clEnumValN(ubi::NaNPropagationBehavior::NonDeterministic, "nan-nodet",
108 "Non-deterministically choose from valid NaN results as "
109 "specified by language reference."),
110 clEnumValN(ubi::NaNPropagationBehavior::PreferredNaN, "nan-preferred",
111 "The quiet bit is set and the payload is all-zero."),
112 clEnumValN(
113 ubi::NaNPropagationBehavior::QuietingNaN, "nan-quieting",
114 "The quiet bit is set and the payload is copied from any input"
115 "operand that is a NaN."),
116 clEnumValN(ubi::NaNPropagationBehavior::UnchangedNaN, "nan-unchanged",
117 "The quiet bit and payload are copied from any input operand"
118 "that is a NaN"),
119 clEnumValN(ubi::NaNPropagationBehavior::TargetSpecificNaN,
120 "nan-target-specific",
121 "The quiet bit is set and the payload is picked from a "
122 "known target-specific set of \"extra\" possible NaN "
123 "payloads.")),
124 cl::init(Val: ubi::NaNPropagationBehavior::NonDeterministic));
125
126class NoopEventHandler : public ubi::EventHandler {
127 void onImmediateUB(StringRef Msg) override {
128 errs() << "Immediate UB detected: " << Msg << '\n';
129 }
130
131 void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; }
132
133 void onUnrecognizedInstruction(Instruction &I) override {
134 errs() << "Unrecognized instruction: " << I << '\n';
135 }
136};
137
138class VerboseEventHandler : public NoopEventHandler {
139public:
140 bool onInstructionExecuted(Instruction &I,
141 const ubi::AnyValue &Result) override {
142 if (Result.isNone()) {
143 errs() << I << '\n';
144 } else {
145 errs() << I << " => " << Result << '\n';
146 }
147
148 return true;
149 }
150
151 bool onBBJump(Instruction &I, BasicBlock &To) override {
152 errs() << I << " jump to ";
153 To.printAsOperand(O&: errs(), /*PrintType=*/false);
154 errs() << '\n';
155 return true;
156 }
157
158 bool onFunctionEntry(Function &F, ArrayRef<ubi::AnyValue> Args,
159 CallBase *CallSite) override {
160 errs() << "Entering function: " << F.getName() << '\n';
161 size_t ArgSize = F.arg_size();
162 for (auto &&[Idx, Arg] : enumerate(First&: Args)) {
163 if (Idx >= ArgSize)
164 errs() << " vaarg[" << (Idx - ArgSize) << "] = " << Arg << '\n';
165 else
166 errs() << " " << *F.getArg(i: Idx) << " = " << Arg << '\n';
167 }
168 return true;
169 }
170
171 bool onFunctionExit(Function &F, const ubi::AnyValue &RetVal) override {
172 errs() << "Exiting function: " << F.getName() << '\n';
173 return true;
174 }
175
176 void onProgramExit(const ubi::ProgramExitInfo &Info) override {
177 switch (Info.Kind) {
178 case ubi::ProgramExitInfo::ProgramExitKind::Returned:
179 return;
180 case ubi::ProgramExitInfo::ProgramExitKind::Failed:
181 return;
182 case ubi::ProgramExitInfo::ProgramExitKind::Exited:
183 errs() << "Program exited with code " << Info.ExitCode << '\n';
184 return;
185 case ubi::ProgramExitInfo::ProgramExitKind::Aborted:
186 errs() << "Program aborted.\n";
187 return;
188 case ubi::ProgramExitInfo::ProgramExitKind::Terminated:
189 errs() << "Program terminated.\n";
190 return;
191 }
192
193 llvm_unreachable("Unknown ProgramExitKind");
194 }
195};
196
197int main(int argc, char **argv) {
198 InitLLVM X(argc, argv);
199
200 cl::ParseCommandLineOptions(argc, argv, Overview: "llvm ub-aware interpreter\n");
201
202 if (EntryFunc.empty()) {
203 WithColor::error() << "--entry-function name cannot be empty\n";
204 return 1;
205 }
206
207 if (VScale == 0) {
208 WithColor::error() << "--vscale value must be positive\n";
209 return 1;
210 }
211
212 if (!isPowerOf2_32(Value: VScale)) {
213 WithColor::error() << "--vscale value must be a power of 2\n";
214 return 1;
215 }
216
217 LLVMContext Context;
218
219 // Load the bitcode...
220 SMDiagnostic Err;
221 AsmParserContext ParserContext;
222 std::unique_ptr<Module> Owner =
223 parseIRFile(Filename: InputFile, Err, Context, /*Callbacks=*/{}, ParserContext: &ParserContext);
224 Module *Mod = Owner.get();
225 if (!Mod) {
226 Err.print(ProgName: argv[0], S&: errs());
227 return 1;
228 }
229
230 if (!NoVerify && verifyModule(M: *Mod, OS: &errs())) {
231 WithColor::error() << InputFile << ": input module is broken!\n";
232 return 1;
233 }
234
235 // If the user specifically requested an argv[0] to pass into the program,
236 // do it now.
237 if (!FakeArgv0.empty()) {
238 InputFile = static_cast<std::string>(FakeArgv0);
239 } else {
240 // Otherwise, if there is a .bc suffix on the executable strip it off, it
241 // might confuse the program.
242 if (StringRef(InputFile).ends_with(Suffix: ".bc"))
243 InputFile.erase(pos: InputFile.length() - 3);
244 }
245
246 // Add the module's name to the start of the vector of arguments to main().
247 InputArgv.insert(pos: InputArgv.begin(), value: InputFile);
248
249 // Initialize the execution context and set parameters.
250 ubi::Context Ctx(*Mod, &ParserContext);
251 Ctx.setMemoryLimit(MaxMem);
252 Ctx.setVScale(VScale);
253 Ctx.setMaxSteps(MaxSteps);
254 Ctx.setMaxStackDepth(MaxStackDepth);
255 Ctx.setFusedMultiplyAdd(FuseFMulAdd);
256 Ctx.setDeterministic(Deterministic);
257 Ctx.setUndefValueBehavior(UndefBehavior);
258 Ctx.setNaNPropagationBehavior(NaNPropagationBehavior);
259 Ctx.reseed(Seed);
260
261 if (!Ctx.initGlobalValues()) {
262 WithColor::error() << "Failed to initialize global values (e.g., the "
263 "memory limit may be too low).\n";
264 return 1;
265 }
266
267 // Call the main function from M as if its signature were:
268 // int main (int argc, char **argv)
269 // using the contents of Args to determine argc & argv
270 Function *EntryFn = Mod->getFunction(Name: EntryFunc);
271 if (!EntryFn) {
272 WithColor::error() << '\'' << EntryFunc
273 << "\' function not found in module.\n";
274 return 1;
275 }
276 TargetLibraryInfo TLI(Ctx.getTLIImpl());
277 Type *IntTy = IntegerType::get(C&: Ctx.getContext(), NumBits: TLI.getIntSize());
278 Type *PtrTy = PointerType::getUnqual(C&: Ctx.getContext());
279 auto *MainFuncTy = FunctionType::get(Result: IntTy, Params: {IntTy, PtrTy}, isVarArg: false);
280 SmallVector<ubi::AnyValue> Args;
281 if (EntryFn->getFunctionType() == MainFuncTy) {
282 const ubi::AnyValue *Argc =
283 Ctx.getConstantValue(C: ConstantInt::get(Ty: IntTy, V: InputArgv.size()));
284 assert(Argc && "failed to initialize argc");
285 Args.push_back(Elt: *Argc);
286
287 uint32_t PtrSize = Ctx.getDataLayout().getPointerSize();
288 uint64_t PtrsSize = PtrSize * (InputArgv.size() + 1);
289 auto ArgvPtrsMem = Ctx.allocate(Size: PtrsSize, Align: 8, Name: "argv",
290 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed,
291 AllocKind: ubi::MemAllocKind::Global);
292 if (!ArgvPtrsMem) {
293 WithColor::error() << "Failed to allocate memory for argv pointers.\n";
294 return 1;
295 }
296 for (const auto &[Idx, Arg] : enumerate(First&: InputArgv)) {
297 uint64_t Size = Arg.length() + 1;
298 auto ArgvStrMem = Ctx.allocate(Size, Align: 8, Name: "argv_str",
299 /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed,
300 AllocKind: ubi::MemAllocKind::Global);
301 if (!ArgvStrMem) {
302 WithColor::error() << "Failed to allocate memory for argv strings.\n";
303 return 1;
304 }
305 ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(Obj: ArgvStrMem);
306 Ctx.storeRawBytes(MO&: *ArgvStrMem, Offset: 0, Data: Arg.c_str(), Size: Arg.length());
307 Ctx.store(MO&: *ArgvPtrsMem, Offset: Idx * PtrSize, Val: ArgPtr, ValTy: PtrTy);
308 }
309 Args.push_back(Elt: Ctx.deriveFromMemoryObject(Obj: ArgvPtrsMem));
310 } else if (!EntryFn->arg_empty()) {
311 // If the signature does not match (e.g., llvm-reduce change the signature
312 // of main), it will pass null values for all arguments.
313 WithColor::warning()
314 << "The signature of function '" << EntryFunc
315 << "' does not match 'int main(int, char**)', passing null values for "
316 "all arguments.\n";
317 Args.reserve(N: EntryFn->arg_size());
318 for (Argument &Arg : EntryFn->args())
319 Args.push_back(Elt: ubi::AnyValue::getNullValue(Ctx, Ty: Arg.getType()));
320 }
321
322 NoopEventHandler NoopHandler;
323 VerboseEventHandler VerboseHandler;
324 ubi::AnyValue RetVal;
325 ubi::ProgramExitInfo ExitInfo = Ctx.runFunction(
326 F&: *EntryFn, Args, RetVal, Handler&: Verbose ? VerboseHandler : NoopHandler);
327 switch (ExitInfo.Kind) {
328 case ubi::ProgramExitInfo::ProgramExitKind::Failed:
329 WithColor::error() << "Execution of function '" << EntryFunc
330 << "' failed.\n";
331 return 1;
332 case ubi::ProgramExitInfo::ProgramExitKind::Aborted:
333 case ubi::ProgramExitInfo::ProgramExitKind::Terminated:
334 return 134;
335 case ubi::ProgramExitInfo::ProgramExitKind::Exited:
336 return static_cast<int>(ExitInfo.ExitCode & 0xFF);
337 case ubi::ProgramExitInfo::ProgramExitKind::Returned:
338 // If the function returns an integer, return that as the exit code.
339 if (EntryFn->getReturnType()->isIntegerTy()) {
340 assert(!RetVal.isNone() && "Expected a return value from entry function");
341 if (RetVal.isPoison()) {
342 WithColor::error() << "Execution of function '" << EntryFunc
343 << "' resulted in poison return value.\n";
344 return 1;
345 }
346 APInt Result = RetVal.asInteger();
347 return (int)Result.extractBitsAsZExtValue(
348 numBits: std::min(a: Result.getBitWidth(), b: 8U), bitPosition: 0);
349 }
350 return 0;
351 }
352
353 llvm_unreachable("Unknown ProgramExitKind");
354}
355