| 1 | //===------------- llubi.cpp - LLVM UB-aware Interpreter --------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This utility provides a UB-aware interpreter for programs in LLVM bitcode. |
| 10 | // It is not built on top of the existing ExecutionEngine interface, but instead |
| 11 | // implements its own value representation, state tracking and interpreter loop. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "lib/Context.h" |
| 16 | #include "llvm/Config/llvm-config.h" |
| 17 | #include "llvm/IR/LLVMContext.h" |
| 18 | #include "llvm/IR/Module.h" |
| 19 | #include "llvm/IR/Type.h" |
| 20 | #include "llvm/IRReader/IRReader.h" |
| 21 | #include "llvm/Support/CommandLine.h" |
| 22 | #include "llvm/Support/Format.h" |
| 23 | #include "llvm/Support/InitLLVM.h" |
| 24 | #include "llvm/Support/MathExtras.h" |
| 25 | #include "llvm/Support/SourceMgr.h" |
| 26 | #include "llvm/Support/WithColor.h" |
| 27 | #include "llvm/Support/raw_ostream.h" |
| 28 | |
| 29 | using namespace llvm; |
| 30 | |
| 31 | static cl::opt<std::string> InputFile(cl::desc("<input bitcode>" ), |
| 32 | cl::Positional, cl::init(Val: "-" )); |
| 33 | |
| 34 | static cl::list<std::string> InputArgv(cl::ConsumeAfter, |
| 35 | cl::desc("<program arguments>..." )); |
| 36 | |
| 37 | static cl::opt<std::string> |
| 38 | EntryFunc("entry-function" , |
| 39 | cl::desc("Specify the entry function (default = 'main') " |
| 40 | "of the executable" ), |
| 41 | cl::value_desc("function" ), cl::init(Val: "main" )); |
| 42 | |
| 43 | static cl::opt<std::string> |
| 44 | FakeArgv0("fake-argv0" , |
| 45 | cl::desc("Override the 'argv[0]' value passed into the executing" |
| 46 | " program" ), |
| 47 | cl::value_desc("executable" )); |
| 48 | |
| 49 | static cl::opt<bool> |
| 50 | Verbose("verbose" , cl::desc("Print results for each instruction executed." ), |
| 51 | cl::init(Val: false)); |
| 52 | |
| 53 | cl::OptionCategory InterpreterCategory("Interpreter Options" ); |
| 54 | |
| 55 | static cl::opt<unsigned> MaxMem( |
| 56 | "max-mem" , |
| 57 | cl::desc("Max amount of memory (in bytes) that can be allocated by the" |
| 58 | " program, including stack, heap, and global variables." |
| 59 | " Set to 0 to disable the limit." ), |
| 60 | cl::value_desc("N" ), cl::init(Val: 0), cl::cat(InterpreterCategory)); |
| 61 | |
| 62 | static cl::opt<unsigned> |
| 63 | MaxSteps("max-steps" , |
| 64 | cl::desc("Max number of instructions executed." |
| 65 | " Set to 0 to disable the limit." ), |
| 66 | cl::value_desc("N" ), cl::init(Val: 0), cl::cat(InterpreterCategory)); |
| 67 | |
| 68 | static cl::opt<unsigned> MaxStackDepth( |
| 69 | "max-stack-depth" , |
| 70 | cl::desc("Max stack depth (default = 256). Set to 0 to disable the limit." ), |
| 71 | cl::value_desc("N" ), cl::init(Val: 256), cl::cat(InterpreterCategory)); |
| 72 | |
| 73 | static cl::opt<unsigned> |
| 74 | VScale("vscale" , cl::desc("The value of llvm.vscale (default = 4)" ), |
| 75 | cl::value_desc("N" ), cl::init(Val: 4), cl::cat(InterpreterCategory)); |
| 76 | |
| 77 | class VerboseEventHandler : public ubi::EventHandler { |
| 78 | public: |
| 79 | bool onInstructionExecuted(Instruction &I, |
| 80 | const ubi::AnyValue &Result) override { |
| 81 | if (Result.isNone()) { |
| 82 | errs() << I << '\n'; |
| 83 | } else { |
| 84 | errs() << I << " => " << Result << '\n'; |
| 85 | } |
| 86 | |
| 87 | return true; |
| 88 | } |
| 89 | |
| 90 | void onImmediateUB(StringRef Msg) override { |
| 91 | errs() << "Immediate UB detected: " << Msg << '\n'; |
| 92 | } |
| 93 | |
| 94 | void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; } |
| 95 | |
| 96 | bool onBBJump(Instruction &I, BasicBlock &To) override { |
| 97 | errs() << I << " jump to " ; |
| 98 | To.printAsOperand(O&: errs(), /*PrintType=*/false); |
| 99 | errs() << '\n'; |
| 100 | return true; |
| 101 | } |
| 102 | |
| 103 | bool onFunctionEntry(Function &F, ArrayRef<ubi::AnyValue> Args, |
| 104 | CallBase *CallSite) override { |
| 105 | errs() << "Entering function: " << F.getName() << '\n'; |
| 106 | size_t ArgSize = F.arg_size(); |
| 107 | for (auto &&[Idx, Arg] : enumerate(First&: Args)) { |
| 108 | if (Idx >= ArgSize) |
| 109 | errs() << " vaarg[" << (Idx - ArgSize) << "] = " << Arg << '\n'; |
| 110 | else |
| 111 | errs() << " " << *F.getArg(i: Idx) << " = " << Arg << '\n'; |
| 112 | } |
| 113 | return true; |
| 114 | } |
| 115 | |
| 116 | bool onFunctionExit(Function &F, const ubi::AnyValue &RetVal) override { |
| 117 | errs() << "Exiting function: " << F.getName() << '\n'; |
| 118 | return true; |
| 119 | } |
| 120 | |
| 121 | void onUnrecognizedInstruction(Instruction &I) override { |
| 122 | errs() << "Unrecognized instruction: " << I << '\n'; |
| 123 | } |
| 124 | }; |
| 125 | |
| 126 | int main(int argc, char **argv) { |
| 127 | InitLLVM X(argc, argv); |
| 128 | |
| 129 | cl::ParseCommandLineOptions(argc, argv, Overview: "llvm ub-aware interpreter\n" ); |
| 130 | |
| 131 | if (EntryFunc.empty()) { |
| 132 | WithColor::error() << "--entry-function name cannot be empty\n" ; |
| 133 | return 1; |
| 134 | } |
| 135 | |
| 136 | LLVMContext Context; |
| 137 | |
| 138 | // Load the bitcode... |
| 139 | SMDiagnostic Err; |
| 140 | std::unique_ptr<Module> Owner = parseIRFile(Filename: InputFile, Err, Context); |
| 141 | Module *Mod = Owner.get(); |
| 142 | if (!Mod) { |
| 143 | Err.print(ProgName: argv[0], S&: errs()); |
| 144 | return 1; |
| 145 | } |
| 146 | |
| 147 | // If the user specifically requested an argv[0] to pass into the program, |
| 148 | // do it now. |
| 149 | if (!FakeArgv0.empty()) { |
| 150 | InputFile = static_cast<std::string>(FakeArgv0); |
| 151 | } else { |
| 152 | // Otherwise, if there is a .bc suffix on the executable strip it off, it |
| 153 | // might confuse the program. |
| 154 | if (StringRef(InputFile).ends_with(Suffix: ".bc" )) |
| 155 | InputFile.erase(pos: InputFile.length() - 3); |
| 156 | } |
| 157 | |
| 158 | // Add the module's name to the start of the vector of arguments to main(). |
| 159 | InputArgv.insert(pos: InputArgv.begin(), value: InputFile); |
| 160 | |
| 161 | // Initialize the execution context and set parameters. |
| 162 | ubi::Context Ctx(*Mod); |
| 163 | Ctx.setMemoryLimit(MaxMem); |
| 164 | Ctx.setVScale(VScale); |
| 165 | Ctx.setMaxSteps(MaxSteps); |
| 166 | Ctx.setMaxStackDepth(MaxStackDepth); |
| 167 | |
| 168 | if (!Ctx.initGlobalValues()) { |
| 169 | WithColor::error() << "Failed to initialize global values (e.g., the " |
| 170 | "memory limit may be too low).\n" ; |
| 171 | return 1; |
| 172 | } |
| 173 | |
| 174 | // Call the main function from M as if its signature were: |
| 175 | // int main (int argc, char **argv) |
| 176 | // using the contents of Args to determine argc & argv |
| 177 | Function *EntryFn = Mod->getFunction(Name: EntryFunc); |
| 178 | if (!EntryFn) { |
| 179 | WithColor::error() << '\'' << EntryFunc |
| 180 | << "\' function not found in module.\n" ; |
| 181 | return 1; |
| 182 | } |
| 183 | TargetLibraryInfo TLI(Ctx.getTLIImpl()); |
| 184 | Type *IntTy = IntegerType::get(C&: Ctx.getContext(), NumBits: TLI.getIntSize()); |
| 185 | auto *MainFuncTy = FunctionType::get( |
| 186 | Result: IntTy, Params: {IntTy, PointerType::getUnqual(C&: Ctx.getContext())}, isVarArg: false); |
| 187 | SmallVector<ubi::AnyValue> Args; |
| 188 | if (EntryFn->getFunctionType() == MainFuncTy) { |
| 189 | Args.push_back( |
| 190 | Elt: Ctx.getConstantValue(C: ConstantInt::get(Ty: IntTy, V: InputArgv.size()))); |
| 191 | |
| 192 | uint32_t PtrSize = Ctx.getDataLayout().getPointerSize(); |
| 193 | uint64_t = PtrSize * (InputArgv.size() + 1); |
| 194 | auto ArgvPtrsMem = Ctx.allocate(Size: PtrsSize, Align: 8, Name: "argv" , |
| 195 | /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed); |
| 196 | if (!ArgvPtrsMem) { |
| 197 | WithColor::error() << "Failed to allocate memory for argv pointers.\n" ; |
| 198 | return 1; |
| 199 | } |
| 200 | for (const auto &[Idx, Arg] : enumerate(First&: InputArgv)) { |
| 201 | uint64_t Size = Arg.length() + 1; |
| 202 | auto ArgvStrMem = Ctx.allocate(Size, Align: 8, Name: "argv_str" , |
| 203 | /*AS=*/0, InitKind: ubi::MemInitKind::Zeroed); |
| 204 | if (!ArgvStrMem) { |
| 205 | WithColor::error() << "Failed to allocate memory for argv strings.\n" ; |
| 206 | return 1; |
| 207 | } |
| 208 | ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(Obj: ArgvStrMem); |
| 209 | ArgvStrMem->writeRawBytes(Offset: 0, Data: Arg.c_str(), Length: Arg.length()); |
| 210 | ArgvPtrsMem->writePointer(Offset: Idx * PtrSize, Ptr: ArgPtr, DL: Ctx.getDataLayout()); |
| 211 | } |
| 212 | Args.push_back(Elt: Ctx.deriveFromMemoryObject(Obj: ArgvPtrsMem)); |
| 213 | } else if (!EntryFn->arg_empty()) { |
| 214 | // If the signature does not match (e.g., llvm-reduce change the signature |
| 215 | // of main), it will pass null values for all arguments. |
| 216 | WithColor::warning() |
| 217 | << "The signature of function '" << EntryFunc |
| 218 | << "' does not match 'int main(int, char**)', passing null values for " |
| 219 | "all arguments.\n" ; |
| 220 | Args.reserve(N: EntryFn->arg_size()); |
| 221 | for (Argument &Arg : EntryFn->args()) |
| 222 | Args.push_back(Elt: ubi::AnyValue::getNullValue(Ctx, Ty: Arg.getType())); |
| 223 | } |
| 224 | |
| 225 | ubi::EventHandler NoopHandler; |
| 226 | VerboseEventHandler VerboseHandler; |
| 227 | ubi::AnyValue RetVal; |
| 228 | if (!Ctx.runFunction(F&: *EntryFn, Args, RetVal, |
| 229 | Handler&: Verbose ? VerboseHandler : NoopHandler)) { |
| 230 | WithColor::error() << "Execution of function '" << EntryFunc |
| 231 | << "' failed.\n" ; |
| 232 | return 1; |
| 233 | } |
| 234 | |
| 235 | // If the function returns an integer, return that as the exit code. |
| 236 | if (EntryFn->getReturnType()->isIntegerTy()) { |
| 237 | assert(!RetVal.isNone() && "Expected a return value from entry function" ); |
| 238 | if (RetVal.isPoison()) { |
| 239 | WithColor::error() << "Execution of function '" << EntryFunc |
| 240 | << "' resulted in poison return value.\n" ; |
| 241 | return 1; |
| 242 | } |
| 243 | APInt Result = RetVal.asInteger(); |
| 244 | return (int)Result.extractBitsAsZExtValue( |
| 245 | numBits: std::min(a: Result.getBitWidth(), b: 8U), bitPosition: 0); |
| 246 | } |
| 247 | return 0; |
| 248 | } |
| 249 | |