| 1 | //===--- Context.h - State Tracking for llubi -------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_TOOLS_LLUBI_CONTEXT_H |
| 10 | #define LLVM_TOOLS_LLUBI_CONTEXT_H |
| 11 | |
| 12 | #include "Value.h" |
| 13 | #include "llvm/ADT/DenseMap.h" |
| 14 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 15 | #include "llvm/IR/Module.h" |
| 16 | #include <map> |
| 17 | #include <random> |
| 18 | |
| 19 | namespace llvm::ubi { |
| 20 | |
/// How the bytes of a freshly created memory object are initialized.
enum class MemInitKind {
  // Every byte starts as zero.
  Zeroed,
  // Bytes start uninitialized (reading them yields undef-like bits —
  // exact semantics depend on the interpreter's byte model; see Value.h).
  Uninitialized,
  // Bytes start as poison.
  Poisoned,
};
| 26 | |
/// Lifetime state of a MemoryObject. The per-enumerator comments document
/// the legal state transitions.
enum class MemoryObjectState {
  // This memory object is accessible.
  // Valid transitions:
  // -> Dead (after the end of lifetime of an alloca)
  // -> Freed (after free is called on a heap object)
  Alive,
  // This memory object is out of lifetime. It is OK to perform
  // operations that do not access its content, e.g., getelementptr.
  // Otherwise, an immediate UB occurs.
  // Valid transition:
  // -> Alive (after the start of lifetime of an alloca)
  Dead,
  // This heap memory object has been freed. Any access to it
  // causes immediate UB. Like dead objects, it is still possible to
  // perform operations that do not access its content.
  Freed,
};
| 44 | |
/// Policy controlling how uses of undef values are materialized.
enum class UndefValueBehavior {
  NonDeterministic, // Each use of the undef value can yield different results.
  Zero,             // All uses of the undef value yield zero.
};
| 49 | |
| 50 | class MemoryObject : public RefCountedBase<MemoryObject> { |
| 51 | uint64_t Address; |
| 52 | uint64_t Size; |
| 53 | SmallVector<Byte, 8> Bytes; |
| 54 | StringRef Name; |
| 55 | unsigned AS; |
| 56 | |
| 57 | MemoryObjectState State; |
| 58 | bool IsConstant = false; |
| 59 | |
| 60 | public: |
| 61 | MemoryObject(uint64_t Addr, uint64_t Size, StringRef Name, unsigned AS, |
| 62 | MemInitKind InitKind); |
| 63 | MemoryObject(const MemoryObject &) = delete; |
| 64 | MemoryObject(MemoryObject &&) = delete; |
| 65 | MemoryObject &operator=(const MemoryObject &) = delete; |
| 66 | MemoryObject &operator=(MemoryObject &&) = delete; |
| 67 | ~MemoryObject(); |
| 68 | |
| 69 | uint64_t getAddress() const { return Address; } |
| 70 | uint64_t getSize() const { return Size; } |
| 71 | StringRef getName() const { return Name; } |
| 72 | unsigned getAddressSpace() const { return AS; } |
| 73 | MemoryObjectState getState() const { return State; } |
| 74 | void setState(MemoryObjectState S) { State = S; } |
| 75 | bool isConstant() const { return IsConstant; } |
| 76 | void setIsConstant(bool C) { IsConstant = C; } |
| 77 | |
| 78 | bool inBounds(const APInt &NewAddr) const { |
| 79 | return NewAddr.uge(RHS: Address) && NewAddr.ule(RHS: Address + Size); |
| 80 | } |
| 81 | |
| 82 | Byte &operator[](uint64_t Offset) { |
| 83 | assert(Offset < Size && "Offset out of bounds" ); |
| 84 | return Bytes[Offset]; |
| 85 | } |
| 86 | ArrayRef<Byte> getBytes() const { return Bytes; } |
| 87 | MutableArrayRef<Byte> getBytes() { return Bytes; } |
| 88 | |
| 89 | void markAsFreed(); |
| 90 | }; |
| 91 | |
/// An interface for handling events and managing outputs during interpretation.
/// If the handler returns false from any of the methods, the interpreter will
/// stop execution immediately.
class EventHandler {
public:
  virtual ~EventHandler() = default;

  /// Called after each instruction executes, with its computed result.
  virtual bool onInstructionExecuted(Instruction &I, const AnyValue &Result) {
    return true;
  }
  /// Called when an interpreter-level error occurs (void: execution already
  /// stops on error paths).
  virtual void onError(StringRef Msg) {}
  /// Called for instructions the interpreter does not support.
  virtual void onUnrecognizedInstruction(Instruction &I) {}
  /// Called when immediate undefined behavior is detected.
  virtual void onImmediateUB(StringRef Msg) {}
  /// Called when control transfers to basic block \p To via terminator \p I.
  virtual bool onBBJump(Instruction &I, BasicBlock &To) { return true; }
  /// Called on function entry. \p CallSite is the calling instruction, or
  /// null when there is no IR call site (e.g., the top-level entry function).
  virtual bool onFunctionEntry(Function &F, ArrayRef<AnyValue> Args,
                               CallBase *CallSite) {
    return true;
  }
  /// Called when \p F returns with \p RetVal.
  virtual bool onFunctionExit(Function &F, const AnyValue &RetVal) {
    return true;
  }
  /// Called for interpreter output; default implementation writes to stdout.
  virtual bool onPrint(StringRef Msg) {
    outs() << Msg;
    return true;
  }
};
| 118 | |
| 119 | /// Endianness aware accessor for bytes. |
| 120 | template <typename ArrayRefT> class BytesView { |
| 121 | ArrayRefT Bytes; |
| 122 | bool IsLittleEndian; |
| 123 | |
| 124 | public: |
| 125 | explicit BytesView(ArrayRefT Ref, const DataLayout &DL) |
| 126 | : Bytes(Ref), IsLittleEndian(DL.isLittleEndian()) {} |
| 127 | |
| 128 | auto &operator[](uint32_t Index) { |
| 129 | return Bytes[IsLittleEndian ? Index : Bytes.size() - 1 - Index]; |
| 130 | } |
| 131 | }; |
| 132 | |
/// Read-only and mutable endianness-aware views over a byte sequence.
using ConstBytesView = BytesView<ArrayRef<Byte>>;
using MutableBytesView = BytesView<MutableArrayRef<Byte>>;
| 135 | |
| 136 | /// The global context for the interpreter. |
| 137 | /// It tracks global state such as heap memory objects and floating point |
| 138 | /// environment. |
| 139 | class Context { |
| 140 | // Module |
| 141 | LLVMContext &Ctx; |
| 142 | Module &M; |
| 143 | const DataLayout &DL; |
| 144 | const TargetLibraryInfoImpl TLIImpl; |
| 145 | |
| 146 | // Configuration |
| 147 | uint64_t MaxMem = 0; |
| 148 | uint32_t VScale = 4; |
| 149 | uint32_t MaxSteps = 0; |
| 150 | uint32_t MaxStackDepth = 256; |
| 151 | UndefValueBehavior UndefBehavior = UndefValueBehavior::NonDeterministic; |
| 152 | |
| 153 | std::mt19937_64 Rng; |
| 154 | |
| 155 | // Memory |
| 156 | uint64_t UsedMem = 0; |
| 157 | // The addresses of memory objects are monotonically increasing. |
| 158 | // For now we don't model the behavior of address reuse, which is common |
| 159 | // with stack coloring. |
| 160 | uint64_t AllocationBase = 8; |
| 161 | // Maintains a global list of 'exposed' provenances. This is used to form a |
| 162 | // pointer with an exposed provenance. |
| 163 | // FIXME: Currently all the allocations are considered exposed, regardless of |
| 164 | // their interaction with ptrtoint. That is, ptrtoint is allowed to recover |
| 165 | // the provenance of any allocation. We may track the exposed provenances more |
| 166 | // precisely after we make ptrtoint have the implicit side-effect of exposing |
| 167 | // the provenance. |
| 168 | std::map<uint64_t, IntrusiveRefCntPtr<MemoryObject>> MemoryObjects; |
| 169 | AnyValue fromBytes(ConstBytesView Bytes, Type *Ty, uint32_t OffsetInBits, |
| 170 | bool CheckPaddingBits); |
| 171 | void toBytes(const AnyValue &Val, Type *Ty, uint32_t OffsetInBits, |
| 172 | MutableBytesView Bytes, bool PaddingBits); |
| 173 | |
| 174 | // Constants |
| 175 | // Use std::map to avoid iterator/reference invalidation. |
| 176 | std::map<Constant *, AnyValue> ConstCache; |
| 177 | DenseMap<Function *, Pointer> FuncAddrMap; |
| 178 | DenseMap<BasicBlock *, Pointer> BlockAddrMap; |
| 179 | DenseMap<uint64_t, std::pair<Function *, IntrusiveRefCntPtr<MemoryObject>>> |
| 180 | ValidFuncTargets; |
| 181 | DenseMap<uint64_t, std::pair<BasicBlock *, IntrusiveRefCntPtr<MemoryObject>>> |
| 182 | ValidBlockTargets; |
| 183 | AnyValue getConstantValueImpl(Constant *C); |
| 184 | |
| 185 | // TODO: errno and fpenv |
| 186 | |
| 187 | public: |
| 188 | explicit Context(Module &M); |
| 189 | Context(const Context &) = delete; |
| 190 | Context(Context &&) = delete; |
| 191 | Context &operator=(const Context &) = delete; |
| 192 | Context &operator=(Context &&) = delete; |
| 193 | ~Context(); |
| 194 | |
| 195 | void setMemoryLimit(uint64_t Max) { MaxMem = Max; } |
| 196 | void setVScale(uint32_t VS) { VScale = VS; } |
| 197 | void setMaxSteps(uint32_t MS) { MaxSteps = MS; } |
| 198 | void setMaxStackDepth(uint32_t Depth) { MaxStackDepth = Depth; } |
| 199 | uint64_t getMemoryLimit() const { return MaxMem; } |
| 200 | uint32_t getVScale() const { return VScale; } |
| 201 | uint32_t getMaxSteps() const { return MaxSteps; } |
| 202 | uint32_t getMaxStackDepth() const { return MaxStackDepth; } |
| 203 | void setUndefValueBehavior(UndefValueBehavior UB) { UndefBehavior = UB; } |
| 204 | void reseed(uint32_t Seed) { Rng.seed(sd: Seed); } |
| 205 | |
| 206 | LLVMContext &getContext() const { return Ctx; } |
| 207 | const DataLayout &getDataLayout() const { return DL; } |
| 208 | const TargetLibraryInfoImpl &getTLIImpl() const { return TLIImpl; } |
| 209 | /// Get the effective vector length for a vector type. |
| 210 | uint32_t getEVL(ElementCount EC) const { |
| 211 | if (EC.isScalable()) |
| 212 | return VScale * EC.getKnownMinValue(); |
| 213 | return EC.getFixedValue(); |
| 214 | } |
| 215 | /// The result is multiplied by VScale for scalable type sizes. |
| 216 | uint64_t getEffectiveTypeSize(TypeSize Size) const { |
| 217 | if (Size.isScalable()) |
| 218 | return VScale * Size.getKnownMinValue(); |
| 219 | return Size.getFixedValue(); |
| 220 | } |
| 221 | /// Returns DL.getTypeAllocSize/getTypeStoreSize for the given type. |
| 222 | /// An exception to this is that for scalable vector types, the size is |
| 223 | /// computed as if the vector has getEVL(ElementCount) elements. |
| 224 | uint64_t getEffectiveTypeAllocSize(Type *Ty); |
| 225 | uint64_t getEffectiveTypeStoreSize(Type *Ty); |
| 226 | |
| 227 | const AnyValue &getConstantValue(Constant *C); |
| 228 | IntrusiveRefCntPtr<MemoryObject> allocate(uint64_t Size, uint64_t Align, |
| 229 | StringRef Name, unsigned AS, |
| 230 | MemInitKind InitKind); |
| 231 | bool free(uint64_t Address); |
| 232 | /// Derive a pointer from a memory object with offset 0. |
| 233 | /// Please use Pointer's interface for further manipulations. |
| 234 | Pointer deriveFromMemoryObject(IntrusiveRefCntPtr<MemoryObject> Obj); |
| 235 | /// Convert byte sequence to a value of the given type. Uninitialized bits are |
| 236 | /// flushed according to the options. |
| 237 | AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty); |
| 238 | /// Convert a value to byte sequence. Padding bits are set to zero. |
| 239 | void toBytes(const AnyValue &Val, Type *Ty, MutableArrayRef<Byte> Bytes); |
| 240 | /// Direct memory load without checks. |
| 241 | AnyValue load(MemoryObject &MO, uint64_t Offset, Type *ValTy); |
| 242 | /// Direct memory store without checks. |
| 243 | void store(MemoryObject &MO, uint64_t Offset, const AnyValue &Val, |
| 244 | Type *ValTy); |
| 245 | void storeRawBytes(MemoryObject &MO, uint64_t Offset, const void *Data, |
| 246 | uint64_t Size); |
| 247 | |
| 248 | /// Freeze the value in-place. |
| 249 | void freeze(AnyValue &Val, Type *Ty); |
| 250 | |
| 251 | Function *getTargetFunction(const Pointer &Ptr); |
| 252 | BasicBlock *getTargetBlock(const Pointer &Ptr); |
| 253 | |
| 254 | /// Initialize global variables and function/block objects. This function |
| 255 | /// should be called before executing any function. Returns false if the |
| 256 | /// initialization fails (e.g., the memory limit is exceeded during |
| 257 | /// initialization). |
| 258 | bool initGlobalValues(); |
| 259 | /// Execute the function \p F with arguments \p Args, and store the return |
| 260 | /// value in \p RetVal if the function is not void. |
| 261 | /// Returns true if the function executed successfully. False indicates an |
| 262 | /// error occurred during execution. |
| 263 | bool runFunction(Function &F, ArrayRef<AnyValue> Args, AnyValue &RetVal, |
| 264 | EventHandler &Handler); |
| 265 | }; |
| 266 | |
| 267 | } // namespace llvm::ubi |
| 268 | |
| 269 | #endif |
| 270 | |