1//===--- Context.h - State Tracking for llubi -------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLUBI_CONTEXT_H
10#define LLVM_TOOLS_LLUBI_CONTEXT_H
11
12#include "Value.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/Analysis/TargetLibraryInfo.h"
15#include "llvm/IR/Module.h"
16#include <map>
17#include <random>
18
19namespace llvm::ubi {
20
/// Selects how the contents of a newly created memory object are initialized.
/// The kind is passed to the MemoryObject constructor and to
/// Context::allocate.
enum class MemInitKind {
  /// Contents start out as zero.
  Zeroed,
  /// Contents start out uninitialized.
  Uninitialized,
  /// Contents start out as poison.
  Poisoned,
};
26
/// Lifetime state of a memory object.
///
/// Permitted transitions:
///   Alive -> Dead   when the lifetime of an alloca ends
///   Alive -> Freed  when free is called on a heap object
///   Dead  -> Alive  when the lifetime of an alloca starts
enum class MemoryObjectState {
  /// The object is accessible.
  Alive,
  /// The object is outside its lifetime. Operations that do not touch its
  /// content (e.g., getelementptr) are still fine; anything that accesses the
  /// content is immediate UB.
  Dead,
  /// The heap object has been freed. Accessing it is immediate UB. As with
  /// dead objects, operations that do not access its content remain possible.
  Freed,
};
44
/// Policy for the value observed when an undef value is used.
enum class UndefValueBehavior {
  /// Every use of the undef value may produce a different result.
  NonDeterministic,
  /// Every use of the undef value produces zero.
  Zero,
};
49
50class MemoryObject : public RefCountedBase<MemoryObject> {
51 uint64_t Address;
52 uint64_t Size;
53 SmallVector<Byte, 8> Bytes;
54 StringRef Name;
55 unsigned AS;
56
57 MemoryObjectState State;
58 bool IsConstant = false;
59
60public:
61 MemoryObject(uint64_t Addr, uint64_t Size, StringRef Name, unsigned AS,
62 MemInitKind InitKind);
63 MemoryObject(const MemoryObject &) = delete;
64 MemoryObject(MemoryObject &&) = delete;
65 MemoryObject &operator=(const MemoryObject &) = delete;
66 MemoryObject &operator=(MemoryObject &&) = delete;
67 ~MemoryObject();
68
69 uint64_t getAddress() const { return Address; }
70 uint64_t getSize() const { return Size; }
71 StringRef getName() const { return Name; }
72 unsigned getAddressSpace() const { return AS; }
73 MemoryObjectState getState() const { return State; }
74 void setState(MemoryObjectState S) { State = S; }
75 bool isConstant() const { return IsConstant; }
76 void setIsConstant(bool C) { IsConstant = C; }
77
78 bool inBounds(const APInt &NewAddr) const {
79 return NewAddr.uge(RHS: Address) && NewAddr.ule(RHS: Address + Size);
80 }
81
82 Byte &operator[](uint64_t Offset) {
83 assert(Offset < Size && "Offset out of bounds");
84 return Bytes[Offset];
85 }
86 ArrayRef<Byte> getBytes() const { return Bytes; }
87 MutableArrayRef<Byte> getBytes() { return Bytes; }
88
89 void markAsFreed();
90};
91
92/// An interface for handling events and managing outputs during interpretation.
93/// If the handler returns false from any of the methods, the interpreter will
94/// stop execution immediately.
95class EventHandler {
96public:
97 virtual ~EventHandler() = default;
98
99 virtual bool onInstructionExecuted(Instruction &I, const AnyValue &Result) {
100 return true;
101 }
102 virtual void onError(StringRef Msg) {}
103 virtual void onUnrecognizedInstruction(Instruction &I) {}
104 virtual void onImmediateUB(StringRef Msg) {}
105 virtual bool onBBJump(Instruction &I, BasicBlock &To) { return true; }
106 virtual bool onFunctionEntry(Function &F, ArrayRef<AnyValue> Args,
107 CallBase *CallSite) {
108 return true;
109 }
110 virtual bool onFunctionExit(Function &F, const AnyValue &RetVal) {
111 return true;
112 }
113 virtual bool onPrint(StringRef Msg) {
114 outs() << Msg;
115 return true;
116 }
117};
118
119/// Endianness aware accessor for bytes.
120template <typename ArrayRefT> class BytesView {
121 ArrayRefT Bytes;
122 bool IsLittleEndian;
123
124public:
125 explicit BytesView(ArrayRefT Ref, const DataLayout &DL)
126 : Bytes(Ref), IsLittleEndian(DL.isLittleEndian()) {}
127
128 auto &operator[](uint32_t Index) {
129 return Bytes[IsLittleEndian ? Index : Bytes.size() - 1 - Index];
130 }
131};
132
133using ConstBytesView = BytesView<ArrayRef<Byte>>;
134using MutableBytesView = BytesView<MutableArrayRef<Byte>>;
135
136/// The global context for the interpreter.
137/// It tracks global state such as heap memory objects and floating point
138/// environment.
139class Context {
140 // Module
141 LLVMContext &Ctx;
142 Module &M;
143 const DataLayout &DL;
144 const TargetLibraryInfoImpl TLIImpl;
145
146 // Configuration
147 uint64_t MaxMem = 0;
148 uint32_t VScale = 4;
149 uint32_t MaxSteps = 0;
150 uint32_t MaxStackDepth = 256;
151 UndefValueBehavior UndefBehavior = UndefValueBehavior::NonDeterministic;
152
153 std::mt19937_64 Rng;
154
155 // Memory
156 uint64_t UsedMem = 0;
157 // The addresses of memory objects are monotonically increasing.
158 // For now we don't model the behavior of address reuse, which is common
159 // with stack coloring.
160 uint64_t AllocationBase = 8;
161 // Maintains a global list of 'exposed' provenances. This is used to form a
162 // pointer with an exposed provenance.
163 // FIXME: Currently all the allocations are considered exposed, regardless of
164 // their interaction with ptrtoint. That is, ptrtoint is allowed to recover
165 // the provenance of any allocation. We may track the exposed provenances more
166 // precisely after we make ptrtoint have the implicit side-effect of exposing
167 // the provenance.
168 std::map<uint64_t, IntrusiveRefCntPtr<MemoryObject>> MemoryObjects;
169 AnyValue fromBytes(ConstBytesView Bytes, Type *Ty, uint32_t OffsetInBits,
170 bool CheckPaddingBits);
171 void toBytes(const AnyValue &Val, Type *Ty, uint32_t OffsetInBits,
172 MutableBytesView Bytes, bool PaddingBits);
173
174 // Constants
175 // Use std::map to avoid iterator/reference invalidation.
176 std::map<Constant *, AnyValue> ConstCache;
177 DenseMap<Function *, Pointer> FuncAddrMap;
178 DenseMap<BasicBlock *, Pointer> BlockAddrMap;
179 DenseMap<uint64_t, std::pair<Function *, IntrusiveRefCntPtr<MemoryObject>>>
180 ValidFuncTargets;
181 DenseMap<uint64_t, std::pair<BasicBlock *, IntrusiveRefCntPtr<MemoryObject>>>
182 ValidBlockTargets;
183 AnyValue getConstantValueImpl(Constant *C);
184
185 // TODO: errno and fpenv
186
187public:
188 explicit Context(Module &M);
189 Context(const Context &) = delete;
190 Context(Context &&) = delete;
191 Context &operator=(const Context &) = delete;
192 Context &operator=(Context &&) = delete;
193 ~Context();
194
195 void setMemoryLimit(uint64_t Max) { MaxMem = Max; }
196 void setVScale(uint32_t VS) { VScale = VS; }
197 void setMaxSteps(uint32_t MS) { MaxSteps = MS; }
198 void setMaxStackDepth(uint32_t Depth) { MaxStackDepth = Depth; }
199 uint64_t getMemoryLimit() const { return MaxMem; }
200 uint32_t getVScale() const { return VScale; }
201 uint32_t getMaxSteps() const { return MaxSteps; }
202 uint32_t getMaxStackDepth() const { return MaxStackDepth; }
203 void setUndefValueBehavior(UndefValueBehavior UB) { UndefBehavior = UB; }
204 void reseed(uint32_t Seed) { Rng.seed(sd: Seed); }
205
206 LLVMContext &getContext() const { return Ctx; }
207 const DataLayout &getDataLayout() const { return DL; }
208 const TargetLibraryInfoImpl &getTLIImpl() const { return TLIImpl; }
209 /// Get the effective vector length for a vector type.
210 uint32_t getEVL(ElementCount EC) const {
211 if (EC.isScalable())
212 return VScale * EC.getKnownMinValue();
213 return EC.getFixedValue();
214 }
215 /// The result is multiplied by VScale for scalable type sizes.
216 uint64_t getEffectiveTypeSize(TypeSize Size) const {
217 if (Size.isScalable())
218 return VScale * Size.getKnownMinValue();
219 return Size.getFixedValue();
220 }
221 /// Returns DL.getTypeAllocSize/getTypeStoreSize for the given type.
222 /// An exception to this is that for scalable vector types, the size is
223 /// computed as if the vector has getEVL(ElementCount) elements.
224 uint64_t getEffectiveTypeAllocSize(Type *Ty);
225 uint64_t getEffectiveTypeStoreSize(Type *Ty);
226
227 const AnyValue &getConstantValue(Constant *C);
228 IntrusiveRefCntPtr<MemoryObject> allocate(uint64_t Size, uint64_t Align,
229 StringRef Name, unsigned AS,
230 MemInitKind InitKind);
231 bool free(uint64_t Address);
232 /// Derive a pointer from a memory object with offset 0.
233 /// Please use Pointer's interface for further manipulations.
234 Pointer deriveFromMemoryObject(IntrusiveRefCntPtr<MemoryObject> Obj);
235 /// Convert byte sequence to a value of the given type. Uninitialized bits are
236 /// flushed according to the options.
237 AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty);
238 /// Convert a value to byte sequence. Padding bits are set to zero.
239 void toBytes(const AnyValue &Val, Type *Ty, MutableArrayRef<Byte> Bytes);
240 /// Direct memory load without checks.
241 AnyValue load(MemoryObject &MO, uint64_t Offset, Type *ValTy);
242 /// Direct memory store without checks.
243 void store(MemoryObject &MO, uint64_t Offset, const AnyValue &Val,
244 Type *ValTy);
245 void storeRawBytes(MemoryObject &MO, uint64_t Offset, const void *Data,
246 uint64_t Size);
247
248 /// Freeze the value in-place.
249 void freeze(AnyValue &Val, Type *Ty);
250
251 Function *getTargetFunction(const Pointer &Ptr);
252 BasicBlock *getTargetBlock(const Pointer &Ptr);
253
254 /// Initialize global variables and function/block objects. This function
255 /// should be called before executing any function. Returns false if the
256 /// initialization fails (e.g., the memory limit is exceeded during
257 /// initialization).
258 bool initGlobalValues();
259 /// Execute the function \p F with arguments \p Args, and store the return
260 /// value in \p RetVal if the function is not void.
261 /// Returns true if the function executed successfully. False indicates an
262 /// error occurred during execution.
263 bool runFunction(Function &F, ArrayRef<AnyValue> Args, AnyValue &RetVal,
264 EventHandler &Handler);
265};
266
267} // namespace llvm::ubi
268
269#endif
270