1//===--- Context.h - State Tracking for llubi -------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLUBI_CONTEXT_H
10#define LLVM_TOOLS_LLUBI_CONTEXT_H
11
12#include "Value.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/Analysis/TargetLibraryInfo.h"
15#include "llvm/AsmParser/AsmParserContext.h"
16#include "llvm/IR/FPEnv.h"
17#include "llvm/IR/Module.h"
18#include "llvm/IR/Operator.h"
19#include <map>
20#include <optional>
21#include <random>
22
23namespace llvm::ubi {
24
25enum class MemInitKind {
26 Zeroed,
27 Uninitialized,
28 Poisoned,
29};
30
31enum class MemAllocKind {
32 Global,
33 BlockAddress,
34 Stack,
35 Malloc,
36 New,
37 NewArray,
38};
39
40enum class MemoryObjectState {
41 // This memory object is accessible.
42 // Valid transitions:
43 // -> Dead (after the end of lifetime of an alloca)
44 // -> Freed (after free is called on a heap object)
45 Alive,
46 // This memory object is out of lifetime. Its contents are poison. Loads and
47 // memory transfers from it are allowed and propagate poison, stores to it
48 // cause immediate UB, and non-accessing operations such as getelementptr are
49 // allowed.
50 // Valid transition:
51 // -> Alive (after the start of lifetime of an alloca)
52 Dead,
53 // This heap memory object has been freed. Any access to it
54 // causes immediate UB. Like dead objects, it is still possible to
55 // perform operations that do not access its content.
56 Freed,
57};
58
59enum class UndefValueBehavior {
60 NonDeterministic, // Each use of the undef value can yield different results.
61 Zero, // All uses of the undef value yield zero.
62};
63
64enum class NaNPropagationBehavior {
65 NonDeterministic, // Non-deterministically choose from valid NaN results
66 PreferredNaN, // The quiet bit is set and the payload is all-zero
67 QuietingNaN, // The quiet bit is set and the payload is copied from any input
68 // operand that is a NaN
69 UnchangedNaN, // The quiet bit and payload are copied from any input operand
70 // that is a NaN
71 TargetSpecificNaN // The quiet bit is set and the payload is picked from a
72 // known target-specific set of "extra" possible NaN
73 // payloads
74};
75
76struct ProgramExitInfo {
77 enum class ProgramExitKind {
78 // Program exited via a normal return
79 Returned,
80 // Program exited with an interpreter error (UB/Unsupported
81 // instruction/etc.)
82 Failed,
83 // Program exited via a call to exit()
84 Exited,
85 // Program exited via a call to abort()
86 Aborted,
87 // Program exited via a call to terminate()
88 Terminated,
89 };
90
91 ProgramExitKind Kind;
92 uint64_t ExitCode;
93
94 explicit ProgramExitInfo(ProgramExitKind Kind, uint64_t ExitCode)
95 : Kind(Kind), ExitCode(ExitCode) {}
96
97 bool isExitedByLibcall() const {
98 return Kind == ProgramExitKind::Exited ||
99 Kind == ProgramExitKind::Aborted ||
100 Kind == ProgramExitKind::Terminated;
101 }
102};
103
104class MemoryObject : public RefCountedBase<MemoryObject> {
105 uint64_t Address;
106 uint64_t Size;
107 SmallVector<Byte, 8> Bytes;
108 StringRef Name;
109 unsigned AS;
110
111 MemoryObjectState State;
112 MemAllocKind AllocKind;
113 bool IsConstant = false;
114 bool IsIRGlobalValue = false;
115
116 // Tagged provenances related to this memory object.
117 // It is used to erasing the tags after the memory object is freed.
118 SmallVector<APInt> AssociatedTags;
119
120 friend class Context;
121
122public:
123 MemoryObject(uint64_t Addr, uint64_t Size, StringRef Name, unsigned AS,
124 MemInitKind InitKind, MemAllocKind AllocKind,
125 bool IsIRGlobalValue = false);
126 MemoryObject(const MemoryObject &) = delete;
127 MemoryObject(MemoryObject &&) = delete;
128 MemoryObject &operator=(const MemoryObject &) = delete;
129 MemoryObject &operator=(MemoryObject &&) = delete;
130 ~MemoryObject();
131
132 uint64_t getAddress() const { return Address; }
133 uint64_t getSize() const { return Size; }
134 StringRef getName() const { return Name; }
135 unsigned getAddressSpace() const { return AS; }
136 MemoryObjectState getState() const { return State; }
137 void setState(MemoryObjectState S) { State = S; }
138 MemAllocKind getAllocKind() const { return AllocKind; }
139 bool isIRGlobalValue() const { return IsIRGlobalValue; }
140 bool isConstant() const { return IsConstant; }
141 void setIsConstant(bool C) { IsConstant = C; }
142
143 bool inBounds(const APInt &NewAddr) const {
144 return NewAddr.uge(RHS: Address) && NewAddr.ule(RHS: Address + Size);
145 }
146
147 Byte &operator[](uint64_t Offset) {
148 assert(Offset < Size && "Offset out of bounds");
149 return Bytes[Offset];
150 }
151 ArrayRef<Byte> getBytes() const { return Bytes; }
152 MutableArrayRef<Byte> getBytes() { return Bytes; }
153
154 bool isGlobal() const;
155 bool isStackAllocated() const;
156 bool isHeapAllocated() const;
157};
158
159/// An interface for handling events and managing outputs during interpretation.
160/// If the handler returns false from any of the methods, the interpreter will
161/// stop execution immediately.
162class EventHandler {
163public:
164 virtual ~EventHandler() = default;
165
166 virtual bool onInstructionExecuted(Instruction &I, const AnyValue &Result) {
167 return true;
168 }
169 virtual void onError(StringRef Msg) {}
170 virtual void onUnrecognizedInstruction(Instruction &I) {}
171 virtual void onImmediateUB(StringRef Msg) {}
172 virtual bool onBBJump(Instruction &I, BasicBlock &To) { return true; }
173 virtual bool onFunctionEntry(Function &F, ArrayRef<AnyValue> Args,
174 CallBase *CallSite) {
175 return true;
176 }
177 virtual bool onFunctionExit(Function &F, const AnyValue &RetVal) {
178 return true;
179 }
180 virtual void onProgramExit(const ProgramExitInfo &ExitInfo) {}
181 virtual bool onPrint(StringRef Msg) {
182 outs() << Msg;
183 outs().flush();
184 return true;
185 }
186};
187
188/// Endianness aware accessor for bytes.
189template <typename ArrayRefT> class BytesView {
190 ArrayRefT Bytes;
191 bool IsLittleEndian;
192
193public:
194 explicit BytesView(ArrayRefT Ref, const DataLayout &DL)
195 : Bytes(Ref), IsLittleEndian(DL.isLittleEndian()) {}
196
197 auto &operator[](uint32_t Index) {
198 return Bytes[IsLittleEndian ? Index : Bytes.size() - 1 - Index];
199 }
200};
201
202using ConstBytesView = BytesView<ArrayRef<Byte>>;
203using MutableBytesView = BytesView<MutableArrayRef<Byte>>;
204
205class MaterializedConstant : public AnyValue {
206 bool Cacheable;
207
208public:
209 MaterializedConstant(std::nullopt_t) : Cacheable(false) {}
210 MaterializedConstant(AnyValue V, bool Cacheable)
211 : AnyValue(std::move(V)), Cacheable(Cacheable) {}
212
213 bool isCacheable() const { return Cacheable; }
214};
215
216/// The global context for the interpreter.
217/// It tracks global state such as heap memory objects and floating point
218/// environment.
219class Context {
220 // Module
221 LLVMContext &Ctx;
222 Module &M;
223 const AsmParserContext *ParserContext;
224 const DataLayout &DL;
225 const TargetLibraryInfoImpl TLIImpl;
226
227 // Configuration
228 uint64_t MaxMem = 0;
229 uint32_t VScale = 4;
230 uint32_t MaxSteps = 0;
231 uint32_t MaxStackDepth = 256;
232 bool Deterministic = false;
233 UndefValueBehavior UndefBehavior = UndefValueBehavior::NonDeterministic;
234 NaNPropagationBehavior NaNBehavior = NaNPropagationBehavior::NonDeterministic;
235 bool FusedMultiplyAdd = false;
236
237 std::mt19937_64 Rng;
238 /// Always returns a random APInt value. It is not controlled by
239 /// Deterministic.
240 APInt generateRandomAPInt(uint32_t BitWidth);
241
242 // Memory
243 uint64_t UsedMem = 0;
244 // The addresses of memory objects are monotonically increasing.
245 // For now we don't model the behavior of address reuse, which is common
246 // with stack coloring.
247 uint64_t AllocationBase = 8;
248 // All live memory objects.
249 DenseMap<uint64_t, IntrusiveRefCntPtr<MemoryObject>> MemoryObjects;
250 // Mapping from tags to provenances. Tags are lazily generated when a
251 // pointer is captured by memory.
252 DenseMap<APInt, IntrusiveRefCntPtr<Provenance>> TaggedProvenances;
253 // Maintains a global list of 'exposed' provenances. This is used to convert
254 // an address back to a pointer with a previously exposed provenance. In
255 // theory the provenance is picked from all previously exposed provenances
256 // using angelic non-determinism. Since llubi is just an interpreter, we make
257 // two approximations:
258 // 1. Each address maps to at most one memory object during the execution of
259 // the program, as AllocationBase increases monotonically.
260 // 2. We maintain the set of exposed provenances. When ptrtoint executes,
261 // the provenance is inserted to the set. When inttoptr executes, it yields
262 // a pointer with a wildcard provenance. That is, each later use will check
263 // whether there is an exposed provenance in the snapshot allowing the
264 // operation. The invalid provenance will be masked out after the operation.
265 // If we cannot pick one, it is UB.
266
267 /// Exposed provenances are grouped by associated memory objects for efficient
268 /// invalidation.
269 struct ExposedProvenance {
270 IntrusiveRefCntPtr<Provenance> Prov;
271 uint64_t Generation;
272
273 bool operator<(const ExposedProvenance &RHS) const {
274 return Generation < RHS.Generation;
275 }
276 };
277 struct ExposedProvenanceSet {
278 // (Provenance, Generation)
279 SmallVector<ExposedProvenance> List;
280 // FIXME: Implement a partial order comparator for provenance instead of
281 // deduplicating by pointers.
282 SmallPtrSet<Provenance *, 4> Set;
283 };
284 std::map<uint64_t, ExposedProvenanceSet> ExposedProvenances;
285 // Global version number for the set of exposed provenances.
286 uint64_t ExposedProvenanceSetGeneration = 0;
287
288 /// Get the tag for the given pointer provenance.
289 APInt getTag(uint32_t BitWidth, Provenance &Prov);
290 AnyValue fromBytes(ConstBytesView Bytes, Type *Ty, uint32_t OffsetInBits,
291 bool CheckPaddingBits, bool *ContainsUndefinedBits);
292 void toBytes(const AnyValue &Val, Type *Ty, uint32_t OffsetInBits,
293 MutableBytesView Bytes, bool PaddingBits);
294
295 AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
296 GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset);
297 AnyValue computePtrAdd(const AnyValue &Ptr, const APInt &Offset,
298 GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset);
299 AnyValue computeScaledPtrAdd(const AnyValue &Ptr, const AnyValue &Index,
300 const APInt &Scale, GEPNoWrapFlags Flags,
301 AnyValue &AccumulatedOffset);
302
303 // Constants
304 // Use std::map to avoid iterator/reference invalidation.
305 std::map<Constant *, MaterializedConstant> ConstCache;
306 // Temporary buffer for non-cacheable constants (e.g.,
307 // undef/ptrtoint/inttoptr).
308 SpecificBumpPtrAllocator<MaterializedConstant> NoncacheableConstBuffer;
309 size_t NoncacheableConstCount = 0;
310 DenseMap<Function *, Pointer> FuncAddrMap;
311 DenseMap<BasicBlock *, Pointer> BlockAddrMap;
312 DenseMap<uint64_t, std::pair<Function *, IntrusiveRefCntPtr<MemoryObject>>>
313 ValidFuncTargets;
314 DenseMap<uint64_t, std::pair<BasicBlock *, IntrusiveRefCntPtr<MemoryObject>>>
315 ValidBlockTargets;
316 DenseMap<GlobalVariable *, Pointer> GlobalAddrMap;
317 MaterializedConstant getConstantValueImpl(Constant *C);
318 MaterializedConstant evaluateConstantExpression(ConstantExpr *CE);
319
320 // Floating-point environment
321 RoundingMode CurrentRoundingMode = RoundingMode::NearestTiesToEven;
322 fp::ExceptionBehavior CurrentExceptionBehavior =
323 fp::ExceptionBehavior::ebIgnore;
324
325 // TODO: errno
326
327public:
328 explicit Context(Module &M, const AsmParserContext *ParserContext);
329 Context(const Context &) = delete;
330 Context(Context &&) = delete;
331 Context &operator=(const Context &) = delete;
332 Context &operator=(Context &&) = delete;
333 ~Context();
334
335 void setMemoryLimit(uint64_t Max) { MaxMem = Max; }
336 void setVScale(uint32_t VS) { VScale = VS; }
337 void setMaxSteps(uint32_t MS) { MaxSteps = MS; }
338 void setMaxStackDepth(uint32_t Depth) { MaxStackDepth = Depth; }
339 void setFusedMultiplyAdd(bool F) { FusedMultiplyAdd = F; }
340 uint64_t getMemoryLimit() const { return MaxMem; }
341 uint32_t getVScale() const { return VScale; }
342 uint32_t getMaxSteps() const { return MaxSteps; }
343 uint32_t getMaxStackDepth() const { return MaxStackDepth; }
344 void setDeterministic(bool D) { Deterministic = D; }
345 bool isDeterministic() const { return Deterministic; }
346 bool mayUseNonDeterminism() const { return !Deterministic; }
347 UndefValueBehavior getEffectiveUndefValueBehavior() const;
348 NaNPropagationBehavior getEffectiveNaNPropagationBehavior() const;
349 bool fuseMultiplyAdd() const { return FusedMultiplyAdd; }
350 void setUndefValueBehavior(UndefValueBehavior UB) { UndefBehavior = UB; }
351 void setNaNPropagationBehavior(NaNPropagationBehavior NaNBehav) {
352 NaNBehavior = NaNBehav;
353 }
354 void reseed(uint32_t Seed) { Rng.seed(sd: Seed); }
355
356 LLVMContext &getContext() const { return Ctx; }
357 Module &getModule() const { return M; }
358 const AsmParserContext *getParserContext() const { return ParserContext; }
359 const DataLayout &getDataLayout() const { return DL; }
360 const Triple &getTargetTriple() const { return M.getTargetTriple(); }
361 const TargetLibraryInfoImpl &getTLIImpl() const { return TLIImpl; }
362 /// Get the effective vector length for a vector type.
363 uint32_t getEVL(ElementCount EC) const {
364 if (EC.isScalable())
365 return VScale * EC.getKnownMinValue();
366 return EC.getFixedValue();
367 }
368 /// The result is multiplied by VScale for scalable type sizes.
369 uint64_t getEffectiveTypeSize(TypeSize Size) const {
370 if (Size.isScalable())
371 return VScale * Size.getKnownMinValue();
372 return Size.getFixedValue();
373 }
374 /// Returns DL.getTypeAllocSize/getTypeStoreSize for the given type.
375 /// An exception to this is that for scalable vector types, the size is
376 /// computed as if the vector has getEVL(ElementCount) elements.
377 uint64_t getEffectiveTypeAllocSize(Type *Ty);
378 uint64_t getEffectiveTypeStoreSize(Type *Ty);
379
380 /// Returns a pointer to an evaluated constant \p C. If it cannot be
381 /// evaluated, returns nullptr. Note that it returns a pointer to a temporary
382 /// buffer when \p C is not context-free. The caller is responsible for
383 /// calling resetNoncacheableConstantBuffer after all references are dropped.
384 const MaterializedConstant *getConstantValue(Constant *C);
385 void resetNoncacheableConstantBuffer();
386 IntrusiveRefCntPtr<MemoryObject> allocate(uint64_t Size, uint64_t Align,
387 StringRef Name, unsigned AS,
388 MemInitKind InitKind,
389 MemAllocKind AllocKind,
390 bool IsIRGlobalValue = false);
391 bool free(const MemoryObject &Obj);
392 /// Derive a pointer from a memory object with offset 0.
393 /// Please use Pointer's interface for further manipulations.
394 Pointer deriveFromMemoryObject(IntrusiveRefCntPtr<MemoryObject> Obj);
395 /// Mark this provenance as exposed. It is no-op if it is not associated with
396 /// a memory object or a wildcard provenance.
397 void exposeProvenance(Provenance &Prov);
398 /// A helper to check both concrete and wildcard provenance. Please don't
399 /// report UB inside the \p Check callback due to the existence of wildcard
400 /// provenance.
401 /// Returns the resolved memory object if success. \p Ptr is guaranteed to be
402 /// within the bounds of the returned memory object. But the state is not
403 /// checked, for better diagnostic messages. If \p HasSideEffect is true, some
404 /// invalid provenances will be masked out. Note that in this case the caller
405 /// must report UB when the result is nullptr.
406 MemoryObject *checkProvenance(const Pointer &Ptr,
407 function_ref<bool(const Provenance &)> Check,
408 bool HasSideEffect = true);
409 /// Returns the snapshot of currently exposed provenances.
410 IntrusiveRefCntPtr<Provenance> getWildcardProvenance();
411 /// Convert byte sequence to a value of the given type. Uninitialized bits are
412 /// flushed according to the options.
413 /// If \p ContainsUndefinedBits is provided, it will be set to true when there
414 /// are poison or undef bits in the value (i.e., padding bits are ignored).
415 AnyValue fromBytes(ArrayRef<Byte> Bytes, Type *Ty,
416 bool *ContainsUndefinedBits = nullptr);
417 /// Convert a value to byte sequence. Padding bits are set to zero.
418 void toBytes(const AnyValue &Val, Type *Ty, MutableArrayRef<Byte> Bytes);
419 /// Direct memory load without checks.
420 AnyValue load(MemoryObject &MO, uint64_t Offset, Type *ValTy,
421 bool *ContainsUndefinedBits = nullptr);
422 /// Direct memory store without checks.
423 void store(MemoryObject &MO, uint64_t Offset, const AnyValue &Val,
424 Type *ValTy);
425 void storeRawBytes(MemoryObject &MO, uint64_t Offset, const void *Data,
426 uint64_t Size);
427
428 /// Freeze the value in-place.
429 void freeze(AnyValue &Val, Type *Ty);
430
431 AnyValue computeGEP(GEPOperator &GEP,
432 function_ref<const AnyValue &(Value *V)> GetValue);
433
434 Function *getTargetFunction(const Pointer &Ptr);
435 BasicBlock *getTargetBlock(const Pointer &Ptr);
436
437 /// Initialize global variables and function/block objects. This function
438 /// should be called before executing any function. Returns false if the
439 /// initialization fails (e.g., the memory limit is exceeded during
440 /// initialization).
441 bool initGlobalValues();
442 /// Execute the function \p F with arguments \p Args, and store the return
443 /// value in \p RetVal if the function is not void.
444 /// Returns a `ProgramExitInfo` indicating how the program finished:
445 /// Kind = Returned: The program executed successfully and returned normally.
446 /// Kind = Failed: The interpreter encountered an error and could not execute
447 /// the program.
448 /// Kind = Exited/Aborted/Terminated: The program ended via an
449 /// explicit call to `exit()`, `abort()`, or `terminate()`.
450 ProgramExitInfo runFunction(Function &F, ArrayRef<AnyValue> Args,
451 AnyValue &RetVal, EventHandler &Handler);
452
453 RoundingMode getCurrentRoundingMode() const;
454 fp::ExceptionBehavior getCurrentExceptionBehavior() const;
455 void setCurrentRoundingMode(RoundingMode RM);
456 void setCurrentExceptionBehavior(fp::ExceptionBehavior EB);
457 bool isDefaultFPEnv() const;
458
459 bool getRandomBool();
460 uint64_t getRandomUInt64();
461};
462
463} // namespace llvm::ubi
464
465#endif
466