1//===--- Value.h - Value Representation for llubi ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLUBI_VALUE_H
10#define LLVM_TOOLS_LLUBI_VALUE_H
11
12#include "llvm/ADT/APFloat.h"
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/IntrusiveRefCntPtr.h"
15#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/Type.h"
17#include "llvm/Support/raw_ostream.h"
18
19namespace llvm::ubi {
20
21class MemoryObject;
22class Context;
23class AnyValue;
24
25/// Representation of a byte in memory.
26/// How to interpret the byte per bit:
27/// - If the concrete mask bit is 0, the bit is either undef or poison. The
28/// value bit indicates whether it is undef.
29/// - If the concrete mask bit is 1, the bit is a concrete value. The value bit
30/// stores the concrete bit value. The tag mask bit indicates whether it is a
31/// pointer bit, and the tag value bit is used for provenance tracking of
32/// pointers.
33///
34/// Note that the idealized interpreter would store a full pointer tag for every
35/// single pointer bit, as well as the position of that bit in the pointer. The
36/// provenance is preserved as the bit is copied around, and when a sequence of
37/// bytes is eventually converted back to a pointer, all bits must be in the
38/// original order and have the same provenance. However, that would be
39/// prohibitively expensive. So instead, we rely on randomized ptr-sized tags.
40/// This means that if bits get reordered, or if bits from different pointers
41/// get mixed, then the result is unlikely to be a valid tag.
42struct Byte {
43 uint8_t ConcreteMask;
44 uint8_t Value;
45 uint8_t TagMask; // A mask to indicate which bits are pointer bits.
46 uint8_t TagValue; // For each pointer bit, the corresponding bit of the tag
47 // for provenance tracking.
48
49 static Byte poison() { return Byte{.ConcreteMask: 0, .Value: 0, .TagMask: 0, .TagValue: 0}; }
50 static Byte undef() { return Byte{.ConcreteMask: 0, .Value: 255, .TagMask: 0, .TagValue: 0}; }
51 static Byte concrete(uint8_t Val) { return Byte{.ConcreteMask: 255, .Value: Val, .TagMask: 0, .TagValue: 0}; }
52
53 void zeroBits(uint8_t Mask) {
54 ConcreteMask |= Mask;
55 Value &= ~Mask;
56 }
57
58 void poisonBits(uint8_t Mask) {
59 ConcreteMask &= ~Mask;
60 Value &= ~Mask;
61 }
62
63 void undefBits(uint8_t Mask) {
64 ConcreteMask &= ~Mask;
65 Value |= Mask;
66 }
67
68 void writeBits(uint8_t Mask, uint8_t Val) {
69 ConcreteMask |= Mask;
70 Value = (Value & ~Mask) | (Val & Mask);
71 TagMask &= ~Mask;
72 }
73
74 void writeTagBits(uint8_t Mask, uint8_t Tag) {
75 assert(
76 (ConcreteMask & Mask) == Mask &&
77 "Please ensure pointer bits are concrete before calling writeTagBits.");
78 TagMask |= Mask;
79 TagValue = (TagValue & ~Mask) | (Tag & Mask);
80 }
81
82 /// Returns a logical byte that is part of two adjacent bytes.
83 /// Example with ShAmt = 5:
84 /// | Low | High |
85 /// LSB | 0 1 0 1 0 1 0 1 | 0 0 0 0 1 1 1 1 | MSB
86 /// Result = | 1 0 1 0 0 0 0 1 |
87 static Byte fshr(const Byte &Low, const Byte &High, uint32_t ShAmt) {
88 return Byte{
89 .ConcreteMask: static_cast<uint8_t>((Low.ConcreteMask | (High.ConcreteMask << 8)) >>
90 ShAmt),
91 .Value: static_cast<uint8_t>((Low.Value | (High.Value << 8)) >> ShAmt),
92 .TagMask: static_cast<uint8_t>((Low.TagMask | (High.TagMask << 8)) >> ShAmt),
93 .TagValue: static_cast<uint8_t>((Low.TagValue | (High.TagValue << 8)) >> ShAmt)};
94 }
95
96 Byte lshr(uint8_t Shift) const {
97 return Byte{.ConcreteMask: static_cast<uint8_t>(ConcreteMask >> Shift),
98 .Value: static_cast<uint8_t>(Value >> Shift),
99 .TagMask: static_cast<uint8_t>(TagMask >> Shift),
100 .TagValue: static_cast<uint8_t>(TagValue >> Shift)};
101 }
102};
103
104// TODO: Byte
105enum class StorageKind {
106 Integer,
107 Float,
108 Pointer,
109 Poison,
110 None, // Placeholder for void type
111 Aggregate, // Struct, Array or Vector
112};
113
114/// Tri-state boolean value.
115enum class BooleanKind { False, True, Poison };
116
117/// A set of previously exposed provenances. It is originally yielded by
118/// inttoptr, and shared by pointers derived from the result.
119///
120/// Each capability check may invalidate some provenances. If we cannot
121/// pick one, it is UB. That is, from the angelic non-determinism view,
122/// we cannot pick a provenance to make the program reach this point.
123///
124/// For efficiency, this class has different forms in two stages:
125/// 1. Before any memory access is performed, ActiveMask is set to zero and
126/// Generation represents the global generation number of the snapshot.
127/// 2. After a memory access is performed, we can determine exactly one memory
128/// object to be accessed (address ranges are distinct). In this case,
129/// BaseAddress is set and ActiveMask is non-zero. ActiveMask represents the
130/// validity of the first N exposed provenances associated with the memory
131/// object. The bitwidth N is the number of provenances in the list with
132/// List[I].Generation <= WildcardProvenance::Generation (The generation field
133/// in the list is monotonically increasing). That is, we can only access
134/// through exposed provenances before inttoptr executes. Note that if
135/// ActiveMask becomes zero again, UB must be triggered.
136class WildcardProvenance : public RefCountedBase<WildcardProvenance> {
137 APInt ActiveMask;
138 union {
139 uint64_t Generation;
140 uint64_t BaseAddress;
141 };
142
143 friend class Context;
144
145public:
146 explicit WildcardProvenance(uint64_t Generation)
147 : ActiveMask(), Generation(Generation) {}
148};
149
150/// Components of a pointer excluding address. They are shared between pointer
151/// values, as most of operations don't change the provenance.
152/// Each node will be assigned a unique, pointer-sized tag, which is used to
153/// represent the pointer in the memory.
154/// The provenance can be either concrete or wildcard, as determined by the
155/// cases below:
156/// Obj Wildcard State
157/// Null Null Invalid
158/// Null NonNull Wildcard
159/// NonNull Null Concrete
160/// NonNull NonNull Wildcard (associated with a specific MO)
161class Provenance : public RefCountedBase<Provenance> {
162 // TODO: store reference to the provenance of the pointer it is derived from
163
164 // The underlying memory object. It can be null for invalid or dangling
165 // pointers. Besides, for pointers with wildcard provenance, it can be null
166 // until the memory object is resolved by gep inbounds.
167 IntrusiveRefCntPtr<MemoryObject> Obj;
168
169 // A tag is a randomly generated unique identifier to recover the provenance
170 // of a pointer. The length of tag is equal to the store size of the pointer
171 // type, in bits. It may produce false negatives in some corner cases. But in
172 // real practice the false negative rate should be negligible.
173 // A zero tag is invalid.
174 APInt Tag;
175
176 // Null if it is concrete.
177 IntrusiveRefCntPtr<WildcardProvenance> Wildcard;
178
179 // TODO: modeling nofree
180 // TODO: modeling captures
181 // TODO: modeling inrange(Start, End) attribute
182
183 const APInt &getTag() const { return Tag; }
184 void setTag(const APInt &T) { Tag = T; }
185
186 friend class Context;
187
188public:
189 Provenance(IntrusiveRefCntPtr<MemoryObject> Obj) : Obj(std::move(Obj)) {}
190 static IntrusiveRefCntPtr<Provenance> nullary();
191 IntrusiveRefCntPtr<Provenance> getWithKnownMemoryObject(MemoryObject &Obj);
192 MemoryObject *getMemoryObject() const { return Obj.get(); }
193 bool isWildcard() const { return Wildcard != nullptr; }
194};
195
196class Pointer {
197 // The provenance of the pointer.
198 IntrusiveRefCntPtr<Provenance> Prov;
199 // The address of the pointer. The bit width is determined by
200 // DataLayout::getPointerSizeInBits.
201 APInt Address;
202
203public:
204 explicit Pointer(const APInt &Address)
205 : Prov(Provenance::nullary()), Address(Address) {}
206 explicit Pointer(IntrusiveRefCntPtr<Provenance> Prov, const APInt &Address)
207 : Prov(std::move(Prov)), Address(Address) {
208 assert(this->Prov && "Invalid provenance.");
209 }
210 Pointer getWithNewAddr(const APInt &NewAddr) const {
211 return Pointer(Prov, NewAddr);
212 }
213 Pointer getWithNewProvenance(IntrusiveRefCntPtr<Provenance> NewProv) const {
214 return Pointer(NewProv, Address);
215 }
216 static AnyValue null(unsigned AS, const DataLayout &DL);
217 bool isNullPtr(unsigned AS, const DataLayout &DL) const;
218 void print(raw_ostream &OS) const;
219 const APInt &address() const { return Address; }
220 Provenance &provenance() const { return *Prov; }
221};
222
223// Value representation for actual values of LLVM values.
224// We don't model undef values here (except for byte types).
225class [[nodiscard]] AnyValue {
226 StorageKind Kind;
227 union {
228 APInt IntVal;
229 APFloat FloatVal;
230 Pointer PtrVal;
231 std::vector<AnyValue> AggVal;
232 };
233
234 struct PoisonTag {};
235 void destroy();
236
237public:
238 AnyValue() : Kind(StorageKind::None) {}
239 explicit AnyValue(PoisonTag) : Kind(StorageKind::Poison) {}
240 AnyValue(APInt Val) : Kind(StorageKind::Integer), IntVal(std::move(Val)) {}
241 AnyValue(APFloat Val) : Kind(StorageKind::Float), FloatVal(std::move(Val)) {}
242 AnyValue(Pointer Val) : Kind(StorageKind::Pointer), PtrVal(std::move(Val)) {}
243 AnyValue(std::vector<AnyValue> Val)
244 : Kind(StorageKind::Aggregate), AggVal(std::move(Val)) {}
245 AnyValue(const AnyValue &Other);
246 AnyValue(AnyValue &&Other);
247 AnyValue &operator=(const AnyValue &);
248 AnyValue &operator=(AnyValue &&);
249 ~AnyValue() { destroy(); }
250
251 void print(raw_ostream &OS) const;
252
253 static AnyValue poison() { return AnyValue(PoisonTag{}); }
254 static AnyValue boolean(bool Val) { return AnyValue(APInt(1, Val)); }
255 static AnyValue getPoisonValue(Context &Ctx, Type *Ty);
256 static AnyValue getNullValue(Context &Ctx, Type *Ty);
257 static AnyValue getVectorSplat(const AnyValue &Scalar, size_t NumElements);
258
259 bool isNone() const { return Kind == StorageKind::None; }
260 bool isPoison() const { return Kind == StorageKind::Poison; }
261 bool isInteger() const { return Kind == StorageKind::Integer; }
262 bool isFloat() const { return Kind == StorageKind::Float; }
263 bool isPointer() const { return Kind == StorageKind::Pointer; }
264 bool isAggregate() const { return Kind == StorageKind::Aggregate; }
265
266 bool isCompatibleWith(Type *Ty) const {
267 switch (Kind) {
268 case StorageKind::None:
269 return Ty->isVoidTy();
270 case StorageKind::Poison:
271 return Ty->isFloatingPointTy() || Ty->isIntegerTy() || Ty->isPointerTy();
272 case StorageKind::Integer:
273 return Ty->isIntegerTy();
274 case StorageKind::Float:
275 return Ty->isFloatingPointTy();
276 case StorageKind::Pointer:
277 return Ty->isPointerTy();
278 // We don't check elements recursively.
279 case StorageKind::Aggregate:
280 return Ty->isAggregateType() || Ty->isVectorTy();
281 }
282 llvm_unreachable("Unhandled storage kind.");
283 }
284
285 const APInt &asInteger() const {
286 assert(Kind == StorageKind::Integer && "Expect an integer value");
287 return IntVal;
288 }
289
290 const APFloat &asFloat() const {
291 assert(Kind == StorageKind::Float && "Expect a float value");
292 return FloatVal;
293 }
294
295 const Pointer &asPointer() const {
296 assert(Kind == StorageKind::Pointer && "Expect a pointer value");
297 return PtrVal;
298 }
299
300 const std::vector<AnyValue> &asAggregate() const {
301 assert(Kind == StorageKind::Aggregate &&
302 "Expect an aggregate/vector value");
303 return AggVal;
304 }
305
306 std::vector<AnyValue> &asAggregate() {
307 assert(Kind == StorageKind::Aggregate &&
308 "Expect an aggregate/vector value");
309 return AggVal;
310 }
311
312 // Helper function for C++ 17 structured bindings.
313 template <size_t I> const AnyValue &get() const {
314 assert(Kind == StorageKind::Aggregate &&
315 "Expect an aggregate/vector value");
316 assert(I < AggVal.size() && "Index out of bounds");
317 return AggVal[I];
318 }
319
320 BooleanKind asBoolean() const {
321 if (isPoison())
322 return BooleanKind::Poison;
323 return asInteger().isZero() ? BooleanKind::False : BooleanKind::True;
324 }
325};
326
327inline raw_ostream &operator<<(raw_ostream &OS, const AnyValue &V) {
328 V.print(OS);
329 return OS;
330}
331
332inline raw_ostream &operator<<(raw_ostream &OS, const Pointer &P) {
333 P.print(OS);
334 return OS;
335}
336
337inline AnyValue addNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
338 bool HasNUW) {
339 APInt Res = LHS + RHS;
340 if (HasNUW && Res.ult(RHS))
341 return AnyValue::poison();
342 if (HasNSW && LHS.isNonNegative() == RHS.isNonNegative() &&
343 LHS.isNonNegative() != Res.isNonNegative())
344 return AnyValue::poison();
345 return Res;
346}
347
348inline AnyValue subNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
349 bool HasNUW) {
350 APInt Res = LHS - RHS;
351 if (HasNUW && Res.ugt(RHS: LHS))
352 return AnyValue::poison();
353 if (HasNSW && LHS.isNonNegative() != RHS.isNonNegative() &&
354 LHS.isNonNegative() != Res.isNonNegative())
355 return AnyValue::poison();
356 return Res;
357}
358
359inline AnyValue mulNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
360 bool HasNUW) {
361 bool Overflow = false;
362 APInt Res = LHS.smul_ov(RHS, Overflow);
363 if (HasNSW && Overflow)
364 return AnyValue::poison();
365 if (HasNUW) {
366 (void)LHS.umul_ov(RHS, Overflow);
367 if (Overflow)
368 return AnyValue::poison();
369 }
370 return Res;
371}
372
373} // namespace llvm::ubi
374
375#endif
376