| 1 | //===--- Value.h - Value Representation for llubi ---------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_TOOLS_LLUBI_VALUE_H |
| 10 | #define LLVM_TOOLS_LLUBI_VALUE_H |
| 11 | |
| 12 | #include "llvm/ADT/APFloat.h" |
| 13 | #include "llvm/ADT/APInt.h" |
| 14 | #include "llvm/ADT/IntrusiveRefCntPtr.h" |
| 15 | #include "llvm/IR/DataLayout.h" |
| 16 | #include "llvm/IR/Type.h" |
| 17 | #include "llvm/Support/raw_ostream.h" |
| 18 | |
| 19 | namespace llvm::ubi { |
| 20 | |
| 21 | class MemoryObject; |
| 22 | class Context; |
| 23 | class AnyValue; |
| 24 | |
| 25 | /// Representation of a byte in memory. |
| 26 | /// How to interpret the byte per bit: |
| 27 | /// - If the concrete mask bit is 0, the bit is either undef or poison. The |
| 28 | /// value bit indicates whether it is undef. |
| 29 | /// - If the concrete mask bit is 1, the bit is a concrete value. The value bit |
| 30 | /// stores the concrete bit value. The tag mask bit indicates whether it is a |
| 31 | /// pointer bit, and the tag value bit is used for provenance tracking of |
| 32 | /// pointers. |
| 33 | /// |
| 34 | /// Note that the idealized interpreter would store a full pointer tag for every |
| 35 | /// single pointer bit, as well as the position of that bit in the pointer. The |
| 36 | /// provenance is preserved as the bit is copied around, and when a sequence of |
| 37 | /// bytes is eventually converted back to a pointer, all bits must be in the |
| 38 | /// original order and have the same provenance. However, that would be |
| 39 | /// prohibitively expensive. So instead, we rely on randomized ptr-sized tags. |
| 40 | /// This means that if bits get reordered, or if bits from different pointers |
| 41 | /// get mixed, then the result is unlikely to be a valid tag. |
| 42 | struct Byte { |
| 43 | uint8_t ConcreteMask; |
| 44 | uint8_t Value; |
| 45 | uint8_t TagMask; // A mask to indicate which bits are pointer bits. |
| 46 | uint8_t TagValue; // For each pointer bit, the corresponding bit of the tag |
| 47 | // for provenance tracking. |
| 48 | |
| 49 | static Byte poison() { return Byte{.ConcreteMask: 0, .Value: 0, .TagMask: 0, .TagValue: 0}; } |
| 50 | static Byte undef() { return Byte{.ConcreteMask: 0, .Value: 255, .TagMask: 0, .TagValue: 0}; } |
| 51 | static Byte concrete(uint8_t Val) { return Byte{.ConcreteMask: 255, .Value: Val, .TagMask: 0, .TagValue: 0}; } |
| 52 | |
| 53 | void zeroBits(uint8_t Mask) { |
| 54 | ConcreteMask |= Mask; |
| 55 | Value &= ~Mask; |
| 56 | } |
| 57 | |
| 58 | void poisonBits(uint8_t Mask) { |
| 59 | ConcreteMask &= ~Mask; |
| 60 | Value &= ~Mask; |
| 61 | } |
| 62 | |
| 63 | void undefBits(uint8_t Mask) { |
| 64 | ConcreteMask &= ~Mask; |
| 65 | Value |= Mask; |
| 66 | } |
| 67 | |
| 68 | void writeBits(uint8_t Mask, uint8_t Val) { |
| 69 | ConcreteMask |= Mask; |
| 70 | Value = (Value & ~Mask) | (Val & Mask); |
| 71 | TagMask &= ~Mask; |
| 72 | } |
| 73 | |
| 74 | void writeTagBits(uint8_t Mask, uint8_t Tag) { |
| 75 | assert( |
| 76 | (ConcreteMask & Mask) == Mask && |
| 77 | "Please ensure pointer bits are concrete before calling writeTagBits." ); |
| 78 | TagMask |= Mask; |
| 79 | TagValue = (TagValue & ~Mask) | (Tag & Mask); |
| 80 | } |
| 81 | |
| 82 | /// Returns a logical byte that is part of two adjacent bytes. |
| 83 | /// Example with ShAmt = 5: |
| 84 | /// | Low | High | |
| 85 | /// LSB | 0 1 0 1 0 1 0 1 | 0 0 0 0 1 1 1 1 | MSB |
| 86 | /// Result = | 1 0 1 0 0 0 0 1 | |
| 87 | static Byte fshr(const Byte &Low, const Byte &High, uint32_t ShAmt) { |
| 88 | return Byte{ |
| 89 | .ConcreteMask: static_cast<uint8_t>((Low.ConcreteMask | (High.ConcreteMask << 8)) >> |
| 90 | ShAmt), |
| 91 | .Value: static_cast<uint8_t>((Low.Value | (High.Value << 8)) >> ShAmt), |
| 92 | .TagMask: static_cast<uint8_t>((Low.TagMask | (High.TagMask << 8)) >> ShAmt), |
| 93 | .TagValue: static_cast<uint8_t>((Low.TagValue | (High.TagValue << 8)) >> ShAmt)}; |
| 94 | } |
| 95 | |
| 96 | Byte lshr(uint8_t Shift) const { |
| 97 | return Byte{.ConcreteMask: static_cast<uint8_t>(ConcreteMask >> Shift), |
| 98 | .Value: static_cast<uint8_t>(Value >> Shift), |
| 99 | .TagMask: static_cast<uint8_t>(TagMask >> Shift), |
| 100 | .TagValue: static_cast<uint8_t>(TagValue >> Shift)}; |
| 101 | } |
| 102 | }; |
| 103 | |
| 104 | // TODO: Byte |
| 105 | enum class StorageKind { |
| 106 | Integer, |
| 107 | Float, |
| 108 | Pointer, |
| 109 | Poison, |
| 110 | None, // Placeholder for void type |
| 111 | Aggregate, // Struct, Array or Vector |
| 112 | }; |
| 113 | |
| 114 | /// Tri-state boolean value. |
| 115 | enum class BooleanKind { False, True, Poison }; |
| 116 | |
| 117 | /// A set of previously exposed provenances. It is originally yielded by |
| 118 | /// inttoptr, and shared by pointers derived from the result. |
| 119 | /// |
| 120 | /// Each capability check may invalidate some provenances. If we cannot |
| 121 | /// pick one, it is UB. That is, from the angelic non-determinism view, |
| 122 | /// we cannot pick a provenance to make the program reach this point. |
| 123 | /// |
| 124 | /// For efficiency, this class has different forms in two stages: |
| 125 | /// 1. Before any memory access is performed, ActiveMask is set to zero and |
| 126 | /// Generation represents the global generation number of the snapshot. |
| 127 | /// 2. After a memory access is performed, we can determine exactly one memory |
| 128 | /// object to be accessed (address ranges are distinct). In this case, |
| 129 | /// BaseAddress is set and ActiveMask is non-zero. ActiveMask represents the |
| 130 | /// validity of the first N exposed provenances associated with the memory |
| 131 | /// object. The bitwidth N is the number of provenances in the list with |
| 132 | /// List[I].Generation <= WildcardProvenance::Generation (The generation field |
| 133 | /// in the list is monotonically increasing). That is, we can only access |
| 134 | /// through exposed provenances before inttoptr executes. Note that if |
| 135 | /// ActiveMask becomes zero again, UB must be triggered. |
| 136 | class WildcardProvenance : public RefCountedBase<WildcardProvenance> { |
| 137 | APInt ActiveMask; |
| 138 | union { |
| 139 | uint64_t Generation; |
| 140 | uint64_t BaseAddress; |
| 141 | }; |
| 142 | |
| 143 | friend class Context; |
| 144 | |
| 145 | public: |
| 146 | explicit WildcardProvenance(uint64_t Generation) |
| 147 | : ActiveMask(), Generation(Generation) {} |
| 148 | }; |
| 149 | |
| 150 | /// Components of a pointer excluding address. They are shared between pointer |
| 151 | /// values, as most of operations don't change the provenance. |
| 152 | /// Each node will be assigned a unique, pointer-sized tag, which is used to |
| 153 | /// represent the pointer in the memory. |
| 154 | /// The provenance can be either concrete or wildcard, as determined by the |
| 155 | /// cases below: |
| 156 | /// Obj Wildcard State |
| 157 | /// Null Null Invalid |
| 158 | /// Null NonNull Wildcard |
| 159 | /// NonNull Null Concrete |
| 160 | /// NonNull NonNull Wildcard (associated with a specific MO) |
| 161 | class Provenance : public RefCountedBase<Provenance> { |
| 162 | // TODO: store reference to the provenance of the pointer it is derived from |
| 163 | |
| 164 | // The underlying memory object. It can be null for invalid or dangling |
| 165 | // pointers. Besides, for pointers with wildcard provenance, it can be null |
| 166 | // until the memory object is resolved by gep inbounds. |
| 167 | IntrusiveRefCntPtr<MemoryObject> Obj; |
| 168 | |
| 169 | // A tag is a randomly generated unique identifier to recover the provenance |
| 170 | // of a pointer. The length of tag is equal to the store size of the pointer |
| 171 | // type, in bits. It may produce false negatives in some corner cases. But in |
| 172 | // real practice the false negative rate should be negligible. |
| 173 | // A zero tag is invalid. |
| 174 | APInt Tag; |
| 175 | |
| 176 | // Null if it is concrete. |
| 177 | IntrusiveRefCntPtr<WildcardProvenance> Wildcard; |
| 178 | |
| 179 | // TODO: modeling nofree |
| 180 | // TODO: modeling captures |
| 181 | // TODO: modeling inrange(Start, End) attribute |
| 182 | |
| 183 | const APInt &getTag() const { return Tag; } |
| 184 | void setTag(const APInt &T) { Tag = T; } |
| 185 | |
| 186 | friend class Context; |
| 187 | |
| 188 | public: |
| 189 | Provenance(IntrusiveRefCntPtr<MemoryObject> Obj) : Obj(std::move(Obj)) {} |
| 190 | static IntrusiveRefCntPtr<Provenance> nullary(); |
| 191 | IntrusiveRefCntPtr<Provenance> getWithKnownMemoryObject(MemoryObject &Obj); |
| 192 | MemoryObject *getMemoryObject() const { return Obj.get(); } |
| 193 | bool isWildcard() const { return Wildcard != nullptr; } |
| 194 | }; |
| 195 | |
| 196 | class Pointer { |
| 197 | // The provenance of the pointer. |
| 198 | IntrusiveRefCntPtr<Provenance> Prov; |
| 199 | // The address of the pointer. The bit width is determined by |
| 200 | // DataLayout::getPointerSizeInBits. |
| 201 | APInt Address; |
| 202 | |
| 203 | public: |
| 204 | explicit Pointer(const APInt &Address) |
| 205 | : Prov(Provenance::nullary()), Address(Address) {} |
| 206 | explicit Pointer(IntrusiveRefCntPtr<Provenance> Prov, const APInt &Address) |
| 207 | : Prov(std::move(Prov)), Address(Address) { |
| 208 | assert(this->Prov && "Invalid provenance." ); |
| 209 | } |
| 210 | Pointer getWithNewAddr(const APInt &NewAddr) const { |
| 211 | return Pointer(Prov, NewAddr); |
| 212 | } |
| 213 | Pointer getWithNewProvenance(IntrusiveRefCntPtr<Provenance> NewProv) const { |
| 214 | return Pointer(NewProv, Address); |
| 215 | } |
| 216 | static AnyValue null(unsigned AS, const DataLayout &DL); |
| 217 | bool isNullPtr(unsigned AS, const DataLayout &DL) const; |
| 218 | void print(raw_ostream &OS) const; |
| 219 | const APInt &address() const { return Address; } |
| 220 | Provenance &provenance() const { return *Prov; } |
| 221 | }; |
| 222 | |
| 223 | // Value representation for actual values of LLVM values. |
| 224 | // We don't model undef values here (except for byte types). |
| 225 | class [[nodiscard]] AnyValue { |
| 226 | StorageKind Kind; |
| 227 | union { |
| 228 | APInt IntVal; |
| 229 | APFloat FloatVal; |
| 230 | Pointer PtrVal; |
| 231 | std::vector<AnyValue> AggVal; |
| 232 | }; |
| 233 | |
| 234 | struct PoisonTag {}; |
| 235 | void destroy(); |
| 236 | |
| 237 | public: |
| 238 | AnyValue() : Kind(StorageKind::None) {} |
| 239 | explicit AnyValue(PoisonTag) : Kind(StorageKind::Poison) {} |
| 240 | AnyValue(APInt Val) : Kind(StorageKind::Integer), IntVal(std::move(Val)) {} |
| 241 | AnyValue(APFloat Val) : Kind(StorageKind::Float), FloatVal(std::move(Val)) {} |
| 242 | AnyValue(Pointer Val) : Kind(StorageKind::Pointer), PtrVal(std::move(Val)) {} |
| 243 | AnyValue(std::vector<AnyValue> Val) |
| 244 | : Kind(StorageKind::Aggregate), AggVal(std::move(Val)) {} |
| 245 | AnyValue(const AnyValue &Other); |
| 246 | AnyValue(AnyValue &&Other); |
| 247 | AnyValue &operator=(const AnyValue &); |
| 248 | AnyValue &operator=(AnyValue &&); |
| 249 | ~AnyValue() { destroy(); } |
| 250 | |
| 251 | void print(raw_ostream &OS) const; |
| 252 | |
| 253 | static AnyValue poison() { return AnyValue(PoisonTag{}); } |
| 254 | static AnyValue boolean(bool Val) { return AnyValue(APInt(1, Val)); } |
| 255 | static AnyValue getPoisonValue(Context &Ctx, Type *Ty); |
| 256 | static AnyValue getNullValue(Context &Ctx, Type *Ty); |
| 257 | static AnyValue getVectorSplat(const AnyValue &Scalar, size_t NumElements); |
| 258 | |
| 259 | bool isNone() const { return Kind == StorageKind::None; } |
| 260 | bool isPoison() const { return Kind == StorageKind::Poison; } |
| 261 | bool isInteger() const { return Kind == StorageKind::Integer; } |
| 262 | bool isFloat() const { return Kind == StorageKind::Float; } |
| 263 | bool isPointer() const { return Kind == StorageKind::Pointer; } |
| 264 | bool isAggregate() const { return Kind == StorageKind::Aggregate; } |
| 265 | |
| 266 | bool isCompatibleWith(Type *Ty) const { |
| 267 | switch (Kind) { |
| 268 | case StorageKind::None: |
| 269 | return Ty->isVoidTy(); |
| 270 | case StorageKind::Poison: |
| 271 | return Ty->isFloatingPointTy() || Ty->isIntegerTy() || Ty->isPointerTy(); |
| 272 | case StorageKind::Integer: |
| 273 | return Ty->isIntegerTy(); |
| 274 | case StorageKind::Float: |
| 275 | return Ty->isFloatingPointTy(); |
| 276 | case StorageKind::Pointer: |
| 277 | return Ty->isPointerTy(); |
| 278 | // We don't check elements recursively. |
| 279 | case StorageKind::Aggregate: |
| 280 | return Ty->isAggregateType() || Ty->isVectorTy(); |
| 281 | } |
| 282 | llvm_unreachable("Unhandled storage kind." ); |
| 283 | } |
| 284 | |
| 285 | const APInt &asInteger() const { |
| 286 | assert(Kind == StorageKind::Integer && "Expect an integer value" ); |
| 287 | return IntVal; |
| 288 | } |
| 289 | |
| 290 | const APFloat &asFloat() const { |
| 291 | assert(Kind == StorageKind::Float && "Expect a float value" ); |
| 292 | return FloatVal; |
| 293 | } |
| 294 | |
| 295 | const Pointer &asPointer() const { |
| 296 | assert(Kind == StorageKind::Pointer && "Expect a pointer value" ); |
| 297 | return PtrVal; |
| 298 | } |
| 299 | |
| 300 | const std::vector<AnyValue> &asAggregate() const { |
| 301 | assert(Kind == StorageKind::Aggregate && |
| 302 | "Expect an aggregate/vector value" ); |
| 303 | return AggVal; |
| 304 | } |
| 305 | |
| 306 | std::vector<AnyValue> &asAggregate() { |
| 307 | assert(Kind == StorageKind::Aggregate && |
| 308 | "Expect an aggregate/vector value" ); |
| 309 | return AggVal; |
| 310 | } |
| 311 | |
| 312 | // Helper function for C++ 17 structured bindings. |
| 313 | template <size_t I> const AnyValue &get() const { |
| 314 | assert(Kind == StorageKind::Aggregate && |
| 315 | "Expect an aggregate/vector value" ); |
| 316 | assert(I < AggVal.size() && "Index out of bounds" ); |
| 317 | return AggVal[I]; |
| 318 | } |
| 319 | |
| 320 | BooleanKind asBoolean() const { |
| 321 | if (isPoison()) |
| 322 | return BooleanKind::Poison; |
| 323 | return asInteger().isZero() ? BooleanKind::False : BooleanKind::True; |
| 324 | } |
| 325 | }; |
| 326 | |
| 327 | inline raw_ostream &operator<<(raw_ostream &OS, const AnyValue &V) { |
| 328 | V.print(OS); |
| 329 | return OS; |
| 330 | } |
| 331 | |
| 332 | inline raw_ostream &operator<<(raw_ostream &OS, const Pointer &P) { |
| 333 | P.print(OS); |
| 334 | return OS; |
| 335 | } |
| 336 | |
| 337 | inline AnyValue addNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW, |
| 338 | bool HasNUW) { |
| 339 | APInt Res = LHS + RHS; |
| 340 | if (HasNUW && Res.ult(RHS)) |
| 341 | return AnyValue::poison(); |
| 342 | if (HasNSW && LHS.isNonNegative() == RHS.isNonNegative() && |
| 343 | LHS.isNonNegative() != Res.isNonNegative()) |
| 344 | return AnyValue::poison(); |
| 345 | return Res; |
| 346 | } |
| 347 | |
| 348 | inline AnyValue subNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW, |
| 349 | bool HasNUW) { |
| 350 | APInt Res = LHS - RHS; |
| 351 | if (HasNUW && Res.ugt(RHS: LHS)) |
| 352 | return AnyValue::poison(); |
| 353 | if (HasNSW && LHS.isNonNegative() != RHS.isNonNegative() && |
| 354 | LHS.isNonNegative() != Res.isNonNegative()) |
| 355 | return AnyValue::poison(); |
| 356 | return Res; |
| 357 | } |
| 358 | |
| 359 | inline AnyValue mulNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW, |
| 360 | bool HasNUW) { |
| 361 | bool Overflow = false; |
| 362 | APInt Res = LHS.smul_ov(RHS, Overflow); |
| 363 | if (HasNSW && Overflow) |
| 364 | return AnyValue::poison(); |
| 365 | if (HasNUW) { |
| 366 | (void)LHS.umul_ov(RHS, Overflow); |
| 367 | if (Overflow) |
| 368 | return AnyValue::poison(); |
| 369 | } |
| 370 | return Res; |
| 371 | } |
| 372 | |
| 373 | } // namespace llvm::ubi |
| 374 | |
| 375 | #endif |
| 376 | |