1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
14#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
18#include <optional>
19
20using namespace clang;
21using namespace ento;
22
23namespace {
24class SimpleSValBuilder : public SValBuilder {
25
26 // Query the constraint manager whether the SVal has only one possible
27 // (integer) value. If that is the case, the value is returned. Otherwise,
28 // returns NULL.
29 // This is an implementation detail. Checkers should use `getKnownValue()`
30 // instead.
31 static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32
33 // Helper function that returns the value stored in a nonloc::ConcreteInt or
34 // loc::ConcreteInt.
35 static const llvm::APSInt *getConcreteValue(SVal V);
36
37 // With one `simplifySValOnce` call, a compound symbols might collapse to
38 // simpler symbol tree that is still possible to further simplify. Thus, we
39 // do the simplification on a new symbol tree until we reach the simplest
40 // form, i.e. the fixpoint.
41 // Consider the following symbol `(b * b) * b * b` which has this tree:
42 // *
43 // / \
44 // * b
45 // / \
46 // / b
47 // (b * b)
48 // Now, if the `b * b == 1` new constraint is added then during the first
49 // iteration we have the following transformations:
50 // * *
51 // / \ / \
52 // * b --> b b
53 // / \
54 // / b
55 // 1
56 // We need another iteration to reach the final result `1`.
57 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
58
59 // Recursively descends into symbolic expressions and replaces symbols
60 // with their known values (in the sense of the getConstValue() method).
61 // We traverse the symbol tree and query the constraint values for the
62 // sub-trees and if a value is a constant we do the constant folding.
63 SVal simplifySValOnce(ProgramStateRef State, SVal V);
64
65public:
66 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
67 ProgramStateManager &stateMgr)
68 : SValBuilder(alloc, context, stateMgr) {}
69 ~SimpleSValBuilder() override {}
70
71 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
72 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
73 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
74 Loc lhs, Loc rhs, QualType resultTy) override;
75 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
76 Loc lhs, NonLoc rhs, QualType resultTy) override;
77
78 /// Evaluates a given SVal by recursively evaluating and
79 /// simplifying the children SVals. If the SVal has only one possible
80 /// (integer) value, that value is returned. Otherwise, returns NULL.
81 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
82
83 /// Evaluates a given SVal by recursively evaluating and simplifying the
84 /// children SVals, then returns its minimal possible (integer) value. If the
85 /// constraint manager cannot provide a meaningful answer, this returns NULL.
86 const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
87
88 /// Evaluates a given SVal by recursively evaluating and simplifying the
89 /// children SVals, then returns its maximal possible (integer) value. If the
90 /// constraint manager cannot provide a meaningful answer, this returns NULL.
91 const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
92
93 SVal simplifySVal(ProgramStateRef State, SVal V) override;
94
95 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
96 const llvm::APSInt &RHS, QualType resultTy);
97};
98} // end anonymous namespace
99
100SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
101 ASTContext &context,
102 ProgramStateManager &stateMgr) {
103 return new SimpleSValBuilder(alloc, context, stateMgr);
104}
105
106// Checks if the negation the value and flipping sign preserve
107// the semantics on the operation in the resultType
108static bool isNegationValuePreserving(const llvm::APSInt &Value,
109 APSIntType ResultType) {
110 const unsigned ValueBits = Value.getSignificantBits();
111 if (ValueBits == ResultType.getBitWidth()) {
112 // The value is the lowest negative value that is representable
113 // in signed integer with bitWith of result type. The
114 // negation is representable if resultType is unsigned.
115 return ResultType.isUnsigned();
116 }
117
118 // If resultType bitWith is higher that number of bits required
119 // to represent RHS, the sign flip produce same value.
120 return ValueBits < ResultType.getBitWidth();
121}
122
123//===----------------------------------------------------------------------===//
124// Transfer function for binary operators.
125//===----------------------------------------------------------------------===//
126
127SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
128 BinaryOperator::Opcode op,
129 const llvm::APSInt &RHS,
130 QualType resultTy) {
131 bool isIdempotent = false;
132
133 // Check for a few special cases with known reductions first.
134 switch (op) {
135 default:
136 // We can't reduce this case; just treat it normally.
137 break;
138 case BO_Mul:
139 // a*0 and a*1
140 if (RHS == 0)
141 return makeIntVal(integer: 0, type: resultTy);
142 else if (RHS == 1)
143 isIdempotent = true;
144 break;
145 case BO_Div:
146 // a/0 and a/1
147 if (RHS == 0)
148 // This is also handled elsewhere.
149 return UndefinedVal();
150 else if (RHS == 1)
151 isIdempotent = true;
152 break;
153 case BO_Rem:
154 // a%0 and a%1
155 if (RHS == 0)
156 // This is also handled elsewhere.
157 return UndefinedVal();
158 else if (RHS == 1)
159 return makeIntVal(integer: 0, type: resultTy);
160 break;
161 case BO_Add:
162 case BO_Sub:
163 case BO_Shl:
164 case BO_Shr:
165 case BO_Xor:
166 // a+0, a-0, a<<0, a>>0, a^0
167 if (RHS == 0)
168 isIdempotent = true;
169 break;
170 case BO_And:
171 // a&0 and a&(~0)
172 if (RHS == 0)
173 return makeIntVal(integer: 0, type: resultTy);
174 else if (RHS.isAllOnes())
175 isIdempotent = true;
176 break;
177 case BO_Or:
178 // a|0 and a|(~0)
179 if (RHS == 0)
180 isIdempotent = true;
181 else if (RHS.isAllOnes()) {
182 const llvm::APSInt &Result = BasicVals.Convert(T: resultTy, From: RHS);
183 return nonloc::ConcreteInt(Result);
184 }
185 break;
186 }
187
188 // Idempotent ops (like a*1) can still change the type of an expression.
189 // Wrap the LHS up in a NonLoc again and let evalCast do the
190 // dirty work.
191 if (isIdempotent)
192 return evalCast(V: nonloc::SymbolVal(LHS), CastTy: resultTy, OriginalTy: QualType{});
193
194 // If we reach this point, the expression cannot be simplified.
195 // Make a SymbolVal for the entire expression, after converting the RHS.
196 const llvm::APSInt *ConvertedRHS = &RHS;
197 if (BinaryOperator::isComparisonOp(Opc: op)) {
198 // We're looking for a type big enough to compare the symbolic value
199 // with the given constant.
200 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201 ASTContext &Ctx = getContext();
202 QualType SymbolType = LHS->getType();
203 uint64_t ValWidth = RHS.getBitWidth();
204 uint64_t TypeWidth = Ctx.getTypeSize(T: SymbolType);
205
206 if (ValWidth < TypeWidth) {
207 // If the value is too small, extend it.
208 ConvertedRHS = &BasicVals.Convert(T: SymbolType, From: RHS);
209 } else if (ValWidth == TypeWidth) {
210 // If the value is signed but the symbol is unsigned, do the comparison
211 // in unsigned space. [C99 6.3.1.8]
212 // (For the opposite case, the value is already unsigned.)
213 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214 ConvertedRHS = &BasicVals.Convert(T: SymbolType, From: RHS);
215 }
216 } else if (BinaryOperator::isAdditiveOp(Opc: op) && RHS.isNegative()) {
217 // Change a+(-N) into a-N, and a-(-N) into a+N
218 // Adjust addition/subtraction of negative value, to
219 // subtraction/addition of the negated value.
220 APSIntType resultIntTy = BasicVals.getAPSIntType(T: resultTy);
221 if (isNegationValuePreserving(Value: RHS, ResultType: resultIntTy)) {
222 ConvertedRHS = &BasicVals.getValue(X: -resultIntTy.convert(Value: RHS));
223 op = (op == BO_Add) ? BO_Sub : BO_Add;
224 } else {
225 ConvertedRHS = &BasicVals.Convert(T: resultTy, From: RHS);
226 }
227 } else
228 ConvertedRHS = &BasicVals.Convert(T: resultTy, From: RHS);
229
230 return makeNonLoc(lhs: LHS, op, rhs: *ConvertedRHS, type: resultTy);
231}
232
233// See if Sym is known to be a relation Rel with Bound.
234static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
235 llvm::APSInt Bound, ProgramStateRef State) {
236 SValBuilder &SVB = State->getStateManager().getSValBuilder();
237 SVal Result =
238 SVB.evalBinOpNN(state: State, op: Rel, lhs: nonloc::SymbolVal(Sym),
239 rhs: nonloc::ConcreteInt(Bound), resultTy: SVB.getConditionType());
240 if (auto DV = Result.getAs<DefinedSVal>()) {
241 return !State->assume(Cond: *DV, Assumption: false);
242 }
243 return false;
244}
245
246// See if Sym is known to be within [min/4, max/4], where min and max
247// are the bounds of the symbol's integral type. With such symbols,
248// some manipulations can be performed without the risk of overflow.
249// assume() doesn't cause infinite recursion because we should be dealing
250// with simpler symbols on every recursive call.
251static bool isWithinConstantOverflowBounds(SymbolRef Sym,
252 ProgramStateRef State) {
253 SValBuilder &SVB = State->getStateManager().getSValBuilder();
254 BasicValueFactory &BV = SVB.getBasicValueFactory();
255
256 QualType T = Sym->getType();
257 assert(T->isSignedIntegerOrEnumerationType() &&
258 "This only works with signed integers!");
259 APSIntType AT = BV.getAPSIntType(T);
260
261 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4), Min = -Max;
262 return isInRelation(Rel: BO_LE, Sym, Bound: Max, State) &&
263 isInRelation(Rel: BO_GE, Sym, Bound: Min, State);
264}
265
266// Same for the concrete integers: see if I is within [min/4, max/4].
267static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
268 APSIntType AT(I);
269 assert(!AT.isUnsigned() &&
270 "This only works with signed integers!");
271
272 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(RawValue: 4), Min = -Max;
273 return (I <= Max) && (I >= -Max);
274}
275
276static std::pair<SymbolRef, llvm::APSInt>
277decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
278 if (const auto *SymInt = dyn_cast<SymIntExpr>(Val: Sym))
279 if (BinaryOperator::isAdditiveOp(Opc: SymInt->getOpcode()))
280 return std::make_pair(x: SymInt->getLHS(),
281 y: (SymInt->getOpcode() == BO_Add) ?
282 (SymInt->getRHS()) :
283 (-SymInt->getRHS()));
284
285 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
286 return std::make_pair(x&: Sym, y: BV.getValue(X: 0, T: Sym->getType()));
287}
288
289// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
290// same signed integral type and no overflows occur (which should be checked
291// by the caller).
292static NonLoc doRearrangeUnchecked(ProgramStateRef State,
293 BinaryOperator::Opcode Op,
294 SymbolRef LSym, llvm::APSInt LInt,
295 SymbolRef RSym, llvm::APSInt RInt) {
296 SValBuilder &SVB = State->getStateManager().getSValBuilder();
297 BasicValueFactory &BV = SVB.getBasicValueFactory();
298 SymbolManager &SymMgr = SVB.getSymbolManager();
299
300 QualType SymTy = LSym->getType();
301 assert(SymTy == RSym->getType() &&
302 "Symbols are not of the same type!");
303 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
304 "Integers are not of the same type as symbols!");
305 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
306 "Integers are not of the same type as symbols!");
307
308 QualType ResultTy;
309 if (BinaryOperator::isComparisonOp(Opc: Op))
310 ResultTy = SVB.getConditionType();
311 else if (BinaryOperator::isAdditiveOp(Opc: Op))
312 ResultTy = SymTy;
313 else
314 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
315
316 if (LSym == RSym)
317 return SVB.evalBinOpNN(state: State, op: Op, lhs: nonloc::ConcreteInt(LInt),
318 rhs: nonloc::ConcreteInt(RInt), resultTy: ResultTy)
319 .castAs<NonLoc>();
320
321 SymbolRef ResultSym = nullptr;
322 BinaryOperator::Opcode ResultOp;
323 llvm::APSInt ResultInt;
324 if (BinaryOperator::isComparisonOp(Opc: Op)) {
325 // Prefer comparing to a non-negative number.
326 // FIXME: Maybe it'd be better to have consistency in
327 // "$x - $y" vs. "$y - $x" because those are solver's keys.
328 if (LInt > RInt) {
329 ResultSym = SymMgr.getSymSymExpr(lhs: RSym, op: BO_Sub, rhs: LSym, t: SymTy);
330 ResultOp = BinaryOperator::reverseComparisonOp(Opc: Op);
331 ResultInt = LInt - RInt; // Opposite order!
332 } else {
333 ResultSym = SymMgr.getSymSymExpr(lhs: LSym, op: BO_Sub, rhs: RSym, t: SymTy);
334 ResultOp = Op;
335 ResultInt = RInt - LInt; // Opposite order!
336 }
337 } else {
338 ResultSym = SymMgr.getSymSymExpr(lhs: LSym, op: Op, rhs: RSym, t: SymTy);
339 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
340 ResultOp = BO_Add;
341 // Bring back the cosmetic difference.
342 if (ResultInt < 0) {
343 ResultInt = -ResultInt;
344 ResultOp = BO_Sub;
345 } else if (ResultInt == 0) {
346 // Shortcut: Simplify "$x + 0" to "$x".
347 return nonloc::SymbolVal(ResultSym);
348 }
349 }
350 const llvm::APSInt &PersistentResultInt = BV.getValue(X: ResultInt);
351 return nonloc::SymbolVal(
352 SymMgr.getSymIntExpr(lhs: ResultSym, op: ResultOp, rhs: PersistentResultInt, t: ResultTy));
353}
354
355// Rearrange if symbol type matches the result type and if the operator is a
356// comparison operator, both symbol and constant must be within constant
357// overflow bounds.
358static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
359 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
360 return Sym->getType() == Ty &&
361 (!BinaryOperator::isComparisonOp(Opc: Op) ||
362 (isWithinConstantOverflowBounds(Sym, State) &&
363 isWithinConstantOverflowBounds(I: Int)));
364}
365
366static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
367 BinaryOperator::Opcode Op, NonLoc Lhs,
368 NonLoc Rhs, QualType ResultTy) {
369 ProgramStateManager &StateMgr = State->getStateManager();
370 SValBuilder &SVB = StateMgr.getSValBuilder();
371
372 // We expect everything to be of the same type - this type.
373 QualType SingleTy;
374
375 // FIXME: After putting complexity threshold to the symbols we can always
376 // rearrange additive operations but rearrange comparisons only if
377 // option is set.
378 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
379 return std::nullopt;
380
381 SymbolRef LSym = Lhs.getAsSymbol();
382 if (!LSym)
383 return std::nullopt;
384
385 if (BinaryOperator::isComparisonOp(Opc: Op)) {
386 SingleTy = LSym->getType();
387 if (ResultTy != SVB.getConditionType())
388 return std::nullopt;
389 // Initialize SingleTy later with a symbol's type.
390 } else if (BinaryOperator::isAdditiveOp(Opc: Op)) {
391 SingleTy = ResultTy;
392 if (LSym->getType() != SingleTy)
393 return std::nullopt;
394 } else {
395 // Don't rearrange other operations.
396 return std::nullopt;
397 }
398
399 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
400
401 // Rearrange signed symbolic expressions only
402 if (!SingleTy->isSignedIntegerOrEnumerationType())
403 return std::nullopt;
404
405 SymbolRef RSym = Rhs.getAsSymbol();
406 if (!RSym || RSym->getType() != SingleTy)
407 return std::nullopt;
408
409 BasicValueFactory &BV = State->getBasicVals();
410 llvm::APSInt LInt, RInt;
411 std::tie(args&: LSym, args&: LInt) = decomposeSymbol(Sym: LSym, BV);
412 std::tie(args&: RSym, args&: RInt) = decomposeSymbol(Sym: RSym, BV);
413 if (!shouldRearrange(State, Op, Sym: LSym, Int: LInt, Ty: SingleTy) ||
414 !shouldRearrange(State, Op, Sym: RSym, Int: RInt, Ty: SingleTy))
415 return std::nullopt;
416
417 // We know that no overflows can occur anymore.
418 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
419}
420
421SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
422 BinaryOperator::Opcode op,
423 NonLoc lhs, NonLoc rhs,
424 QualType resultTy) {
425 NonLoc InputLHS = lhs;
426 NonLoc InputRHS = rhs;
427
428 // Constraints may have changed since the creation of a bound SVal. Check if
429 // the values can be simplified based on those new constraints.
430 SVal simplifiedLhs = simplifySVal(State: state, V: lhs);
431 SVal simplifiedRhs = simplifySVal(State: state, V: rhs);
432 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
433 lhs = *simplifiedLhsAsNonLoc;
434 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
435 rhs = *simplifiedRhsAsNonLoc;
436
437 // Handle trivial case where left-side and right-side are the same.
438 if (lhs == rhs)
439 switch (op) {
440 default:
441 break;
442 case BO_EQ:
443 case BO_LE:
444 case BO_GE:
445 return makeTruthVal(b: true, type: resultTy);
446 case BO_LT:
447 case BO_GT:
448 case BO_NE:
449 return makeTruthVal(b: false, type: resultTy);
450 case BO_Xor:
451 case BO_Sub:
452 if (resultTy->isIntegralOrEnumerationType())
453 return makeIntVal(integer: 0, type: resultTy);
454 return evalCast(V: makeIntVal(integer: 0, /*isUnsigned=*/false), CastTy: resultTy,
455 OriginalTy: QualType{});
456 case BO_Or:
457 case BO_And:
458 return evalCast(V: lhs, CastTy: resultTy, OriginalTy: QualType{});
459 }
460
461 while (true) {
462 switch (lhs.getKind()) {
463 default:
464 return makeSymExprValNN(op, lhs, rhs, resultTy);
465 case nonloc::PointerToMemberKind: {
466 assert(rhs.getKind() == nonloc::PointerToMemberKind &&
467 "Both SVals should have pointer-to-member-type");
468 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
469 RPTM = rhs.castAs<nonloc::PointerToMember>();
470 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
471 switch (op) {
472 case BO_EQ:
473 return makeTruthVal(b: LPTMD == RPTMD, type: resultTy);
474 case BO_NE:
475 return makeTruthVal(b: LPTMD != RPTMD, type: resultTy);
476 default:
477 return UnknownVal();
478 }
479 }
480 case nonloc::LocAsIntegerKind: {
481 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
482 switch (rhs.getKind()) {
483 case nonloc::LocAsIntegerKind:
484 // FIXME: at the moment the implementation
485 // of modeling "pointers as integers" is not complete.
486 if (!BinaryOperator::isComparisonOp(Opc: op))
487 return UnknownVal();
488 return evalBinOpLL(state, op, lhs: lhsL,
489 rhs: rhs.castAs<nonloc::LocAsInteger>().getLoc(),
490 resultTy);
491 case nonloc::ConcreteIntKind: {
492 // FIXME: at the moment the implementation
493 // of modeling "pointers as integers" is not complete.
494 if (!BinaryOperator::isComparisonOp(Opc: op))
495 return UnknownVal();
496 // Transform the integer into a location and compare.
497 // FIXME: This only makes sense for comparisons. If we want to, say,
498 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
499 // then pack it back into a LocAsInteger.
500 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
501 // If the region has a symbolic base, pay attention to the type; it
502 // might be coming from a non-default address space. For non-symbolic
503 // regions it doesn't matter that much because such comparisons would
504 // most likely evaluate to concrete false anyway. FIXME: We might
505 // still need to handle the non-comparison case.
506 if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true))
507 BasicVals.getAPSIntType(T: lSym->getType()).apply(Value&: i);
508 else
509 BasicVals.getAPSIntType(T: Context.VoidPtrTy).apply(Value&: i);
510 return evalBinOpLL(state, op, lhs: lhsL, rhs: makeLoc(integer: i), resultTy);
511 }
512 default:
513 switch (op) {
514 case BO_EQ:
515 return makeTruthVal(b: false, type: resultTy);
516 case BO_NE:
517 return makeTruthVal(b: true, type: resultTy);
518 default:
519 // This case also handles pointer arithmetic.
520 return makeSymExprValNN(op, lhs: InputLHS, rhs: InputRHS, resultTy);
521 }
522 }
523 }
524 case nonloc::ConcreteIntKind: {
525 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
526
527 // If we're dealing with two known constants, just perform the operation.
528 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, V: rhs)) {
529 llvm::APSInt RHSValue = *KnownRHSValue;
530 if (BinaryOperator::isComparisonOp(Opc: op)) {
531 // We're looking for a type big enough to compare the two values.
532 // FIXME: This is not correct. char + short will result in a promotion
533 // to int. Unfortunately we have lost types by this point.
534 APSIntType CompareType = std::max(a: APSIntType(LHSValue),
535 b: APSIntType(RHSValue));
536 CompareType.apply(Value&: LHSValue);
537 CompareType.apply(Value&: RHSValue);
538 } else if (!BinaryOperator::isShiftOp(Opc: op)) {
539 APSIntType IntType = BasicVals.getAPSIntType(T: resultTy);
540 IntType.apply(Value&: LHSValue);
541 IntType.apply(Value&: RHSValue);
542 }
543
544 const llvm::APSInt *Result =
545 BasicVals.evalAPSInt(Op: op, V1: LHSValue, V2: RHSValue);
546 if (!Result) {
547 if (op == BO_Shl || op == BO_Shr) {
548 // FIXME: At this point the constant folding claims that the result
549 // of a bitwise shift is undefined. However, constant folding
550 // relies on the inaccurate type information that is stored in the
551 // bit size of APSInt objects, and if we reached this point, then
552 // the checker core.BitwiseShift already determined that the shift
553 // is valid (in a PreStmt callback, by querying the real type from
554 // the AST node).
555 // To avoid embarrassing false positives, let's just say that we
556 // don't know anything about the result of the shift.
557 return UnknownVal();
558 }
559 return UndefinedVal();
560 }
561
562 return nonloc::ConcreteInt(*Result);
563 }
564
565 // Swap the left and right sides and flip the operator if doing so
566 // allows us to better reason about the expression (this is a form
567 // of expression canonicalization).
568 // While we're at it, catch some special cases for non-commutative ops.
569 switch (op) {
570 case BO_LT:
571 case BO_GT:
572 case BO_LE:
573 case BO_GE:
574 op = BinaryOperator::reverseComparisonOp(Opc: op);
575 [[fallthrough]];
576 case BO_EQ:
577 case BO_NE:
578 case BO_Add:
579 case BO_Mul:
580 case BO_And:
581 case BO_Xor:
582 case BO_Or:
583 std::swap(a&: lhs, b&: rhs);
584 continue;
585 case BO_Shr:
586 // (~0)>>a
587 if (LHSValue.isAllOnes() && LHSValue.isSigned())
588 return evalCast(V: lhs, CastTy: resultTy, OriginalTy: QualType{});
589 [[fallthrough]];
590 case BO_Shl:
591 // 0<<a and 0>>a
592 if (LHSValue == 0)
593 return evalCast(V: lhs, CastTy: resultTy, OriginalTy: QualType{});
594 return makeSymExprValNN(op, lhs: InputLHS, rhs: InputRHS, resultTy);
595 case BO_Div:
596 // 0 / x == 0
597 case BO_Rem:
598 // 0 % x == 0
599 if (LHSValue == 0)
600 return makeZeroVal(type: resultTy);
601 [[fallthrough]];
602 default:
603 return makeSymExprValNN(op, lhs: InputLHS, rhs: InputRHS, resultTy);
604 }
605 }
606 case nonloc::SymbolValKind: {
607 // We only handle LHS as simple symbols or SymIntExprs.
608 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
609
610 // LHS is a symbolic expression.
611 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Val: Sym)) {
612
613 // Is this a logical not? (!x is represented as x == 0.)
614 if (op == BO_EQ && rhs.isZeroConstant()) {
615 // We know how to negate certain expressions. Simplify them here.
616
617 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
618 switch (opc) {
619 default:
620 // We don't know how to negate this operation.
621 // Just handle it as if it were a normal comparison to 0.
622 break;
623 case BO_LAnd:
624 case BO_LOr:
625 llvm_unreachable("Logical operators handled by branching logic.");
626 case BO_Assign:
627 case BO_MulAssign:
628 case BO_DivAssign:
629 case BO_RemAssign:
630 case BO_AddAssign:
631 case BO_SubAssign:
632 case BO_ShlAssign:
633 case BO_ShrAssign:
634 case BO_AndAssign:
635 case BO_XorAssign:
636 case BO_OrAssign:
637 case BO_Comma:
638 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
639 case BO_PtrMemD:
640 case BO_PtrMemI:
641 llvm_unreachable("Pointer arithmetic not handled here.");
642 case BO_LT:
643 case BO_GT:
644 case BO_LE:
645 case BO_GE:
646 case BO_EQ:
647 case BO_NE:
648 assert(resultTy->isBooleanType() ||
649 resultTy == getConditionType());
650 assert(symIntExpr->getType()->isBooleanType() ||
651 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
652 getConditionType()));
653 // Negate the comparison and make a value.
654 opc = BinaryOperator::negateComparisonOp(Opc: opc);
655 return makeNonLoc(lhs: symIntExpr->getLHS(), op: opc,
656 rhs: symIntExpr->getRHS(), type: resultTy);
657 }
658 }
659
660 // For now, only handle expressions whose RHS is a constant.
661 if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs)) {
662 // If both the LHS and the current expression are additive,
663 // fold their constants and try again.
664 if (BinaryOperator::isAdditiveOp(Opc: op)) {
665 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
666 if (BinaryOperator::isAdditiveOp(Opc: lop)) {
667 // Convert the two constants to a common type, then combine them.
668
669 // resultTy may not be the best type to convert to, but it's
670 // probably the best choice in expressions with mixed type
671 // (such as x+1U+2LL). The rules for implicit conversions should
672 // choose a reasonable type to preserve the expression, and will
673 // at least match how the value is going to be used.
674 APSIntType IntType = BasicVals.getAPSIntType(T: resultTy);
675 const llvm::APSInt &first = IntType.convert(Value: symIntExpr->getRHS());
676 const llvm::APSInt &second = IntType.convert(Value: *RHSValue);
677
678 // If the op and lop agrees, then we just need to
679 // sum the constants. Otherwise, we change to operation
680 // type if substraction would produce negative value
681 // (and cause overflow for unsigned integers),
682 // as consequence x+1U-10 produces x-9U, instead
683 // of x+4294967287U, that would be produced without this
684 // additional check.
685 const llvm::APSInt *newRHS;
686 if (lop == op) {
687 newRHS = BasicVals.evalAPSInt(Op: BO_Add, V1: first, V2: second);
688 } else if (first >= second) {
689 newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: first, V2: second);
690 op = lop;
691 } else {
692 newRHS = BasicVals.evalAPSInt(Op: BO_Sub, V1: second, V2: first);
693 }
694
695 assert(newRHS && "Invalid operation despite common type!");
696 rhs = nonloc::ConcreteInt(*newRHS);
697 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
698 continue;
699 }
700 }
701
702 // Otherwise, make a SymIntExpr out of the expression.
703 return MakeSymIntVal(LHS: symIntExpr, op, RHS: *RHSValue, resultTy);
704 }
705 }
706
707 // Is the RHS a constant?
708 if (const llvm::APSInt *RHSValue = getConstValue(state, V: rhs))
709 return MakeSymIntVal(LHS: Sym, op, RHS: *RHSValue, resultTy);
710
711 if (std::optional<NonLoc> V = tryRearrange(State: state, Op: op, Lhs: lhs, Rhs: rhs, ResultTy: resultTy))
712 return *V;
713
714 // Give up -- this is not a symbolic expression we can handle.
715 return makeSymExprValNN(op, lhs: InputLHS, rhs: InputRHS, resultTy);
716 }
717 }
718 }
719}
720
721static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
722 const FieldRegion *RightFR,
723 BinaryOperator::Opcode op,
724 QualType resultTy,
725 SimpleSValBuilder &SVB) {
726 // Only comparisons are meaningful here!
727 if (!BinaryOperator::isComparisonOp(Opc: op))
728 return UnknownVal();
729
730 // Next, see if the two FRs have the same super-region.
731 // FIXME: This doesn't handle casts yet, and simply stripping the casts
732 // doesn't help.
733 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
734 return UnknownVal();
735
736 const FieldDecl *LeftFD = LeftFR->getDecl();
737 const FieldDecl *RightFD = RightFR->getDecl();
738 const RecordDecl *RD = LeftFD->getParent();
739
740 // Make sure the two FRs are from the same kind of record. Just in case!
741 // FIXME: This is probably where inheritance would be a problem.
742 if (RD != RightFD->getParent())
743 return UnknownVal();
744
745 // We know for sure that the two fields are not the same, since that
746 // would have given us the same SVal.
747 if (op == BO_EQ)
748 return SVB.makeTruthVal(b: false, type: resultTy);
749 if (op == BO_NE)
750 return SVB.makeTruthVal(b: true, type: resultTy);
751
752 // Iterate through the fields and see which one comes first.
753 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
754 // members and the units in which bit-fields reside have addresses that
755 // increase in the order in which they are declared."
756 bool leftFirst = (op == BO_LT || op == BO_LE);
757 for (const auto *I : RD->fields()) {
758 if (I == LeftFD)
759 return SVB.makeTruthVal(b: leftFirst, type: resultTy);
760 if (I == RightFD)
761 return SVB.makeTruthVal(b: !leftFirst, type: resultTy);
762 }
763
764 llvm_unreachable("Fields not found in parent record's definition");
765}
766
767// This is used in debug builds only for now because some downstream users
768// may hit this assert in their subsequent merges.
769// There are still places in the analyzer where equal bitwidth Locs
770// are compared, and need to be found and corrected. Recent previous fixes have
771// addressed the known problems of making NULLs with specific bitwidths
772// for Loc comparisons along with deprecation of APIs for the same purpose.
773//
774static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
775 Loc LhsLoc) {
776 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
777 ASTContext &Ctx = State->getStateManager().getContext();
778 uint64_t RhsBitwidth =
779 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: RhsLoc.getType(Ctx));
780 uint64_t LhsBitwidth =
781 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(T: LhsLoc.getType(Ctx));
782 if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
783 assert(RhsBitwidth == LhsBitwidth &&
784 "RhsLoc and LhsLoc bitwidth must be same!");
785 }
786}
787
788// FIXME: all this logic will change if/when we have MemRegion::getLocation().
789SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
790 BinaryOperator::Opcode op,
791 Loc lhs, Loc rhs,
792 QualType resultTy) {
793
794 // Assert that bitwidth of lhs and rhs are the same.
795 // This can happen if two different address spaces are used,
796 // and the bitwidths of the address spaces are different.
797 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
798 // FIXME: See comment above in the function assertEqualBitWidths
799 assertEqualBitWidths(State: state, RhsLoc: rhs, LhsLoc: lhs);
800
801 // Only comparisons and subtractions are valid operations on two pointers.
802 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
803 // However, if a pointer is casted to an integer, evalBinOpNN may end up
804 // calling this function with another operation (PR7527). We don't attempt to
805 // model this for now, but it could be useful, particularly when the
806 // "location" is actually an integer value that's been passed through a void*.
807 if (!(BinaryOperator::isComparisonOp(Opc: op) || op == BO_Sub))
808 return UnknownVal();
809
810 // Special cases for when both sides are identical.
811 if (lhs == rhs) {
812 switch (op) {
813 default:
814 llvm_unreachable("Unimplemented operation for two identical values");
815 case BO_Sub:
816 return makeZeroVal(type: resultTy);
817 case BO_EQ:
818 case BO_LE:
819 case BO_GE:
820 return makeTruthVal(b: true, type: resultTy);
821 case BO_NE:
822 case BO_LT:
823 case BO_GT:
824 return makeTruthVal(b: false, type: resultTy);
825 }
826 }
827
828 switch (lhs.getKind()) {
829 default:
830 llvm_unreachable("Ordering not implemented for this Loc.");
831
832 case loc::GotoLabelKind:
833 // The only thing we know about labels is that they're non-null.
834 if (rhs.isZeroConstant()) {
835 switch (op) {
836 default:
837 break;
838 case BO_Sub:
839 return evalCast(V: lhs, CastTy: resultTy, OriginalTy: QualType{});
840 case BO_EQ:
841 case BO_LE:
842 case BO_LT:
843 return makeTruthVal(b: false, type: resultTy);
844 case BO_NE:
845 case BO_GT:
846 case BO_GE:
847 return makeTruthVal(b: true, type: resultTy);
848 }
849 }
850 // There may be two labels for the same location, and a function region may
851 // have the same address as a label at the start of the function (depending
852 // on the ABI).
853 // FIXME: we can probably do a comparison against other MemRegions, though.
854 // FIXME: is there a way to tell if two labels refer to the same location?
855 return UnknownVal();
856
857 case loc::ConcreteIntKind: {
858 auto L = lhs.castAs<loc::ConcreteInt>();
859
860 // If one of the operands is a symbol and the other is a constant,
861 // build an expression for use by the constraint manager.
862 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
863 // We can only build expressions with symbols on the left,
864 // so we need a reversible operator.
865 if (!BinaryOperator::isComparisonOp(Opc: op) || op == BO_Cmp)
866 return UnknownVal();
867
868 op = BinaryOperator::reverseComparisonOp(Opc: op);
869 return makeNonLoc(lhs: rSym, op, rhs: L.getValue(), type: resultTy);
870 }
871
872 // If both operands are constants, just perform the operation.
873 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
874 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
875
876 if (const auto *ResultInt =
877 BasicVals.evalAPSInt(Op: op, V1: L.getValue(), V2: rInt->getValue()))
878 return evalCast(V: nonloc::ConcreteInt(*ResultInt), CastTy: resultTy, OriginalTy: QualType{});
879 return UnknownVal();
880 }
881
882 // Special case comparisons against NULL.
883 // This must come after the test if the RHS is a symbol, which is used to
884 // build constraints. The address of any non-symbolic region is guaranteed
885 // to be non-NULL, as is any label.
886 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
887 if (lhs.isZeroConstant()) {
888 switch (op) {
889 default:
890 break;
891 case BO_EQ:
892 case BO_GT:
893 case BO_GE:
894 return makeTruthVal(b: false, type: resultTy);
895 case BO_NE:
896 case BO_LT:
897 case BO_LE:
898 return makeTruthVal(b: true, type: resultTy);
899 }
900 }
901
902 // Comparing an arbitrary integer to a region or label address is
903 // completely unknowable.
904 return UnknownVal();
905 }
906 case loc::MemRegionValKind: {
907 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
908 // If one of the operands is a symbol and the other is a constant,
909 // build an expression for use by the constraint manager.
910 if (SymbolRef lSym = lhs.getAsLocSymbol(IncludeBaseRegions: true)) {
911 if (BinaryOperator::isComparisonOp(Opc: op))
912 return MakeSymIntVal(LHS: lSym, op, RHS: rInt->getValue(), resultTy);
913 return UnknownVal();
914 }
915 // Special case comparisons to NULL.
916 // This must come after the test if the LHS is a symbol, which is used to
917 // build constraints. The address of any non-symbolic region is guaranteed
918 // to be non-NULL.
919 if (rInt->isZeroConstant()) {
920 if (op == BO_Sub)
921 return evalCast(V: lhs, CastTy: resultTy, OriginalTy: QualType{});
922
923 if (BinaryOperator::isComparisonOp(Opc: op)) {
924 QualType boolType = getContext().BoolTy;
925 NonLoc l = evalCast(V: lhs, CastTy: boolType, OriginalTy: QualType{}).castAs<NonLoc>();
926 NonLoc r = makeTruthVal(b: false, type: boolType).castAs<NonLoc>();
927 return evalBinOpNN(state, op, lhs: l, rhs: r, resultTy);
928 }
929 }
930
931 // Comparing a region to an arbitrary integer is completely unknowable.
932 return UnknownVal();
933 }
934
935 // Get both values as regions, if possible.
936 const MemRegion *LeftMR = lhs.getAsRegion();
937 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
938
939 const MemRegion *RightMR = rhs.getAsRegion();
940 if (!RightMR)
941 // The RHS is probably a label, which in theory could address a region.
942 // FIXME: we can probably make a more useful statement about non-code
943 // regions, though.
944 return UnknownVal();
945
946 const MemRegion *LeftBase = LeftMR->getBaseRegion();
947 const MemRegion *RightBase = RightMR->getBaseRegion();
948 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
949 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
950 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
951
952 // If the two regions are from different known memory spaces they cannot be
953 // equal. Also, assume that no symbolic region (whose memory space is
954 // unknown) is on the stack.
955 if (LeftMS != RightMS &&
956 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
957 (isa<StackSpaceRegion>(Val: LeftMS) || isa<StackSpaceRegion>(Val: RightMS)))) {
958 switch (op) {
959 default:
960 return UnknownVal();
961 case BO_EQ:
962 return makeTruthVal(b: false, type: resultTy);
963 case BO_NE:
964 return makeTruthVal(b: true, type: resultTy);
965 }
966 }
967
968 // If both values wrap regions, see if they're from different base regions.
969 // Note, heap base symbolic regions are assumed to not alias with
970 // each other; for example, we assume that malloc returns different address
971 // on each invocation.
972 // FIXME: ObjC object pointers always reside on the heap, but currently
973 // we treat their memory space as unknown, because symbolic pointers
974 // to ObjC objects may alias. There should be a way to construct
975 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
976 // guesses memory space for ObjC object pointers manually instead of
977 // relying on us.
978 if (LeftBase != RightBase &&
979 ((!isa<SymbolicRegion>(Val: LeftBase) && !isa<SymbolicRegion>(Val: RightBase)) ||
980 (isa<HeapSpaceRegion>(Val: LeftMS) || isa<HeapSpaceRegion>(Val: RightMS))) ){
981 switch (op) {
982 default:
983 return UnknownVal();
984 case BO_EQ:
985 return makeTruthVal(b: false, type: resultTy);
986 case BO_NE:
987 return makeTruthVal(b: true, type: resultTy);
988 }
989 }
990
991 // Handle special cases for when both regions are element regions.
992 const ElementRegion *RightER = dyn_cast<ElementRegion>(Val: RightMR);
993 const ElementRegion *LeftER = dyn_cast<ElementRegion>(Val: LeftMR);
994 if (RightER && LeftER) {
995 // Next, see if the two ERs have the same super-region and matching types.
996 // FIXME: This should do something useful even if the types don't match,
997 // though if both indexes are constant the RegionRawOffset path will
998 // give the correct answer.
999 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1000 LeftER->getElementType() == RightER->getElementType()) {
1001 // Get the left index and cast it to the correct type.
1002 // If the index is unknown or undefined, bail out here.
1003 SVal LeftIndexVal = LeftER->getIndex();
1004 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1005 if (!LeftIndex)
1006 return UnknownVal();
1007 LeftIndexVal = evalCast(V: *LeftIndex, CastTy: ArrayIndexTy, OriginalTy: QualType{});
1008 LeftIndex = LeftIndexVal.getAs<NonLoc>();
1009 if (!LeftIndex)
1010 return UnknownVal();
1011
1012 // Do the same for the right index.
1013 SVal RightIndexVal = RightER->getIndex();
1014 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1015 if (!RightIndex)
1016 return UnknownVal();
1017 RightIndexVal = evalCast(V: *RightIndex, CastTy: ArrayIndexTy, OriginalTy: QualType{});
1018 RightIndex = RightIndexVal.getAs<NonLoc>();
1019 if (!RightIndex)
1020 return UnknownVal();
1021
1022 // Actually perform the operation.
1023 // evalBinOpNN expects the two indexes to already be the right type.
1024 return evalBinOpNN(state, op, lhs: *LeftIndex, rhs: *RightIndex, resultTy);
1025 }
1026 }
1027
1028 // Special handling of the FieldRegions, even with symbolic offsets.
1029 const FieldRegion *RightFR = dyn_cast<FieldRegion>(Val: RightMR);
1030 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(Val: LeftMR);
1031 if (RightFR && LeftFR) {
1032 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1033 SVB&: *this);
1034 if (!R.isUnknown())
1035 return R;
1036 }
1037
1038 // Compare the regions using the raw offsets.
1039 RegionOffset LeftOffset = LeftMR->getAsOffset();
1040 RegionOffset RightOffset = RightMR->getAsOffset();
1041
1042 if (LeftOffset.getRegion() != nullptr &&
1043 LeftOffset.getRegion() == RightOffset.getRegion() &&
1044 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1045 int64_t left = LeftOffset.getOffset();
1046 int64_t right = RightOffset.getOffset();
1047
1048 switch (op) {
1049 default:
1050 return UnknownVal();
1051 case BO_LT:
1052 return makeTruthVal(b: left < right, type: resultTy);
1053 case BO_GT:
1054 return makeTruthVal(b: left > right, type: resultTy);
1055 case BO_LE:
1056 return makeTruthVal(b: left <= right, type: resultTy);
1057 case BO_GE:
1058 return makeTruthVal(b: left >= right, type: resultTy);
1059 case BO_EQ:
1060 return makeTruthVal(b: left == right, type: resultTy);
1061 case BO_NE:
1062 return makeTruthVal(b: left != right, type: resultTy);
1063 }
1064 }
1065
1066 // At this point we're not going to get a good answer, but we can try
1067 // conjuring an expression instead.
1068 SymbolRef LHSSym = lhs.getAsLocSymbol();
1069 SymbolRef RHSSym = rhs.getAsLocSymbol();
1070 if (LHSSym && RHSSym)
1071 return makeNonLoc(lhs: LHSSym, op, rhs: RHSSym, type: resultTy);
1072
1073 // If we get here, we have no way of comparing the regions.
1074 return UnknownVal();
1075 }
1076 }
1077}
1078
1079SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1080 BinaryOperator::Opcode op, Loc lhs,
1081 NonLoc rhs, QualType resultTy) {
1082 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1083 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1084 if (PTMSV->isNullMemberPointer())
1085 return UndefinedVal();
1086
1087 auto getFieldLValue = [&](const auto *FD) -> SVal {
1088 SVal Result = lhs;
1089
1090 for (const auto &I : *PTMSV)
1091 Result = StateMgr.getStoreManager().evalDerivedToBase(
1092 Derived: Result, DerivedPtrType: I->getType(), IsVirtual: I->isVirtual());
1093
1094 return state->getLValue(FD, Result);
1095 };
1096
1097 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1098 return getFieldLValue(FD);
1099 }
1100 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1101 return getFieldLValue(FD);
1102 }
1103 }
1104
1105 return rhs;
1106 }
1107
1108 assert(!BinaryOperator::isComparisonOp(op) &&
1109 "arguments to comparison ops must be of the same type");
1110
1111 // Special case: rhs is a zero constant.
1112 if (rhs.isZeroConstant())
1113 return lhs;
1114
1115 // Perserve the null pointer so that it can be found by the DerefChecker.
1116 if (lhs.isZeroConstant())
1117 return lhs;
1118
1119 // We are dealing with pointer arithmetic.
1120
1121 // Handle pointer arithmetic on constant values.
1122 if (std::optional<nonloc::ConcreteInt> rhsInt =
1123 rhs.getAs<nonloc::ConcreteInt>()) {
1124 if (std::optional<loc::ConcreteInt> lhsInt =
1125 lhs.getAs<loc::ConcreteInt>()) {
1126 const llvm::APSInt &leftI = lhsInt->getValue();
1127 assert(leftI.isUnsigned());
1128 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1129
1130 // Convert the bitwidth of rightI. This should deal with overflow
1131 // since we are dealing with concrete values.
1132 rightI = rightI.extOrTrunc(width: leftI.getBitWidth());
1133
1134 // Offset the increment by the pointer size.
1135 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1136 QualType pointeeType = resultTy->getPointeeType();
1137 Multiplicand = getContext().getTypeSizeInChars(T: pointeeType).getQuantity();
1138 rightI *= Multiplicand;
1139
1140 // Compute the adjusted pointer.
1141 switch (op) {
1142 case BO_Add:
1143 rightI = leftI + rightI;
1144 break;
1145 case BO_Sub:
1146 rightI = leftI - rightI;
1147 break;
1148 default:
1149 llvm_unreachable("Invalid pointer arithmetic operation");
1150 }
1151 return loc::ConcreteInt(getBasicValueFactory().getValue(X: rightI));
1152 }
1153 }
1154
1155 // Handle cases where 'lhs' is a region.
1156 if (const MemRegion *region = lhs.getAsRegion()) {
1157 rhs = convertToArrayIndex(val: rhs).castAs<NonLoc>();
1158 SVal index = UnknownVal();
1159 const SubRegion *superR = nullptr;
1160 // We need to know the type of the pointer in order to add an integer to it.
1161 // Depending on the type, different amount of bytes is added.
1162 QualType elementType;
1163
1164 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(Val: region)) {
1165 assert(op == BO_Add || op == BO_Sub);
1166 index = evalBinOpNN(state, op, lhs: elemReg->getIndex(), rhs,
1167 resultTy: getArrayIndexType());
1168 superR = cast<SubRegion>(Val: elemReg->getSuperRegion());
1169 elementType = elemReg->getElementType();
1170 }
1171 else if (isa<SubRegion>(Val: region)) {
1172 assert(op == BO_Add || op == BO_Sub);
1173 index = (op == BO_Add) ? rhs : evalMinus(val: rhs);
1174 superR = cast<SubRegion>(Val: region);
1175 // TODO: Is this actually reliable? Maybe improving our MemRegion
1176 // hierarchy to provide typed regions for all non-void pointers would be
1177 // better. For instance, we cannot extend this towards LocAsInteger
1178 // operations, where result type of the expression is integer.
1179 if (resultTy->isAnyPointerType())
1180 elementType = resultTy->getPointeeType();
1181 }
1182
1183 // Represent arithmetic on void pointers as arithmetic on char pointers.
1184 // It is fine when a TypedValueRegion of char value type represents
1185 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1186 if (elementType->isVoidType())
1187 elementType = getContext().CharTy;
1188
1189 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1190 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, Idx: *indexV,
1191 superRegion: superR, Ctx: getContext()));
1192 }
1193 }
1194 return UnknownVal();
1195}
1196
1197const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1198 SVal V) {
1199 if (const llvm::APSInt *Res = getConcreteValue(V))
1200 return Res;
1201
1202 if (SymbolRef Sym = V.getAsSymbol())
1203 return state->getConstraintManager().getSymVal(state, sym: Sym);
1204
1205 return nullptr;
1206}
1207
1208const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) {
1209 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1210 return &X->getValue();
1211
1212 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1213 return &X->getValue();
1214
1215 return nullptr;
1216}
1217
1218const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1219 SVal V) {
1220 return getConstValue(state, V: simplifySVal(State: state, V));
1221}
1222
1223const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1224 SVal V) {
1225 V = simplifySVal(State: state, V);
1226
1227 if (const llvm::APSInt *Res = getConcreteValue(V))
1228 return Res;
1229
1230 if (SymbolRef Sym = V.getAsSymbol())
1231 return state->getConstraintManager().getSymMinVal(state, sym: Sym);
1232
1233 return nullptr;
1234}
1235
1236const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1237 SVal V) {
1238 V = simplifySVal(State: state, V);
1239
1240 if (const llvm::APSInt *Res = getConcreteValue(V))
1241 return Res;
1242
1243 if (SymbolRef Sym = V.getAsSymbol())
1244 return state->getConstraintManager().getSymMaxVal(state, sym: Sym);
1245
1246 return nullptr;
1247}
1248
1249SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1250 SVal SimplifiedVal = simplifySValOnce(State, V: Val);
1251 while (SimplifiedVal != Val) {
1252 Val = SimplifiedVal;
1253 SimplifiedVal = simplifySValOnce(State, V: Val);
1254 }
1255 return SimplifiedVal;
1256}
1257
1258SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1259 return simplifyUntilFixpoint(State, Val: V);
1260}
1261
1262SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1263 // For now, this function tries to constant-fold symbols inside a
1264 // nonloc::SymbolVal, and does nothing else. More simplifications should
1265 // be possible, such as constant-folding an index in an ElementRegion.
1266
1267 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1268 ProgramStateRef State;
1269 SValBuilder &SVB;
1270
1271 // Cache results for the lifetime of the Simplifier. Results change every
1272 // time new constraints are added to the program state, which is the whole
1273 // point of simplifying, and for that very reason it's pointless to maintain
1274 // the same cache for the duration of the whole analysis.
1275 llvm::DenseMap<SymbolRef, SVal> Cached;
1276
1277 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1278 return Sym == Val.getAsSymbol();
1279 }
1280
1281 SVal cache(SymbolRef Sym, SVal V) {
1282 Cached[Sym] = V;
1283 return V;
1284 }
1285
1286 SVal skip(SymbolRef Sym) {
1287 return cache(Sym, V: SVB.makeSymbolVal(Sym));
1288 }
1289
1290 // Return the known const value for the Sym if available, or return Undef
1291 // otherwise.
1292 SVal getConst(SymbolRef Sym) {
1293 const llvm::APSInt *Const =
1294 State->getConstraintManager().getSymVal(state: State, sym: Sym);
1295 if (Const)
1296 return Loc::isLocType(T: Sym->getType()) ? (SVal)SVB.makeIntLocVal(integer: *Const)
1297 : (SVal)SVB.makeIntVal(integer: *Const);
1298 return UndefinedVal();
1299 }
1300
1301 SVal getConstOrVisit(SymbolRef Sym) {
1302 const SVal Ret = getConst(Sym);
1303 if (Ret.isUndef())
1304 return Visit(S: Sym);
1305 return Ret;
1306 }
1307
1308 public:
1309 Simplifier(ProgramStateRef State)
1310 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1311
1312 SVal VisitSymbolData(const SymbolData *S) {
1313 // No cache here.
1314 if (const llvm::APSInt *I =
1315 State->getConstraintManager().getSymVal(state: State, sym: S))
1316 return Loc::isLocType(T: S->getType()) ? (SVal)SVB.makeIntLocVal(integer: *I)
1317 : (SVal)SVB.makeIntVal(integer: *I);
1318 return SVB.makeSymbolVal(Sym: S);
1319 }
1320
1321 SVal VisitSymIntExpr(const SymIntExpr *S) {
1322 auto I = Cached.find(Val: S);
1323 if (I != Cached.end())
1324 return I->second;
1325
1326 SVal LHS = getConstOrVisit(Sym: S->getLHS());
1327 if (isUnchanged(Sym: S->getLHS(), Val: LHS))
1328 return skip(Sym: S);
1329
1330 SVal RHS;
1331 // By looking at the APSInt in the right-hand side of S, we cannot
1332 // figure out if it should be treated as a Loc or as a NonLoc.
1333 // So make our guess by recalling that we cannot multiply pointers
1334 // or compare a pointer to an integer.
1335 if (Loc::isLocType(T: S->getLHS()->getType()) &&
1336 BinaryOperator::isComparisonOp(Opc: S->getOpcode())) {
1337 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1338 // the same meaning for Loc-type symbols, but the latter form
1339 // is preferred in SVal computations for being Loc itself.
1340 if (SymbolRef Sym = LHS.getAsSymbol()) {
1341 assert(Loc::isLocType(Sym->getType()));
1342 LHS = SVB.makeLoc(sym: Sym);
1343 }
1344 RHS = SVB.makeIntLocVal(integer: S->getRHS());
1345 } else {
1346 RHS = SVB.makeIntVal(integer: S->getRHS());
1347 }
1348
1349 return cache(
1350 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1351 }
1352
1353 SVal VisitIntSymExpr(const IntSymExpr *S) {
1354 auto I = Cached.find(Val: S);
1355 if (I != Cached.end())
1356 return I->second;
1357
1358 SVal RHS = getConstOrVisit(Sym: S->getRHS());
1359 if (isUnchanged(Sym: S->getRHS(), Val: RHS))
1360 return skip(Sym: S);
1361
1362 SVal LHS = SVB.makeIntVal(integer: S->getLHS());
1363 return cache(
1364 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1365 }
1366
1367 SVal VisitSymSymExpr(const SymSymExpr *S) {
1368 auto I = Cached.find(Val: S);
1369 if (I != Cached.end())
1370 return I->second;
1371
1372 // For now don't try to simplify mixed Loc/NonLoc expressions
1373 // because they often appear from LocAsInteger operations
1374 // and we don't know how to combine a LocAsInteger
1375 // with a concrete value.
1376 if (Loc::isLocType(T: S->getLHS()->getType()) !=
1377 Loc::isLocType(T: S->getRHS()->getType()))
1378 return skip(Sym: S);
1379
1380 SVal LHS = getConstOrVisit(Sym: S->getLHS());
1381 SVal RHS = getConstOrVisit(Sym: S->getRHS());
1382
1383 if (isUnchanged(Sym: S->getLHS(), Val: LHS) && isUnchanged(Sym: S->getRHS(), Val: RHS))
1384 return skip(Sym: S);
1385
1386 return cache(
1387 Sym: S, V: SVB.evalBinOp(state: State, op: S->getOpcode(), lhs: LHS, rhs: RHS, type: S->getType()));
1388 }
1389
1390 SVal VisitSymbolCast(const SymbolCast *S) {
1391 auto I = Cached.find(Val: S);
1392 if (I != Cached.end())
1393 return I->second;
1394 const SymExpr *OpSym = S->getOperand();
1395 SVal OpVal = getConstOrVisit(Sym: OpSym);
1396 if (isUnchanged(Sym: OpSym, Val: OpVal))
1397 return skip(Sym: S);
1398
1399 return cache(Sym: S, V: SVB.evalCast(V: OpVal, CastTy: S->getType(), OriginalTy: OpSym->getType()));
1400 }
1401
1402 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1403 auto I = Cached.find(Val: S);
1404 if (I != Cached.end())
1405 return I->second;
1406 SVal Op = getConstOrVisit(Sym: S->getOperand());
1407 if (isUnchanged(Sym: S->getOperand(), Val: Op))
1408 return skip(Sym: S);
1409
1410 return cache(
1411 Sym: S, V: SVB.evalUnaryOp(state: State, opc: S->getOpcode(), operand: Op, type: S->getType()));
1412 }
1413
1414 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1415
1416 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1417
1418 SVal VisitSymbolVal(nonloc::SymbolVal V) {
1419 // Simplification is much more costly than computing complexity.
1420 // For high complexity, it may be not worth it.
1421 return Visit(S: V.getSymbol());
1422 }
1423
1424 SVal VisitSVal(SVal V) { return V; }
1425 };
1426
1427 SVal SimplifiedV = Simplifier(State).Visit(V);
1428 return SimplifiedV;
1429}
1430