1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
15#include "clang/AST/OperationKinds.h"
16#include "clang/Basic/Builtins.h"
17#include "clang/Basic/CharInfo.h"
18#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20#include "clang/StaticAnalyzer/Core/Checker.h"
21#include "clang/StaticAnalyzer/Core/CheckerManager.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
27#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/raw_ostream.h"
34#include <functional>
35#include <optional>
36
37using namespace clang;
38using namespace ento;
39using namespace std::placeholders;
40
41namespace {
42struct AnyArgExpr {
43 const Expr *Expression;
44 unsigned ArgumentIndex;
45};
46struct SourceArgExpr : AnyArgExpr {};
47struct DestinationArgExpr : AnyArgExpr {};
48struct SizeArgExpr : AnyArgExpr {};
49
50using ErrorMessage = SmallString<128>;
51enum class AccessKind { write, read };
52
53static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
54 AccessKind Access) {
55 ErrorMessage Message;
56 llvm::raw_svector_ostream Os(Message);
57
58 // Function classification like: Memory copy function
59 Os << toUppercase(c: FunctionDescription.front())
60 << &FunctionDescription.data()[1];
61
62 if (Access == AccessKind::write) {
63 Os << " overflows the destination buffer";
64 } else { // read access
65 Os << " accesses out-of-bound array element";
66 }
67
68 return Message;
69}
70
/// Identifies which concatenation function, if any, is being modeled.
enum class ConcatFnKind {
  none,    // == 0
  strcat,  // == 1
  strlcat, // == 2
};

/// Character width a modeled function operates on.
enum class CharKind {
  Regular, // == 0
  Wide,
};

// Short aliases used when binding callbacks in the call-description table.
constexpr auto CK_Regular = CharKind::Regular;
constexpr auto CK_Wide = CharKind::Wide;
76
77static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
78 return Ctx.getPointerType(T: CK == CharKind::Regular ? Ctx.CharTy
79 : Ctx.WideCharTy);
80}
81
82class CStringChecker : public Checker< eval::Call,
83 check::PreStmt<DeclStmt>,
84 check::LiveSymbols,
85 check::DeadSymbols,
86 check::RegionChanges
87 > {
88 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
89 BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
90
91 mutable const char *CurrentFunctionDescription = nullptr;
92
93public:
94 /// The filter is used to filter out the diagnostics which are not enabled by
95 /// the user.
96 struct CStringChecksFilter {
97 bool CheckCStringNullArg = false;
98 bool CheckCStringOutOfBounds = false;
99 bool CheckCStringBufferOverlap = false;
100 bool CheckCStringNotNullTerm = false;
101 bool CheckCStringUninitializedRead = false;
102
103 CheckerNameRef CheckNameCStringNullArg;
104 CheckerNameRef CheckNameCStringOutOfBounds;
105 CheckerNameRef CheckNameCStringBufferOverlap;
106 CheckerNameRef CheckNameCStringNotNullTerm;
107 CheckerNameRef CheckNameCStringUninitializedRead;
108 };
109
110 CStringChecksFilter Filter;
111
112 static void *getTag() { static int tag; return &tag; }
113
114 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
115 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
116 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
117 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
118
119 ProgramStateRef
120 checkRegionChanges(ProgramStateRef state,
121 const InvalidatedSymbols *,
122 ArrayRef<const MemRegion *> ExplicitRegions,
123 ArrayRef<const MemRegion *> Regions,
124 const LocationContext *LCtx,
125 const CallEvent *Call) const;
126
127 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
128 const CallEvent &)>;
129
130 CallDescriptionMap<FnCheck> Callbacks = {
131 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
132 std::bind(f: &CStringChecker::evalMemcpy, args: _1, args: _2, args: _3, args: CK_Regular)},
133 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
134 std::bind(f: &CStringChecker::evalMemcpy, args: _1, args: _2, args: _3, args: CK_Wide)},
135 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
136 std::bind(f: &CStringChecker::evalMempcpy, args: _1, args: _2, args: _3, args: CK_Regular)},
137 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
138 std::bind(f: &CStringChecker::evalMempcpy, args: _1, args: _2, args: _3, args: CK_Wide)},
139 {{CDM::CLibrary, {"memcmp"}, 3},
140 std::bind(f: &CStringChecker::evalMemcmp, args: _1, args: _2, args: _3, args: CK_Regular)},
141 {{CDM::CLibrary, {"wmemcmp"}, 3},
142 std::bind(f: &CStringChecker::evalMemcmp, args: _1, args: _2, args: _3, args: CK_Wide)},
143 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
144 std::bind(f: &CStringChecker::evalMemmove, args: _1, args: _2, args: _3, args: CK_Regular)},
145 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
146 std::bind(f: &CStringChecker::evalMemmove, args: _1, args: _2, args: _3, args: CK_Wide)},
147 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
148 &CStringChecker::evalMemset},
149 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
150 // FIXME: C23 introduces 'memset_explicit', maybe also model that
151 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
152 &CStringChecker::evalStrcpy},
153 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
154 &CStringChecker::evalStrncpy},
155 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
156 &CStringChecker::evalStpcpy},
157 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
158 &CStringChecker::evalStrlcpy},
159 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
160 &CStringChecker::evalStrcat},
161 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
162 &CStringChecker::evalStrncat},
163 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
164 &CStringChecker::evalStrlcat},
165 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
166 &CStringChecker::evalstrLength},
167 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
168 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
169 &CStringChecker::evalstrnLength},
170 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
171 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
172 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
173 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
174 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
175 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
176 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
177 {{CDM::CLibrary, {"bcmp"}, 3},
178 std::bind(f: &CStringChecker::evalMemcmp, args: _1, args: _2, args: _3, args: CK_Regular)},
179 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
180 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
181 &CStringChecker::evalBzero},
182
183 // When recognizing calls to the following variadic functions, we accept
184 // any number of arguments in the call (std::nullopt = accept any
185 // number), but check that in the declaration there are 2 and 3
186 // parameters respectively. (Note that the parameter count does not
187 // include the "...". Calls where the number of arguments is too small
188 // will be discarded by the callback.)
189 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
190 &CStringChecker::evalSprintf},
191 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
192 &CStringChecker::evalSnprintf},
193 };
194
195 // These require a bit of special handling.
196 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
197 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
198
199 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
200 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
202 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
203 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
204 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
205 ProgramStateRef state, SizeArgExpr Size,
206 DestinationArgExpr Dest, SourceArgExpr Source,
207 bool Restricted, bool IsMempcpy, CharKind CK) const;
208
209 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
210
211 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
212 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
213 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
214 bool IsStrnlen = false) const;
215
216 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
217 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
218 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
220 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
221 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
222 bool returnPtr = true) const;
223
224 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
226 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
227
228 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
229 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
230 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
231 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
232 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
233 bool IsBounded = false, bool IgnoreCase = false) const;
234
235 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
236
237 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
238 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
239 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
240 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
241 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
242
243 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
244 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
245 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
246 bool IsBounded) const;
247
248 // Utility methods
249 std::pair<ProgramStateRef , ProgramStateRef >
250 static assumeZero(CheckerContext &C,
251 ProgramStateRef state, SVal V, QualType Ty);
252
253 static ProgramStateRef setCStringLength(ProgramStateRef state,
254 const MemRegion *MR,
255 SVal strLength);
256 static SVal getCStringLengthForRegion(CheckerContext &C,
257 ProgramStateRef &state,
258 const Expr *Ex,
259 const MemRegion *MR,
260 bool hypothetical);
261 SVal getCStringLength(CheckerContext &C,
262 ProgramStateRef &state,
263 const Expr *Ex,
264 SVal Buf,
265 bool hypothetical = false) const;
266
267 const StringLiteral *getCStringLiteral(CheckerContext &C,
268 ProgramStateRef &state,
269 const Expr *expr,
270 SVal val) const;
271
272 /// Invalidate the destination buffer determined by characters copied.
273 static ProgramStateRef
274 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
275 const Expr *BufE, SVal BufV, SVal SizeV,
276 QualType SizeTy);
277
278 /// Operation never overflows, do not invalidate the super region.
279 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
280 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
281
282 /// We do not know whether the operation can overflow (e.g. size is unknown),
283 /// invalidate the super region and escape related pointers.
284 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
285 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
286
287 /// Invalidate the source buffer for escaping pointers.
288 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
289 ProgramStateRef S,
290 const Expr *BufE, SVal BufV);
291
292 /// @param InvalidationTraitOperations Determine how to invlidate the
293 /// MemRegion by setting the invalidation traits. Return true to cause pointer
294 /// escape, or false otherwise.
295 static ProgramStateRef invalidateBufferAux(
296 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
297 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
298 const MemRegion *)>
299 InvalidationTraitOperations);
300
301 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
302 const MemRegion *MR);
303
304 static bool memsetAux(const Expr *DstBuffer, SVal CharE,
305 const Expr *Size, CheckerContext &C,
306 ProgramStateRef &State);
307
308 // Re-usable checks
309 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
310 AnyArgExpr Arg, SVal l) const;
311 // Check whether the origin region behind \p Element (like the actual array
312 // region \p Element is from) is initialized.
313 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
314 AnyArgExpr Buffer, SVal Element, SVal Size) const;
315 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
316 AnyArgExpr Buffer, SVal Element,
317 AccessKind Access,
318 CharKind CK = CharKind::Regular) const;
319 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
320 AnyArgExpr Buffer, SizeArgExpr Size,
321 AccessKind Access,
322 CharKind CK = CharKind::Regular) const;
323 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
324 SizeArgExpr Size, AnyArgExpr First,
325 AnyArgExpr Second,
326 CharKind CK = CharKind::Regular) const;
327 void emitOverlapBug(CheckerContext &C,
328 ProgramStateRef state,
329 const Stmt *First,
330 const Stmt *Second) const;
331
332 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
333 StringRef WarningMsg) const;
334 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
335 const Stmt *S, StringRef WarningMsg) const;
336 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
337 const Stmt *S, StringRef WarningMsg) const;
338 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
339 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
340 const Expr *E, StringRef Msg) const;
341 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
342 ProgramStateRef state,
343 NonLoc left,
344 NonLoc right) const;
345
346 // Return true if the destination buffer of the copy function may be in bound.
347 // Expects SVal of Size to be positive and unsigned.
348 // Expects SVal of FirstBuf to be a FieldRegion.
349 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
350 SVal BufVal, QualType BufTy, SVal LengthVal,
351 QualType LengthTy);
352};
353
354} //end anonymous namespace
355
// Program-state map keyed by memory region with an SVal per entry; judging by
// the trait name this is the tracked C string length of the region (see
// setCStringLength / getCStringLength) -- NOTE(review): confirm against users.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
357
358//===----------------------------------------------------------------------===//
359// Individual checks and utility methods.
360//===----------------------------------------------------------------------===//
361
362std::pair<ProgramStateRef, ProgramStateRef>
363CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
364 QualType Ty) {
365 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
366 if (!val)
367 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
368
369 SValBuilder &svalBuilder = C.getSValBuilder();
370 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(type: Ty);
371 return State->assume(Cond: svalBuilder.evalEQ(state: State, lhs: *val, rhs: zero));
372}
373
374ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
375 ProgramStateRef State,
376 AnyArgExpr Arg, SVal l) const {
377 // If a previous check has failed, propagate the failure.
378 if (!State)
379 return nullptr;
380
381 ProgramStateRef stateNull, stateNonNull;
382 std::tie(args&: stateNull, args&: stateNonNull) =
383 assumeZero(C, State, V: l, Ty: Arg.Expression->getType());
384
385 if (stateNull && !stateNonNull) {
386 if (Filter.CheckCStringNullArg) {
387 SmallString<80> buf;
388 llvm::raw_svector_ostream OS(buf);
389 assert(CurrentFunctionDescription);
390 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
391 << llvm::getOrdinalSuffix(Val: Arg.ArgumentIndex + 1) << " argument to "
392 << CurrentFunctionDescription;
393
394 emitNullArgBug(C, State: stateNull, S: Arg.Expression, WarningMsg: OS.str());
395 }
396 return nullptr;
397 }
398
399 // From here on, assume that the value is non-null.
400 assert(stateNonNull);
401 return stateNonNull;
402}
403
404static std::optional<NonLoc> getIndex(ProgramStateRef State,
405 const ElementRegion *ER, CharKind CK) {
406 SValBuilder &SVB = State->getStateManager().getSValBuilder();
407 ASTContext &Ctx = SVB.getContext();
408
409 if (CK == CharKind::Regular) {
410 if (ER->getValueType() != Ctx.CharTy)
411 return {};
412 return ER->getIndex();
413 }
414
415 if (ER->getValueType() != Ctx.WideCharTy)
416 return {};
417
418 QualType SizeTy = Ctx.getSizeType();
419 NonLoc WideSize =
420 SVB.makeIntVal(integer: Ctx.getTypeSizeInChars(T: Ctx.WideCharTy).getQuantity(),
421 type: SizeTy)
422 .castAs<NonLoc>();
423 SVal Offset =
424 SVB.evalBinOpNN(state: State, op: BO_Mul, lhs: ER->getIndex(), rhs: WideSize, resultTy: SizeTy);
425 if (Offset.isUnknown())
426 return {};
427 return Offset.castAs<NonLoc>();
428}
429
430// Basically 1 -> 1st, 12 -> 12th, etc.
431static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
432 Os << Idx << llvm::getOrdinalSuffix(Val: Idx);
433}
434
435ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
436 ProgramStateRef State,
437 AnyArgExpr Buffer, SVal Element,
438 SVal Size) const {
439
440 // If a previous check has failed, propagate the failure.
441 if (!State)
442 return nullptr;
443
444 const MemRegion *R = Element.getAsRegion();
445 const auto *ER = dyn_cast_or_null<ElementRegion>(Val: R);
446 if (!ER)
447 return State;
448
449 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
450 if (!SuperR)
451 return State;
452
453 // FIXME: We ought to able to check objects as well. Maybe
454 // UninitializedObjectChecker could help?
455 if (!SuperR->getValueType()->isArrayType())
456 return State;
457
458 SValBuilder &SVB = C.getSValBuilder();
459 ASTContext &Ctx = SVB.getContext();
460
461 const QualType ElemTy = Ctx.getBaseElementType(QT: SuperR->getValueType());
462 const NonLoc Zero = SVB.makeZeroArrayIndex();
463
464 std::optional<Loc> FirstElementVal =
465 State->getLValue(ElementType: ElemTy, Idx: Zero, Base: loc::MemRegionVal(SuperR)).getAs<Loc>();
466 if (!FirstElementVal)
467 return State;
468
469 // Ensure that we wouldn't read uninitialized value.
470 if (Filter.CheckCStringUninitializedRead &&
471 State->getSVal(LV: *FirstElementVal).isUndef()) {
472 llvm::SmallString<258> Buf;
473 llvm::raw_svector_ostream OS(Buf);
474 OS << "The first element of the ";
475 printIdxWithOrdinalSuffix(Os&: OS, Idx: Buffer.ArgumentIndex + 1);
476 OS << " argument is undefined";
477 emitUninitializedReadBug(C, State, E: Buffer.Expression, Msg: OS.str());
478 return nullptr;
479 }
480
481 // We won't check whether the entire region is fully initialized -- lets just
482 // check that the first and the last element is. So, onto checking the last
483 // element:
484 const QualType IdxTy = SVB.getArrayIndexType();
485
486 NonLoc ElemSize =
487 SVB.makeIntVal(integer: Ctx.getTypeSizeInChars(T: ElemTy).getQuantity(), type: IdxTy)
488 .castAs<NonLoc>();
489
490 // FIXME: Check that the size arg to the cstring function is divisible by
491 // size of the actual element type?
492
493 // The type of the argument to the cstring function is either char or wchar,
494 // but thats not the type of the original array (or memory region).
495 // Suppose the following:
496 // int t[5];
497 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
498 // When checking whether t is fully initialized, we see it as char array of
499 // size sizeof(int)*5. If we check the last element as a character, we read
500 // the last byte of an integer, which will be undefined. But just because
501 // that value is undefined, it doesn't mean that the element is uninitialized!
502 // For this reason, we need to retrieve the actual last element with the
503 // correct type.
504
505 // Divide the size argument to the cstring function by the actual element
506 // type. This value will be size of the array, or the index to the
507 // past-the-end element.
508 std::optional<NonLoc> Offset =
509 SVB.evalBinOpNN(state: State, op: clang::BO_Div, lhs: Size.castAs<NonLoc>(), rhs: ElemSize,
510 resultTy: IdxTy)
511 .getAs<NonLoc>();
512
513 // Retrieve the index of the last element.
514 const NonLoc One = SVB.makeIntVal(integer: 1, type: IdxTy).castAs<NonLoc>();
515 SVal LastIdx = SVB.evalBinOpNN(state: State, op: BO_Sub, lhs: *Offset, rhs: One, resultTy: IdxTy);
516
517 if (!Offset)
518 return State;
519
520 SVal LastElementVal =
521 State->getLValue(ElementType: ElemTy, Idx: LastIdx, Base: loc::MemRegionVal(SuperR));
522 if (!isa<Loc>(Val: LastElementVal))
523 return State;
524
525 if (Filter.CheckCStringUninitializedRead &&
526 State->getSVal(LV: LastElementVal.castAs<Loc>()).isUndef()) {
527 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
528 // If we can't get emit a sensible last element index, just bail out --
529 // prefer to emit nothing in favour of emitting garbage quality reports.
530 if (!IdxInt) {
531 C.addSink();
532 return nullptr;
533 }
534 llvm::SmallString<258> Buf;
535 llvm::raw_svector_ostream OS(Buf);
536 OS << "The last accessed element (at index ";
537 OS << IdxInt->getExtValue();
538 OS << ") in the ";
539 printIdxWithOrdinalSuffix(Os&: OS, Idx: Buffer.ArgumentIndex + 1);
540 OS << " argument is undefined";
541 emitUninitializedReadBug(C, State, E: Buffer.Expression, Msg: OS.str());
542 return nullptr;
543 }
544 return State;
545}
546
547// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
548ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549 ProgramStateRef state,
550 AnyArgExpr Buffer, SVal Element,
551 AccessKind Access,
552 CharKind CK) const {
553
554 // If a previous check has failed, propagate the failure.
555 if (!state)
556 return nullptr;
557
558 // Check for out of bound array element access.
559 const MemRegion *R = Element.getAsRegion();
560 if (!R)
561 return state;
562
563 const auto *ER = dyn_cast<ElementRegion>(Val: R);
564 if (!ER)
565 return state;
566
567 // Get the index of the accessed element.
568 std::optional<NonLoc> Idx = getIndex(State: state, ER, CK);
569 if (!Idx)
570 return state;
571
572 // Get the size of the array.
573 const auto *superReg = cast<SubRegion>(Val: ER->getSuperRegion());
574 DefinedOrUnknownSVal Size =
575 getDynamicExtent(State: state, MR: superReg, SVB&: C.getSValBuilder());
576
577 auto [StInBound, StOutBound] = state->assumeInBoundDual(idx: *Idx, upperBound: Size);
578 if (StOutBound && !StInBound) {
579 // These checks are either enabled by the CString out-of-bounds checker
580 // explicitly or implicitly by the Malloc checker.
581 // In the latter case we only do modeling but do not emit warning.
582 if (!Filter.CheckCStringOutOfBounds)
583 return nullptr;
584
585 // Emit a bug report.
586 ErrorMessage Message =
587 createOutOfBoundErrorMsg(FunctionDescription: CurrentFunctionDescription, Access);
588 emitOutOfBoundsBug(C, State: StOutBound, S: Buffer.Expression, WarningMsg: Message);
589 return nullptr;
590 }
591
592 // Array bound check succeeded. From this point forward the array bound
593 // should always succeed.
594 return StInBound;
595}
596
597ProgramStateRef
598CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
599 AnyArgExpr Buffer, SizeArgExpr Size,
600 AccessKind Access, CharKind CK) const {
601 // If a previous check has failed, propagate the failure.
602 if (!State)
603 return nullptr;
604
605 SValBuilder &svalBuilder = C.getSValBuilder();
606 ASTContext &Ctx = svalBuilder.getContext();
607
608 QualType SizeTy = Size.Expression->getType();
609 QualType PtrTy = getCharPtrType(Ctx, CK);
610
611 // Check that the first buffer is non-null.
612 SVal BufVal = C.getSVal(S: Buffer.Expression);
613 State = checkNonNull(C, State, Arg: Buffer, l: BufVal);
614 if (!State)
615 return nullptr;
616
617 // If out-of-bounds checking is turned off, skip the rest.
618 if (!Filter.CheckCStringOutOfBounds)
619 return State;
620
621 SVal BufStart =
622 svalBuilder.evalCast(V: BufVal, CastTy: PtrTy, OriginalTy: Buffer.Expression->getType());
623
624 // Check if the first byte of the buffer is accessible.
625 State = CheckLocation(C, state: State, Buffer, Element: BufStart, Access, CK);
626
627 if (!State)
628 return nullptr;
629
630 // Get the access length and make sure it is known.
631 // FIXME: This assumes the caller has already checked that the access length
632 // is positive. And that it's unsigned.
633 SVal LengthVal = C.getSVal(S: Size.Expression);
634 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
635 if (!Length)
636 return State;
637
638 // Compute the offset of the last element to be accessed: size-1.
639 NonLoc One = svalBuilder.makeIntVal(integer: 1, type: SizeTy).castAs<NonLoc>();
640 SVal Offset = svalBuilder.evalBinOpNN(state: State, op: BO_Sub, lhs: *Length, rhs: One, resultTy: SizeTy);
641 if (Offset.isUnknown())
642 return nullptr;
643 NonLoc LastOffset = Offset.castAs<NonLoc>();
644
645 // Check that the first buffer is sufficiently long.
646 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
647
648 SVal BufEnd =
649 svalBuilder.evalBinOpLN(state: State, op: BO_Add, lhs: *BufLoc, rhs: LastOffset, resultTy: PtrTy);
650 State = CheckLocation(C, state: State, Buffer, Element: BufEnd, Access, CK);
651 if (Access == AccessKind::read)
652 State = checkInit(C, State, Buffer, Element: BufEnd, Size: *Length);
653
654 // If the buffer isn't large enough, abort.
655 if (!State)
656 return nullptr;
657 }
658
659 // Large enough or not, return this state!
660 return State;
661}
662
663ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
664 ProgramStateRef state,
665 SizeArgExpr Size, AnyArgExpr First,
666 AnyArgExpr Second,
667 CharKind CK) const {
668 if (!Filter.CheckCStringBufferOverlap)
669 return state;
670
671 // Do a simple check for overlap: if the two arguments are from the same
672 // buffer, see if the end of the first is greater than the start of the second
673 // or vice versa.
674
675 // If a previous check has failed, propagate the failure.
676 if (!state)
677 return nullptr;
678
679 ProgramStateRef stateTrue, stateFalse;
680
681 // Assume different address spaces cannot overlap.
682 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
683 Second.Expression->getType()->getPointeeType().getAddressSpace())
684 return state;
685
686 // Get the buffer values and make sure they're known locations.
687 const LocationContext *LCtx = C.getLocationContext();
688 SVal firstVal = state->getSVal(Ex: First.Expression, LCtx);
689 SVal secondVal = state->getSVal(Ex: Second.Expression, LCtx);
690
691 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
692 if (!firstLoc)
693 return state;
694
695 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
696 if (!secondLoc)
697 return state;
698
699 // Are the two values the same?
700 SValBuilder &svalBuilder = C.getSValBuilder();
701 std::tie(args&: stateTrue, args&: stateFalse) =
702 state->assume(Cond: svalBuilder.evalEQ(state, lhs: *firstLoc, rhs: *secondLoc));
703
704 if (stateTrue && !stateFalse) {
705 // If the values are known to be equal, that's automatically an overlap.
706 emitOverlapBug(C, state: stateTrue, First: First.Expression, Second: Second.Expression);
707 return nullptr;
708 }
709
710 // assume the two expressions are not equal.
711 assert(stateFalse);
712 state = stateFalse;
713
714 // Which value comes first?
715 QualType cmpTy = svalBuilder.getConditionType();
716 SVal reverse =
717 svalBuilder.evalBinOpLL(state, op: BO_GT, lhs: *firstLoc, rhs: *secondLoc, resultTy: cmpTy);
718 std::optional<DefinedOrUnknownSVal> reverseTest =
719 reverse.getAs<DefinedOrUnknownSVal>();
720 if (!reverseTest)
721 return state;
722
723 std::tie(args&: stateTrue, args&: stateFalse) = state->assume(Cond: *reverseTest);
724 if (stateTrue) {
725 if (stateFalse) {
726 // If we don't know which one comes first, we can't perform this test.
727 return state;
728 } else {
729 // Switch the values so that firstVal is before secondVal.
730 std::swap(lhs&: firstLoc, rhs&: secondLoc);
731
732 // Switch the Exprs as well, so that they still correspond.
733 std::swap(a&: First, b&: Second);
734 }
735 }
736
737 // Get the length, and make sure it too is known.
738 SVal LengthVal = state->getSVal(Ex: Size.Expression, LCtx);
739 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
740 if (!Length)
741 return state;
742
743 // Convert the first buffer's start address to char*.
744 // Bail out if the cast fails.
745 ASTContext &Ctx = svalBuilder.getContext();
746 QualType CharPtrTy = getCharPtrType(Ctx, CK);
747 SVal FirstStart =
748 svalBuilder.evalCast(V: *firstLoc, CastTy: CharPtrTy, OriginalTy: First.Expression->getType());
749 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
750 if (!FirstStartLoc)
751 return state;
752
753 // Compute the end of the first buffer. Bail out if THAT fails.
754 SVal FirstEnd = svalBuilder.evalBinOpLN(state, op: BO_Add, lhs: *FirstStartLoc,
755 rhs: *Length, resultTy: CharPtrTy);
756 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
757 if (!FirstEndLoc)
758 return state;
759
760 // Is the end of the first buffer past the start of the second buffer?
761 SVal Overlap =
762 svalBuilder.evalBinOpLL(state, op: BO_GT, lhs: *FirstEndLoc, rhs: *secondLoc, resultTy: cmpTy);
763 std::optional<DefinedOrUnknownSVal> OverlapTest =
764 Overlap.getAs<DefinedOrUnknownSVal>();
765 if (!OverlapTest)
766 return state;
767
768 std::tie(args&: stateTrue, args&: stateFalse) = state->assume(Cond: *OverlapTest);
769
770 if (stateTrue && !stateFalse) {
771 // Overlap!
772 emitOverlapBug(C, state: stateTrue, First: First.Expression, Second: Second.Expression);
773 return nullptr;
774 }
775
776 // assume the two expressions don't overlap.
777 assert(stateFalse);
778 return stateFalse;
779}
780
781void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
782 const Stmt *First, const Stmt *Second) const {
783 ExplodedNode *N = C.generateErrorNode(State: state);
784 if (!N)
785 return;
786
787 if (!BT_Overlap)
788 BT_Overlap.reset(p: new BugType(Filter.CheckNameCStringBufferOverlap,
789 categories::UnixAPI, "Improper arguments"));
790
791 // Generate a report for this bug.
792 auto report = std::make_unique<PathSensitiveBugReport>(
793 args&: *BT_Overlap, args: "Arguments must not be overlapping buffers", args&: N);
794 report->addRange(R: First->getSourceRange());
795 report->addRange(R: Second->getSourceRange());
796
797 C.emitReport(R: std::move(report));
798}
799
800void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
801 const Stmt *S, StringRef WarningMsg) const {
802 if (ExplodedNode *N = C.generateErrorNode(State)) {
803 if (!BT_Null) {
804 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
805 // description of the bug; it should be replaced by a real description.
806 BT_Null.reset(
807 p: new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
808 }
809
810 auto Report =
811 std::make_unique<PathSensitiveBugReport>(args&: *BT_Null, args&: WarningMsg, args&: N);
812 Report->addRange(R: S->getSourceRange());
813 if (const auto *Ex = dyn_cast<Expr>(Val: S))
814 bugreporter::trackExpressionValue(N, E: Ex, R&: *Report);
815 C.emitReport(R: std::move(Report));
816 }
817}
818
819void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
820 ProgramStateRef State,
821 const Expr *E,
822 StringRef Msg) const {
823 if (ExplodedNode *N = C.generateErrorNode(State)) {
824 if (!BT_UninitRead)
825 BT_UninitRead.reset(p: new BugType(Filter.CheckNameCStringUninitializedRead,
826 "Accessing unitialized/garbage values"));
827
828 auto Report =
829 std::make_unique<PathSensitiveBugReport>(args&: *BT_UninitRead, args&: Msg, args&: N);
830 Report->addNote(Msg: "Other elements might also be undefined",
831 Pos: Report->getLocation());
832 Report->addRange(R: E->getSourceRange());
833 bugreporter::trackExpressionValue(N, E, R&: *Report);
834 C.emitReport(R: std::move(Report));
835 }
836}
837
838void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
839 ProgramStateRef State, const Stmt *S,
840 StringRef WarningMsg) const {
841 if (ExplodedNode *N = C.generateErrorNode(State)) {
842 if (!BT_Bounds)
843 BT_Bounds.reset(p: new BugType(Filter.CheckCStringOutOfBounds
844 ? Filter.CheckNameCStringOutOfBounds
845 : Filter.CheckNameCStringNullArg,
846 "Out-of-bound array access"));
847
848 // FIXME: It would be nice to eventually make this diagnostic more clear,
849 // e.g., by referencing the original declaration or by saying *why* this
850 // reference is outside the range.
851 auto Report =
852 std::make_unique<PathSensitiveBugReport>(args&: *BT_Bounds, args&: WarningMsg, args&: N);
853 Report->addRange(R: S->getSourceRange());
854 C.emitReport(R: std::move(Report));
855 }
856}
857
858void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
859 const Stmt *S,
860 StringRef WarningMsg) const {
861 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
862 if (!BT_NotCString) {
863 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
864 // description of the bug; it should be replaced by a real description.
865 BT_NotCString.reset(
866 p: new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
867 }
868
869 auto Report =
870 std::make_unique<PathSensitiveBugReport>(args&: *BT_NotCString, args&: WarningMsg, args&: N);
871
872 Report->addRange(R: S->getSourceRange());
873 C.emitReport(R: std::move(Report));
874 }
875}
876
877void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
878 ProgramStateRef State) const {
879 if (ExplodedNode *N = C.generateErrorNode(State)) {
880 if (!BT_AdditionOverflow) {
881 // FIXME: This call uses the word "API" as the description of the bug;
882 // it should be replaced by a better error message (if this unlikely
883 // situation continues to exist as a separate bug type).
884 BT_AdditionOverflow.reset(
885 p: new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
886 }
887
888 // This isn't a great error message, but this should never occur in real
889 // code anyway -- you'd have to create a buffer longer than a size_t can
890 // represent, which is sort of a contradiction.
891 const char *WarningMsg =
892 "This expression will create a string whose length is too big to "
893 "be represented as a size_t";
894
895 auto Report = std::make_unique<PathSensitiveBugReport>(args&: *BT_AdditionOverflow,
896 args&: WarningMsg, args&: N);
897 C.emitReport(R: std::move(Report));
898 }
899}
900
901ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
902 ProgramStateRef state,
903 NonLoc left,
904 NonLoc right) const {
905 // If out-of-bounds checking is turned off, skip the rest.
906 if (!Filter.CheckCStringOutOfBounds)
907 return state;
908
909 // If a previous check has failed, propagate the failure.
910 if (!state)
911 return nullptr;
912
913 SValBuilder &svalBuilder = C.getSValBuilder();
914 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
915
916 QualType sizeTy = svalBuilder.getContext().getSizeType();
917 const llvm::APSInt &maxValInt = BVF.getMaxValue(T: sizeTy);
918 NonLoc maxVal = svalBuilder.makeIntVal(integer: maxValInt);
919
920 SVal maxMinusRight;
921 if (isa<nonloc::ConcreteInt>(Val: right)) {
922 maxMinusRight = svalBuilder.evalBinOpNN(state, op: BO_Sub, lhs: maxVal, rhs: right,
923 resultTy: sizeTy);
924 } else {
925 // Try switching the operands. (The order of these two assignments is
926 // important!)
927 maxMinusRight = svalBuilder.evalBinOpNN(state, op: BO_Sub, lhs: maxVal, rhs: left,
928 resultTy: sizeTy);
929 left = right;
930 }
931
932 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
933 QualType cmpTy = svalBuilder.getConditionType();
934 // If left > max - right, we have an overflow.
935 SVal willOverflow = svalBuilder.evalBinOpNN(state, op: BO_GT, lhs: left,
936 rhs: *maxMinusRightNL, resultTy: cmpTy);
937
938 ProgramStateRef stateOverflow, stateOkay;
939 std::tie(args&: stateOverflow, args&: stateOkay) =
940 state->assume(Cond: willOverflow.castAs<DefinedOrUnknownSVal>());
941
942 if (stateOverflow && !stateOkay) {
943 // We have an overflow. Emit a bug report.
944 emitAdditionOverflowBug(C, State: stateOverflow);
945 return nullptr;
946 }
947
948 // From now on, assume an overflow didn't occur.
949 assert(stateOkay);
950 state = stateOkay;
951 }
952
953 return state;
954}
955
956ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
957 const MemRegion *MR,
958 SVal strLength) {
959 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
960
961 MR = MR->StripCasts();
962
963 switch (MR->getKind()) {
964 case MemRegion::StringRegionKind:
965 // FIXME: This can happen if we strcpy() into a string region. This is
966 // undefined [C99 6.4.5p6], but we should still warn about it.
967 return state;
968
969 case MemRegion::SymbolicRegionKind:
970 case MemRegion::AllocaRegionKind:
971 case MemRegion::NonParamVarRegionKind:
972 case MemRegion::ParamVarRegionKind:
973 case MemRegion::FieldRegionKind:
974 case MemRegion::ObjCIvarRegionKind:
975 // These are the types we can currently track string lengths for.
976 break;
977
978 case MemRegion::ElementRegionKind:
979 // FIXME: Handle element regions by upper-bounding the parent region's
980 // string length.
981 return state;
982
983 default:
984 // Other regions (mostly non-data) can't have a reliable C string length.
985 // For now, just ignore the change.
986 // FIXME: These are rare but not impossible. We should output some kind of
987 // warning for things like strcpy((char[]){'a', 0}, "b");
988 return state;
989 }
990
991 if (strLength.isUnknown())
992 return state->remove<CStringLength>(K: MR);
993
994 return state->set<CStringLength>(K: MR, E: strLength);
995}
996
997SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
998 ProgramStateRef &state,
999 const Expr *Ex,
1000 const MemRegion *MR,
1001 bool hypothetical) {
1002 if (!hypothetical) {
1003 // If there's a recorded length, go ahead and return it.
1004 const SVal *Recorded = state->get<CStringLength>(key: MR);
1005 if (Recorded)
1006 return *Recorded;
1007 }
1008
1009 // Otherwise, get a new symbol and update the state.
1010 SValBuilder &svalBuilder = C.getSValBuilder();
1011 QualType sizeTy = svalBuilder.getContext().getSizeType();
1012 SVal strLength = svalBuilder.getMetadataSymbolVal(symbolTag: CStringChecker::getTag(),
1013 region: MR, expr: Ex, type: sizeTy,
1014 LCtx: C.getLocationContext(),
1015 count: C.blockCount());
1016
1017 if (!hypothetical) {
1018 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1019 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1020 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1021 const llvm::APSInt &maxValInt = BVF.getMaxValue(T: sizeTy);
1022 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(RawValue: 4);
1023 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(Op: BO_Div, V1: maxValInt,
1024 V2: fourInt);
1025 NonLoc maxLength = svalBuilder.makeIntVal(integer: *maxLengthInt);
1026 SVal evalLength = svalBuilder.evalBinOpNN(state, op: BO_LE, lhs: *strLn, rhs: maxLength,
1027 resultTy: svalBuilder.getConditionType());
1028 state = state->assume(Cond: evalLength.castAs<DefinedOrUnknownSVal>(), Assumption: true);
1029 }
1030 state = state->set<CStringLength>(K: MR, E: strLength);
1031 }
1032
1033 return strLength;
1034}
1035
1036SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1037 const Expr *Ex, SVal Buf,
1038 bool hypothetical) const {
1039 const MemRegion *MR = Buf.getAsRegion();
1040 if (!MR) {
1041 // If we can't get a region, see if it's something we /know/ isn't a
1042 // C string. In the context of locations, the only time we can issue such
1043 // a warning is for labels.
1044 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1045 if (Filter.CheckCStringNotNullTerm) {
1046 SmallString<120> buf;
1047 llvm::raw_svector_ostream os(buf);
1048 assert(CurrentFunctionDescription);
1049 os << "Argument to " << CurrentFunctionDescription
1050 << " is the address of the label '" << Label->getLabel()->getName()
1051 << "', which is not a null-terminated string";
1052
1053 emitNotCStringBug(C, State: state, S: Ex, WarningMsg: os.str());
1054 }
1055 return UndefinedVal();
1056 }
1057
1058 // If it's not a region and not a label, give up.
1059 return UnknownVal();
1060 }
1061
1062 // If we have a region, strip casts from it and see if we can figure out
1063 // its length. For anything we can't figure out, just return UnknownVal.
1064 MR = MR->StripCasts();
1065
1066 switch (MR->getKind()) {
1067 case MemRegion::StringRegionKind: {
1068 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1069 // so we can assume that the byte length is the correct C string length.
1070 SValBuilder &svalBuilder = C.getSValBuilder();
1071 QualType sizeTy = svalBuilder.getContext().getSizeType();
1072 const StringLiteral *strLit = cast<StringRegion>(Val: MR)->getStringLiteral();
1073 return svalBuilder.makeIntVal(integer: strLit->getLength(), type: sizeTy);
1074 }
1075 case MemRegion::NonParamVarRegionKind: {
1076 // If we have a global constant with a string literal initializer,
1077 // compute the initializer's length.
1078 const VarDecl *Decl = cast<NonParamVarRegion>(Val: MR)->getDecl();
1079 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1080 if (const Expr *Init = Decl->getInit()) {
1081 if (auto *StrLit = dyn_cast<StringLiteral>(Val: Init)) {
1082 SValBuilder &SvalBuilder = C.getSValBuilder();
1083 QualType SizeTy = SvalBuilder.getContext().getSizeType();
1084 return SvalBuilder.makeIntVal(integer: StrLit->getLength(), type: SizeTy);
1085 }
1086 }
1087 }
1088 [[fallthrough]];
1089 }
1090 case MemRegion::SymbolicRegionKind:
1091 case MemRegion::AllocaRegionKind:
1092 case MemRegion::ParamVarRegionKind:
1093 case MemRegion::FieldRegionKind:
1094 case MemRegion::ObjCIvarRegionKind:
1095 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1096 case MemRegion::CompoundLiteralRegionKind:
1097 // FIXME: Can we track this? Is it necessary?
1098 return UnknownVal();
1099 case MemRegion::ElementRegionKind:
1100 // FIXME: How can we handle this? It's not good enough to subtract the
1101 // offset from the base string length; consider "123\x00567" and &a[5].
1102 return UnknownVal();
1103 default:
1104 // Other regions (mostly non-data) can't have a reliable C string length.
1105 // In this case, an error is emitted and UndefinedVal is returned.
1106 // The caller should always be prepared to handle this case.
1107 if (Filter.CheckCStringNotNullTerm) {
1108 SmallString<120> buf;
1109 llvm::raw_svector_ostream os(buf);
1110
1111 assert(CurrentFunctionDescription);
1112 os << "Argument to " << CurrentFunctionDescription << " is ";
1113
1114 if (SummarizeRegion(os, Ctx&: C.getASTContext(), MR))
1115 os << ", which is not a null-terminated string";
1116 else
1117 os << "not a null-terminated string";
1118
1119 emitNotCStringBug(C, State: state, S: Ex, WarningMsg: os.str());
1120 }
1121 return UndefinedVal();
1122 }
1123}
1124
1125const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1126 ProgramStateRef &state, const Expr *expr, SVal val) const {
1127
1128 // Get the memory region pointed to by the val.
1129 const MemRegion *bufRegion = val.getAsRegion();
1130 if (!bufRegion)
1131 return nullptr;
1132
1133 // Strip casts off the memory region.
1134 bufRegion = bufRegion->StripCasts();
1135
1136 // Cast the memory region to a string region.
1137 const StringRegion *strRegion= dyn_cast<StringRegion>(Val: bufRegion);
1138 if (!strRegion)
1139 return nullptr;
1140
1141 // Return the actual string in the string region.
1142 return strRegion->getStringLiteral();
1143}
1144
1145bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1146 SVal BufVal, QualType BufTy,
1147 SVal LengthVal, QualType LengthTy) {
1148 // If we do not know that the buffer is long enough we return 'true'.
1149 // Otherwise the parent region of this field region would also get
1150 // invalidated, which would lead to warnings based on an unknown state.
1151
1152 if (LengthVal.isUnknown())
1153 return false;
1154
1155 // Originally copied from CheckBufferAccess and CheckLocation.
1156 SValBuilder &SB = C.getSValBuilder();
1157 ASTContext &Ctx = C.getASTContext();
1158
1159 QualType PtrTy = Ctx.getPointerType(T: Ctx.CharTy);
1160
1161 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1162 if (!Length)
1163 return true; // cf top comment.
1164
1165 // Compute the offset of the last element to be accessed: size-1.
1166 NonLoc One = SB.makeIntVal(integer: 1, type: LengthTy).castAs<NonLoc>();
1167 SVal Offset = SB.evalBinOpNN(state: State, op: BO_Sub, lhs: *Length, rhs: One, resultTy: LengthTy);
1168 if (Offset.isUnknown())
1169 return true; // cf top comment
1170 NonLoc LastOffset = Offset.castAs<NonLoc>();
1171
1172 // Check that the first buffer is sufficiently long.
1173 SVal BufStart = SB.evalCast(V: BufVal, CastTy: PtrTy, OriginalTy: BufTy);
1174 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1175 if (!BufLoc)
1176 return true; // cf top comment.
1177
1178 SVal BufEnd = SB.evalBinOpLN(state: State, op: BO_Add, lhs: *BufLoc, rhs: LastOffset, resultTy: PtrTy);
1179
1180 // Check for out of bound array element access.
1181 const MemRegion *R = BufEnd.getAsRegion();
1182 if (!R)
1183 return true; // cf top comment.
1184
1185 const ElementRegion *ER = dyn_cast<ElementRegion>(Val: R);
1186 if (!ER)
1187 return true; // cf top comment.
1188
1189 // FIXME: Does this crash when a non-standard definition
1190 // of a library function is encountered?
1191 assert(ER->getValueType() == C.getASTContext().CharTy &&
1192 "isFirstBufInBound should only be called with char* ElementRegions");
1193
1194 // Get the size of the array.
1195 const SubRegion *superReg = cast<SubRegion>(Val: ER->getSuperRegion());
1196 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, MR: superReg, SVB&: SB);
1197
1198 // Get the index of the accessed element.
1199 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1200
1201 ProgramStateRef StInBound = State->assumeInBound(idx: Idx, upperBound: SizeDV, assumption: true);
1202
1203 return static_cast<bool>(StInBound);
1204}
1205
1206ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1207 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1208 SVal SizeV, QualType SizeTy) {
1209 auto InvalidationTraitOperations =
1210 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1211 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1212 // If destination buffer is a field region and access is in bound, do
1213 // not invalidate its super region.
1214 if (MemRegion::FieldRegionKind == R->getKind() &&
1215 isFirstBufInBound(C, State: S, BufVal: BufV, BufTy, LengthVal: SizeV, LengthTy: SizeTy)) {
1216 ITraits.setTrait(
1217 MR: R,
1218 IK: RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1219 }
1220 return false;
1221 };
1222
1223 return invalidateBufferAux(C, State: S, Ex: BufE, V: BufV, InvalidationTraitOperations);
1224}
1225
1226ProgramStateRef
1227CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1228 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1229 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1230 const MemRegion *R) {
1231 return isa<FieldRegion>(Val: R);
1232 };
1233
1234 return invalidateBufferAux(C, State: S, Ex: BufE, V: BufV, InvalidationTraitOperations);
1235}
1236
1237ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1238 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1239 auto InvalidationTraitOperations =
1240 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1241 if (MemRegion::FieldRegionKind == R->getKind())
1242 ITraits.setTrait(
1243 MR: R,
1244 IK: RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1245 return false;
1246 };
1247
1248 return invalidateBufferAux(C, State: S, Ex: BufE, V: BufV, InvalidationTraitOperations);
1249}
1250
1251ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1252 ProgramStateRef S,
1253 const Expr *BufE,
1254 SVal BufV) {
1255 auto InvalidationTraitOperations =
1256 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1257 ITraits.setTrait(
1258 MR: R->getBaseRegion(),
1259 IK: RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1260 ITraits.setTrait(MR: R,
1261 IK: RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1262 return true;
1263 };
1264
1265 return invalidateBufferAux(C, State: S, Ex: BufE, V: BufV, InvalidationTraitOperations);
1266}
1267
1268ProgramStateRef CStringChecker::invalidateBufferAux(
1269 CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1270 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1271 const MemRegion *)>
1272 InvalidationTraitOperations) {
1273 std::optional<Loc> L = V.getAs<Loc>();
1274 if (!L)
1275 return State;
1276
1277 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1278 // some assumptions about the value that CFRefCount can't. Even so, it should
1279 // probably be refactored.
1280 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1281 const MemRegion *R = MR->getRegion()->StripCasts();
1282
1283 // Are we dealing with an ElementRegion? If so, we should be invalidating
1284 // the super-region.
1285 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: R)) {
1286 R = ER->getSuperRegion();
1287 // FIXME: What about layers of ElementRegions?
1288 }
1289
1290 // Invalidate this region.
1291 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1292 RegionAndSymbolInvalidationTraits ITraits;
1293 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1294
1295 return State->invalidateRegions(Regions: R, E, BlockCount: C.blockCount(), LCtx,
1296 CausesPointerEscape, IS: nullptr, Call: nullptr,
1297 ITraits: &ITraits);
1298 }
1299
1300 // If we have a non-region value by chance, just remove the binding.
1301 // FIXME: is this necessary or correct? This handles the non-Region
1302 // cases. Is it ever valid to store to these?
1303 return State->killBinding(LV: *L);
1304}
1305
1306bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1307 const MemRegion *MR) {
1308 switch (MR->getKind()) {
1309 case MemRegion::FunctionCodeRegionKind: {
1310 if (const auto *FD = cast<FunctionCodeRegion>(Val: MR)->getDecl())
1311 os << "the address of the function '" << *FD << '\'';
1312 else
1313 os << "the address of a function";
1314 return true;
1315 }
1316 case MemRegion::BlockCodeRegionKind:
1317 os << "block text";
1318 return true;
1319 case MemRegion::BlockDataRegionKind:
1320 os << "a block";
1321 return true;
1322 case MemRegion::CXXThisRegionKind:
1323 case MemRegion::CXXTempObjectRegionKind:
1324 os << "a C++ temp object of type "
1325 << cast<TypedValueRegion>(Val: MR)->getValueType();
1326 return true;
1327 case MemRegion::NonParamVarRegionKind:
1328 os << "a variable of type" << cast<TypedValueRegion>(Val: MR)->getValueType();
1329 return true;
1330 case MemRegion::ParamVarRegionKind:
1331 os << "a parameter of type" << cast<TypedValueRegion>(Val: MR)->getValueType();
1332 return true;
1333 case MemRegion::FieldRegionKind:
1334 os << "a field of type " << cast<TypedValueRegion>(Val: MR)->getValueType();
1335 return true;
1336 case MemRegion::ObjCIvarRegionKind:
1337 os << "an instance variable of type "
1338 << cast<TypedValueRegion>(Val: MR)->getValueType();
1339 return true;
1340 default:
1341 return false;
1342 }
1343}
1344
1345bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1346 const Expr *Size, CheckerContext &C,
1347 ProgramStateRef &State) {
1348 SVal MemVal = C.getSVal(S: DstBuffer);
1349 SVal SizeVal = C.getSVal(S: Size);
1350 const MemRegion *MR = MemVal.getAsRegion();
1351 if (!MR)
1352 return false;
1353
1354 // We're about to model memset by producing a "default binding" in the Store.
1355 // Our current implementation - RegionStore - doesn't support default bindings
1356 // that don't cover the whole base region. So we should first get the offset
1357 // and the base region to figure out whether the offset of buffer is 0.
1358 RegionOffset Offset = MR->getAsOffset();
1359 const MemRegion *BR = Offset.getRegion();
1360
1361 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1362 if (!SizeNL)
1363 return false;
1364
1365 SValBuilder &svalBuilder = C.getSValBuilder();
1366 ASTContext &Ctx = C.getASTContext();
1367
1368 // void *memset(void *dest, int ch, size_t count);
1369 // For now we can only handle the case of offset is 0 and concrete char value.
1370 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1371 Offset.getOffset() == 0) {
1372 // Get the base region's size.
1373 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, MR: BR, SVB&: svalBuilder);
1374
1375 ProgramStateRef StateWholeReg, StateNotWholeReg;
1376 std::tie(args&: StateWholeReg, args&: StateNotWholeReg) =
1377 State->assume(Cond: svalBuilder.evalEQ(state: State, lhs: SizeDV, rhs: *SizeNL));
1378
1379 // With the semantic of 'memset()', we should convert the CharVal to
1380 // unsigned char.
1381 CharVal = svalBuilder.evalCast(V: CharVal, CastTy: Ctx.UnsignedCharTy, OriginalTy: Ctx.IntTy);
1382
1383 ProgramStateRef StateNullChar, StateNonNullChar;
1384 std::tie(args&: StateNullChar, args&: StateNonNullChar) =
1385 assumeZero(C, State, V: CharVal, Ty: Ctx.UnsignedCharTy);
1386
1387 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1388 !StateNonNullChar) {
1389 // If the 'memset()' acts on the whole region of destination buffer and
1390 // the value of the second argument of 'memset()' is zero, bind the second
1391 // argument's value to the destination buffer with 'default binding'.
1392 // FIXME: Since there is no perfect way to bind the non-zero character, we
1393 // can only deal with zero value here. In the future, we need to deal with
1394 // the binding of non-zero value in the case of whole region.
1395 State = State->bindDefaultZero(loc: svalBuilder.makeLoc(region: BR),
1396 LCtx: C.getLocationContext());
1397 } else {
1398 // If the destination buffer's extent is not equal to the value of
1399 // third argument, just invalidate buffer.
1400 State = invalidateDestinationBufferBySize(C, S: State, BufE: DstBuffer, BufV: MemVal,
1401 SizeV: SizeVal, SizeTy: Size->getType());
1402 }
1403
1404 if (StateNullChar && !StateNonNullChar) {
1405 // If the value of the second argument of 'memset()' is zero, set the
1406 // string length of destination buffer to 0 directly.
1407 State = setCStringLength(state: State, MR,
1408 strLength: svalBuilder.makeZeroVal(type: Ctx.getSizeType()));
1409 } else if (!StateNullChar && StateNonNullChar) {
1410 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1411 symbolTag: CStringChecker::getTag(), region: MR, expr: DstBuffer, type: Ctx.getSizeType(),
1412 LCtx: C.getLocationContext(), count: C.blockCount());
1413
1414 // If the value of second argument is not zero, then the string length
1415 // is at least the size argument.
1416 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1417 state: State, op: BO_GE, lhs: NewStrLen, rhs: SizeVal, type: svalBuilder.getConditionType());
1418
1419 State = setCStringLength(
1420 state: State->assume(Cond: NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), Assumption: true),
1421 MR, strLength: NewStrLen);
1422 }
1423 } else {
1424 // If the offset is not zero and char value is not concrete, we can do
1425 // nothing but invalidate the buffer.
1426 State = invalidateDestinationBufferBySize(C, S: State, BufE: DstBuffer, BufV: MemVal,
1427 SizeV: SizeVal, SizeTy: Size->getType());
1428 }
1429 return true;
1430}
1431
1432//===----------------------------------------------------------------------===//
1433// evaluation of individual function calls.
1434//===----------------------------------------------------------------------===//
1435
1436void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1437 ProgramStateRef state, SizeArgExpr Size,
1438 DestinationArgExpr Dest,
1439 SourceArgExpr Source, bool Restricted,
1440 bool IsMempcpy, CharKind CK) const {
1441 CurrentFunctionDescription = "memory copy function";
1442
1443 // See if the size argument is zero.
1444 const LocationContext *LCtx = C.getLocationContext();
1445 SVal sizeVal = state->getSVal(Ex: Size.Expression, LCtx);
1446 QualType sizeTy = Size.Expression->getType();
1447
1448 ProgramStateRef stateZeroSize, stateNonZeroSize;
1449 std::tie(args&: stateZeroSize, args&: stateNonZeroSize) =
1450 assumeZero(C, State: state, V: sizeVal, Ty: sizeTy);
1451
1452 // Get the value of the Dest.
1453 SVal destVal = state->getSVal(Ex: Dest.Expression, LCtx);
1454
1455 // If the size is zero, there won't be any actual memory access, so
1456 // just bind the return value to the destination buffer and return.
1457 if (stateZeroSize && !stateNonZeroSize) {
1458 stateZeroSize =
1459 stateZeroSize->BindExpr(S: Call.getOriginExpr(), LCtx, V: destVal);
1460 C.addTransition(State: stateZeroSize);
1461 return;
1462 }
1463
1464 // If the size can be nonzero, we have to check the other arguments.
1465 if (stateNonZeroSize) {
1466 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1467 // to the size of the destination buffer, then emit a warning
1468 // that an attacker may provoke a buffer overflow error.
1469 state = stateNonZeroSize;
1470
1471 // Ensure the destination is not null. If it is NULL there will be a
1472 // NULL pointer dereference.
1473 state = checkNonNull(C, State: state, Arg: Dest, l: destVal);
1474 if (!state)
1475 return;
1476
1477 // Get the value of the Src.
1478 SVal srcVal = state->getSVal(Ex: Source.Expression, LCtx);
1479
1480 // Ensure the source is not null. If it is NULL there will be a
1481 // NULL pointer dereference.
1482 state = checkNonNull(C, State: state, Arg: Source, l: srcVal);
1483 if (!state)
1484 return;
1485
1486 // Ensure the accesses are valid and that the buffers do not overlap.
1487 state = CheckBufferAccess(C, State: state, Buffer: Dest, Size, Access: AccessKind::write, CK);
1488 state = CheckBufferAccess(C, State: state, Buffer: Source, Size, Access: AccessKind::read, CK);
1489
1490 if (Restricted)
1491 state = CheckOverlap(C, state, Size, First: Dest, Second: Source, CK);
1492
1493 if (!state)
1494 return;
1495
1496 // If this is mempcpy, get the byte after the last byte copied and
1497 // bind the expr.
1498 if (IsMempcpy) {
1499 // Get the byte after the last byte copied.
1500 SValBuilder &SvalBuilder = C.getSValBuilder();
1501 ASTContext &Ctx = SvalBuilder.getContext();
1502 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1503 SVal DestRegCharVal =
1504 SvalBuilder.evalCast(V: destVal, CastTy: CharPtrTy, OriginalTy: Dest.Expression->getType());
1505 SVal lastElement = C.getSValBuilder().evalBinOp(
1506 state, op: BO_Add, lhs: DestRegCharVal, rhs: sizeVal, type: Dest.Expression->getType());
1507 // If we don't know how much we copied, we can at least
1508 // conjure a return value for later.
1509 if (lastElement.isUnknown())
1510 lastElement = C.getSValBuilder().conjureSymbolVal(
1511 symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx, count: C.blockCount());
1512
1513 // The byte after the last byte copied is the return value.
1514 state = state->BindExpr(S: Call.getOriginExpr(), LCtx, V: lastElement);
1515 } else {
1516 // All other copies return the destination buffer.
1517 // (Well, bcopy() has a void return type, but this won't hurt.)
1518 state = state->BindExpr(S: Call.getOriginExpr(), LCtx, V: destVal);
1519 }
1520
1521 // Invalidate the destination (regular invalidation without pointer-escaping
1522 // the address of the top-level region).
1523 // FIXME: Even if we can't perfectly model the copy, we should see if we
1524 // can use LazyCompoundVals to copy the source values into the destination.
1525 // This would probably remove any existing bindings past the end of the
1526 // copied region, but that's still an improvement over blank invalidation.
1527 state = invalidateDestinationBufferBySize(
1528 C, S: state, BufE: Dest.Expression, BufV: C.getSVal(S: Dest.Expression), SizeV: sizeVal,
1529 SizeTy: Size.Expression->getType());
1530
1531 // Invalidate the source (const-invalidation without const-pointer-escaping
1532 // the address of the top-level region).
1533 state = invalidateSourceBuffer(C, S: state, BufE: Source.Expression,
1534 BufV: C.getSVal(S: Source.Expression));
1535
1536 C.addTransition(State: state);
1537 }
1538}
1539
1540void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1541 CharKind CK) const {
1542 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1543 // The return value is the address of the destination buffer.
1544 DestinationArgExpr Dest = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
1545 SourceArgExpr Src = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
1546 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1547
1548 ProgramStateRef State = C.getState();
1549
1550 constexpr bool IsRestricted = true;
1551 constexpr bool IsMempcpy = false;
1552 evalCopyCommon(C, Call, state: State, Size, Dest, Source: Src, Restricted: IsRestricted, IsMempcpy, CK);
1553}
1554
1555void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1556 CharKind CK) const {
1557 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1558 // The return value is a pointer to the byte following the last written byte.
1559 DestinationArgExpr Dest = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
1560 SourceArgExpr Src = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
1561 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1562
1563 constexpr bool IsRestricted = true;
1564 constexpr bool IsMempcpy = true;
1565 evalCopyCommon(C, Call, state: C.getState(), Size, Dest, Source: Src, Restricted: IsRestricted,
1566 IsMempcpy, CK);
1567}
1568
1569void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1570 CharKind CK) const {
1571 // void *memmove(void *dst, const void *src, size_t n);
1572 // The return value is the address of the destination buffer.
1573 DestinationArgExpr Dest = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
1574 SourceArgExpr Src = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
1575 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1576
1577 constexpr bool IsRestricted = false;
1578 constexpr bool IsMempcpy = false;
1579 evalCopyCommon(C, Call, state: C.getState(), Size, Dest, Source: Src, Restricted: IsRestricted,
1580 IsMempcpy, CK);
1581}
1582
1583void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1584 // void bcopy(const void *src, void *dst, size_t n);
1585 SourceArgExpr Src{{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
1586 DestinationArgExpr Dest = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
1587 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1588
1589 constexpr bool IsRestricted = false;
1590 constexpr bool IsMempcpy = false;
1591 evalCopyCommon(C, Call, state: C.getState(), Size, Dest, Source: Src, Restricted: IsRestricted,
1592 IsMempcpy, CK: CharKind::Regular);
1593}
1594
1595void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1596 CharKind CK) const {
1597 // int memcmp(const void *s1, const void *s2, size_t n);
1598 CurrentFunctionDescription = "memory comparison function";
1599
1600 AnyArgExpr Left = {.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0};
1601 AnyArgExpr Right = {.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1};
1602 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1603
1604 ProgramStateRef State = C.getState();
1605 SValBuilder &Builder = C.getSValBuilder();
1606 const LocationContext *LCtx = C.getLocationContext();
1607
1608 // See if the size argument is zero.
1609 SVal sizeVal = State->getSVal(Ex: Size.Expression, LCtx);
1610 QualType sizeTy = Size.Expression->getType();
1611
1612 ProgramStateRef stateZeroSize, stateNonZeroSize;
1613 std::tie(args&: stateZeroSize, args&: stateNonZeroSize) =
1614 assumeZero(C, State, V: sizeVal, Ty: sizeTy);
1615
1616 // If the size can be zero, the result will be 0 in that case, and we don't
1617 // have to check either of the buffers.
1618 if (stateZeroSize) {
1619 State = stateZeroSize;
1620 State = State->BindExpr(S: Call.getOriginExpr(), LCtx,
1621 V: Builder.makeZeroVal(type: Call.getResultType()));
1622 C.addTransition(State);
1623 }
1624
1625 // If the size can be nonzero, we have to check the other arguments.
1626 if (stateNonZeroSize) {
1627 State = stateNonZeroSize;
1628 // If we know the two buffers are the same, we know the result is 0.
1629 // First, get the two buffers' addresses. Another checker will have already
1630 // made sure they're not undefined.
1631 DefinedOrUnknownSVal LV =
1632 State->getSVal(Ex: Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1633 DefinedOrUnknownSVal RV =
1634 State->getSVal(Ex: Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1635
1636 // See if they are the same.
1637 ProgramStateRef SameBuffer, NotSameBuffer;
1638 std::tie(args&: SameBuffer, args&: NotSameBuffer) =
1639 State->assume(Cond: Builder.evalEQ(state: State, lhs: LV, rhs: RV));
1640
1641 // If the two arguments are the same buffer, we know the result is 0,
1642 // and we only need to check one size.
1643 if (SameBuffer && !NotSameBuffer) {
1644 State = SameBuffer;
1645 State = CheckBufferAccess(C, State, Buffer: Left, Size, Access: AccessKind::read);
1646 if (State) {
1647 State = SameBuffer->BindExpr(S: Call.getOriginExpr(), LCtx,
1648 V: Builder.makeZeroVal(type: Call.getResultType()));
1649 C.addTransition(State);
1650 }
1651 return;
1652 }
1653
1654 // If the two arguments might be different buffers, we have to check
1655 // the size of both of them.
1656 assert(NotSameBuffer);
1657 State = CheckBufferAccess(C, State, Buffer: Right, Size, Access: AccessKind::read, CK);
1658 State = CheckBufferAccess(C, State, Buffer: Left, Size, Access: AccessKind::read, CK);
1659 if (State) {
1660 // The return value is the comparison result, which we don't know.
1661 SVal CmpV = Builder.conjureSymbolVal(symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx,
1662 count: C.blockCount());
1663 State = State->BindExpr(S: Call.getOriginExpr(), LCtx, V: CmpV);
1664 C.addTransition(State);
1665 }
1666 }
1667}
1668
1669void CStringChecker::evalstrLength(CheckerContext &C,
1670 const CallEvent &Call) const {
1671 // size_t strlen(const char *s);
1672 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1673}
1674
1675void CStringChecker::evalstrnLength(CheckerContext &C,
1676 const CallEvent &Call) const {
1677 // size_t strnlen(const char *s, size_t maxlen);
1678 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1679}
1680
1681void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1682 const CallEvent &Call,
1683 bool IsStrnlen) const {
1684 CurrentFunctionDescription = "string length function";
1685 ProgramStateRef state = C.getState();
1686 const LocationContext *LCtx = C.getLocationContext();
1687
1688 if (IsStrnlen) {
1689 const Expr *maxlenExpr = Call.getArgExpr(Index: 1);
1690 SVal maxlenVal = state->getSVal(Ex: maxlenExpr, LCtx);
1691
1692 ProgramStateRef stateZeroSize, stateNonZeroSize;
1693 std::tie(args&: stateZeroSize, args&: stateNonZeroSize) =
1694 assumeZero(C, State: state, V: maxlenVal, Ty: maxlenExpr->getType());
1695
1696 // If the size can be zero, the result will be 0 in that case, and we don't
1697 // have to check the string itself.
1698 if (stateZeroSize) {
1699 SVal zero = C.getSValBuilder().makeZeroVal(type: Call.getResultType());
1700 stateZeroSize = stateZeroSize->BindExpr(S: Call.getOriginExpr(), LCtx, V: zero);
1701 C.addTransition(State: stateZeroSize);
1702 }
1703
1704 // If the size is GUARANTEED to be zero, we're done!
1705 if (!stateNonZeroSize)
1706 return;
1707
1708 // Otherwise, record the assumption that the size is nonzero.
1709 state = stateNonZeroSize;
1710 }
1711
1712 // Check that the string argument is non-null.
1713 AnyArgExpr Arg = {.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0};
1714 SVal ArgVal = state->getSVal(Ex: Arg.Expression, LCtx);
1715 state = checkNonNull(C, State: state, Arg, l: ArgVal);
1716
1717 if (!state)
1718 return;
1719
1720 SVal strLength = getCStringLength(C, state, Ex: Arg.Expression, Buf: ArgVal);
1721
1722 // If the argument isn't a valid C string, there's no valid state to
1723 // transition to.
1724 if (strLength.isUndef())
1725 return;
1726
1727 DefinedOrUnknownSVal result = UnknownVal();
1728
1729 // If the check is for strnlen() then bind the return value to no more than
1730 // the maxlen value.
1731 if (IsStrnlen) {
1732 QualType cmpTy = C.getSValBuilder().getConditionType();
1733
1734 // It's a little unfortunate to be getting this again,
1735 // but it's not that expensive...
1736 const Expr *maxlenExpr = Call.getArgExpr(Index: 1);
1737 SVal maxlenVal = state->getSVal(Ex: maxlenExpr, LCtx);
1738
1739 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1740 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1741
1742 if (strLengthNL && maxlenValNL) {
1743 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1744
1745 // Check if the strLength is greater than the maxlen.
1746 std::tie(args&: stateStringTooLong, args&: stateStringNotTooLong) = state->assume(
1747 Cond: C.getSValBuilder()
1748 .evalBinOpNN(state, op: BO_GT, lhs: *strLengthNL, rhs: *maxlenValNL, resultTy: cmpTy)
1749 .castAs<DefinedOrUnknownSVal>());
1750
1751 if (stateStringTooLong && !stateStringNotTooLong) {
1752 // If the string is longer than maxlen, return maxlen.
1753 result = *maxlenValNL;
1754 } else if (stateStringNotTooLong && !stateStringTooLong) {
1755 // If the string is shorter than maxlen, return its length.
1756 result = *strLengthNL;
1757 }
1758 }
1759
1760 if (result.isUnknown()) {
1761 // If we don't have enough information for a comparison, there's
1762 // no guarantee the full string length will actually be returned.
1763 // All we know is the return value is the min of the string length
1764 // and the limit. This is better than nothing.
1765 result = C.getSValBuilder().conjureSymbolVal(
1766 symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx, count: C.blockCount());
1767 NonLoc resultNL = result.castAs<NonLoc>();
1768
1769 if (strLengthNL) {
1770 state = state->assume(Cond: C.getSValBuilder().evalBinOpNN(
1771 state, op: BO_LE, lhs: resultNL, rhs: *strLengthNL, resultTy: cmpTy)
1772 .castAs<DefinedOrUnknownSVal>(), Assumption: true);
1773 }
1774
1775 if (maxlenValNL) {
1776 state = state->assume(Cond: C.getSValBuilder().evalBinOpNN(
1777 state, op: BO_LE, lhs: resultNL, rhs: *maxlenValNL, resultTy: cmpTy)
1778 .castAs<DefinedOrUnknownSVal>(), Assumption: true);
1779 }
1780 }
1781
1782 } else {
1783 // This is a plain strlen(), not strnlen().
1784 result = strLength.castAs<DefinedOrUnknownSVal>();
1785
1786 // If we don't know the length of the string, conjure a return
1787 // value, so it can be used in constraints, at least.
1788 if (result.isUnknown()) {
1789 result = C.getSValBuilder().conjureSymbolVal(
1790 symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx, count: C.blockCount());
1791 }
1792 }
1793
1794 // Bind the return value.
1795 assert(!result.isUnknown() && "Should have conjured a value by now");
1796 state = state->BindExpr(S: Call.getOriginExpr(), LCtx, V: result);
1797 C.addTransition(State: state);
1798}
1799
1800void CStringChecker::evalStrcpy(CheckerContext &C,
1801 const CallEvent &Call) const {
1802 // char *strcpy(char *restrict dst, const char *restrict src);
1803 evalStrcpyCommon(C, Call,
1804 /* ReturnEnd = */ false,
1805 /* IsBounded = */ false,
1806 /* appendK = */ ConcatFnKind::none);
1807}
1808
1809void CStringChecker::evalStrncpy(CheckerContext &C,
1810 const CallEvent &Call) const {
1811 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1812 evalStrcpyCommon(C, Call,
1813 /* ReturnEnd = */ false,
1814 /* IsBounded = */ true,
1815 /* appendK = */ ConcatFnKind::none);
1816}
1817
1818void CStringChecker::evalStpcpy(CheckerContext &C,
1819 const CallEvent &Call) const {
1820 // char *stpcpy(char *restrict dst, const char *restrict src);
1821 evalStrcpyCommon(C, Call,
1822 /* ReturnEnd = */ true,
1823 /* IsBounded = */ false,
1824 /* appendK = */ ConcatFnKind::none);
1825}
1826
1827void CStringChecker::evalStrlcpy(CheckerContext &C,
1828 const CallEvent &Call) const {
1829 // size_t strlcpy(char *dest, const char *src, size_t size);
1830 evalStrcpyCommon(C, Call,
1831 /* ReturnEnd = */ true,
1832 /* IsBounded = */ true,
1833 /* appendK = */ ConcatFnKind::none,
1834 /* returnPtr = */ false);
1835}
1836
1837void CStringChecker::evalStrcat(CheckerContext &C,
1838 const CallEvent &Call) const {
1839 // char *strcat(char *restrict s1, const char *restrict s2);
1840 evalStrcpyCommon(C, Call,
1841 /* ReturnEnd = */ false,
1842 /* IsBounded = */ false,
1843 /* appendK = */ ConcatFnKind::strcat);
1844}
1845
1846void CStringChecker::evalStrncat(CheckerContext &C,
1847 const CallEvent &Call) const {
1848 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1849 evalStrcpyCommon(C, Call,
1850 /* ReturnEnd = */ false,
1851 /* IsBounded = */ true,
1852 /* appendK = */ ConcatFnKind::strcat);
1853}
1854
1855void CStringChecker::evalStrlcat(CheckerContext &C,
1856 const CallEvent &Call) const {
1857 // size_t strlcat(char *dst, const char *src, size_t size);
1858 // It will append at most size - strlen(dst) - 1 bytes,
1859 // NULL-terminating the result.
1860 evalStrcpyCommon(C, Call,
1861 /* ReturnEnd = */ false,
1862 /* IsBounded = */ true,
1863 /* appendK = */ ConcatFnKind::strlcat,
1864 /* returnPtr = */ false);
1865}
1866
1867void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1868 bool ReturnEnd, bool IsBounded,
1869 ConcatFnKind appendK,
1870 bool returnPtr) const {
1871 if (appendK == ConcatFnKind::none)
1872 CurrentFunctionDescription = "string copy function";
1873 else
1874 CurrentFunctionDescription = "string concatenation function";
1875
1876 ProgramStateRef state = C.getState();
1877 const LocationContext *LCtx = C.getLocationContext();
1878
1879 // Check that the destination is non-null.
1880 DestinationArgExpr Dst = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
1881 SVal DstVal = state->getSVal(Ex: Dst.Expression, LCtx);
1882 state = checkNonNull(C, State: state, Arg: Dst, l: DstVal);
1883 if (!state)
1884 return;
1885
1886 // Check that the source is non-null.
1887 SourceArgExpr srcExpr = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
1888 SVal srcVal = state->getSVal(Ex: srcExpr.Expression, LCtx);
1889 state = checkNonNull(C, State: state, Arg: srcExpr, l: srcVal);
1890 if (!state)
1891 return;
1892
1893 // Get the string length of the source.
1894 SVal strLength = getCStringLength(C, state, Ex: srcExpr.Expression, Buf: srcVal);
1895 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1896
1897 // Get the string length of the destination buffer.
1898 SVal dstStrLength = getCStringLength(C, state, Ex: Dst.Expression, Buf: DstVal);
1899 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1900
1901 // If the source isn't a valid C string, give up.
1902 if (strLength.isUndef())
1903 return;
1904
1905 SValBuilder &svalBuilder = C.getSValBuilder();
1906 QualType cmpTy = svalBuilder.getConditionType();
1907 QualType sizeTy = svalBuilder.getContext().getSizeType();
1908
1909 // These two values allow checking two kinds of errors:
1910 // - actual overflows caused by a source that doesn't fit in the destination
1911 // - potential overflows caused by a bound that could exceed the destination
1912 SVal amountCopied = UnknownVal();
1913 SVal maxLastElementIndex = UnknownVal();
1914 const char *boundWarning = nullptr;
1915
1916 // FIXME: Why do we choose the srcExpr if the access has no size?
1917 // Note that the 3rd argument of the call would be the size parameter.
1918 SizeArgExpr SrcExprAsSizeDummy = {
1919 {.Expression: srcExpr.Expression, .ArgumentIndex: srcExpr.ArgumentIndex}};
1920 state = CheckOverlap(
1921 C, state,
1922 Size: (IsBounded ? SizeArgExpr{{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}} : SrcExprAsSizeDummy),
1923 First: Dst, Second: srcExpr);
1924
1925 if (!state)
1926 return;
1927
1928 // If the function is strncpy, strncat, etc... it is bounded.
1929 if (IsBounded) {
1930 // Get the max number of characters to copy.
1931 SizeArgExpr lenExpr = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
1932 SVal lenVal = state->getSVal(Ex: lenExpr.Expression, LCtx);
1933
1934 // Protect against misdeclared strncpy().
1935 lenVal =
1936 svalBuilder.evalCast(V: lenVal, CastTy: sizeTy, OriginalTy: lenExpr.Expression->getType());
1937
1938 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1939
1940 // If we know both values, we might be able to figure out how much
1941 // we're copying.
1942 if (strLengthNL && lenValNL) {
1943 switch (appendK) {
1944 case ConcatFnKind::none:
1945 case ConcatFnKind::strcat: {
1946 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1947 // Check if the max number to copy is less than the length of the src.
1948 // If the bound is equal to the source length, strncpy won't null-
1949 // terminate the result!
1950 std::tie(args&: stateSourceTooLong, args&: stateSourceNotTooLong) = state->assume(
1951 Cond: svalBuilder
1952 .evalBinOpNN(state, op: BO_GE, lhs: *strLengthNL, rhs: *lenValNL, resultTy: cmpTy)
1953 .castAs<DefinedOrUnknownSVal>());
1954
1955 if (stateSourceTooLong && !stateSourceNotTooLong) {
1956 // Max number to copy is less than the length of the src, so the
1957 // actual strLength copied is the max number arg.
1958 state = stateSourceTooLong;
1959 amountCopied = lenVal;
1960
1961 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1962 // The source buffer entirely fits in the bound.
1963 state = stateSourceNotTooLong;
1964 amountCopied = strLength;
1965 }
1966 break;
1967 }
1968 case ConcatFnKind::strlcat:
1969 if (!dstStrLengthNL)
1970 return;
1971
1972 // amountCopied = min (size - dstLen - 1 , srcLen)
1973 SVal freeSpace = svalBuilder.evalBinOpNN(state, op: BO_Sub, lhs: *lenValNL,
1974 rhs: *dstStrLengthNL, resultTy: sizeTy);
1975 if (!isa<NonLoc>(Val: freeSpace))
1976 return;
1977 freeSpace =
1978 svalBuilder.evalBinOp(state, op: BO_Sub, lhs: freeSpace,
1979 rhs: svalBuilder.makeIntVal(integer: 1, type: sizeTy), type: sizeTy);
1980 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1981
1982 // While unlikely, it is possible that the subtraction is
1983 // too complex to compute, let's check whether it succeeded.
1984 if (!freeSpaceNL)
1985 return;
1986 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1987 state, op: BO_LE, lhs: *strLengthNL, rhs: *freeSpaceNL, resultTy: cmpTy);
1988
1989 ProgramStateRef TrueState, FalseState;
1990 std::tie(args&: TrueState, args&: FalseState) =
1991 state->assume(Cond: hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1992
1993 // srcStrLength <= size - dstStrLength -1
1994 if (TrueState && !FalseState) {
1995 amountCopied = strLength;
1996 }
1997
1998 // srcStrLength > size - dstStrLength -1
1999 if (!TrueState && FalseState) {
2000 amountCopied = freeSpace;
2001 }
2002
2003 if (TrueState && FalseState)
2004 amountCopied = UnknownVal();
2005 break;
2006 }
2007 }
2008 // We still want to know if the bound is known to be too large.
2009 if (lenValNL) {
2010 switch (appendK) {
2011 case ConcatFnKind::strcat:
2012 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2013
2014 // Get the string length of the destination. If the destination is
2015 // memory that can't have a string length, we shouldn't be copying
2016 // into it anyway.
2017 if (dstStrLength.isUndef())
2018 return;
2019
2020 if (dstStrLengthNL) {
2021 maxLastElementIndex = svalBuilder.evalBinOpNN(
2022 state, op: BO_Add, lhs: *lenValNL, rhs: *dstStrLengthNL, resultTy: sizeTy);
2023
2024 boundWarning = "Size argument is greater than the free space in the "
2025 "destination buffer";
2026 }
2027 break;
2028 case ConcatFnKind::none:
2029 case ConcatFnKind::strlcat:
2030 // For strncpy and strlcat, this is just checking
2031 // that lenVal <= sizeof(dst).
2032 // (Yes, strncpy and strncat differ in how they treat termination.
2033 // strncat ALWAYS terminates, but strncpy doesn't.)
2034
2035 // We need a special case for when the copy size is zero, in which
2036 // case strncpy will do no work at all. Our bounds check uses n-1
2037 // as the last element accessed, so n == 0 is problematic.
2038 ProgramStateRef StateZeroSize, StateNonZeroSize;
2039 std::tie(args&: StateZeroSize, args&: StateNonZeroSize) =
2040 assumeZero(C, State: state, V: *lenValNL, Ty: sizeTy);
2041
2042 // If the size is known to be zero, we're done.
2043 if (StateZeroSize && !StateNonZeroSize) {
2044 if (returnPtr) {
2045 StateZeroSize =
2046 StateZeroSize->BindExpr(S: Call.getOriginExpr(), LCtx, V: DstVal);
2047 } else {
2048 if (appendK == ConcatFnKind::none) {
2049 // strlcpy returns strlen(src)
2050 StateZeroSize = StateZeroSize->BindExpr(S: Call.getOriginExpr(),
2051 LCtx, V: strLength);
2052 } else {
2053 // strlcat returns strlen(src) + strlen(dst)
2054 SVal retSize = svalBuilder.evalBinOp(
2055 state, op: BO_Add, lhs: strLength, rhs: dstStrLength, type: sizeTy);
2056 StateZeroSize =
2057 StateZeroSize->BindExpr(S: Call.getOriginExpr(), LCtx, V: retSize);
2058 }
2059 }
2060 C.addTransition(State: StateZeroSize);
2061 return;
2062 }
2063
2064 // Otherwise, go ahead and figure out the last element we'll touch.
2065 // We don't record the non-zero assumption here because we can't
2066 // be sure. We won't warn on a possible zero.
2067 NonLoc one = svalBuilder.makeIntVal(integer: 1, type: sizeTy).castAs<NonLoc>();
2068 maxLastElementIndex =
2069 svalBuilder.evalBinOpNN(state, op: BO_Sub, lhs: *lenValNL, rhs: one, resultTy: sizeTy);
2070 boundWarning = "Size argument is greater than the length of the "
2071 "destination buffer";
2072 break;
2073 }
2074 }
2075 } else {
2076 // The function isn't bounded. The amount copied should match the length
2077 // of the source buffer.
2078 amountCopied = strLength;
2079 }
2080
2081 assert(state);
2082
2083 // This represents the number of characters copied into the destination
2084 // buffer. (It may not actually be the strlen if the destination buffer
2085 // is not terminated.)
2086 SVal finalStrLength = UnknownVal();
2087 SVal strlRetVal = UnknownVal();
2088
2089 if (appendK == ConcatFnKind::none && !returnPtr) {
2090 // strlcpy returns the sizeof(src)
2091 strlRetVal = strLength;
2092 }
2093
2094 // If this is an appending function (strcat, strncat...) then set the
2095 // string length to strlen(src) + strlen(dst) since the buffer will
2096 // ultimately contain both.
2097 if (appendK != ConcatFnKind::none) {
2098 // Get the string length of the destination. If the destination is memory
2099 // that can't have a string length, we shouldn't be copying into it anyway.
2100 if (dstStrLength.isUndef())
2101 return;
2102
2103 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2104 strlRetVal = svalBuilder.evalBinOpNN(state, op: BO_Add, lhs: *strLengthNL,
2105 rhs: *dstStrLengthNL, resultTy: sizeTy);
2106 }
2107
2108 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2109
2110 // If we know both string lengths, we might know the final string length.
2111 if (amountCopiedNL && dstStrLengthNL) {
2112 // Make sure the two lengths together don't overflow a size_t.
2113 state = checkAdditionOverflow(C, state, left: *amountCopiedNL, right: *dstStrLengthNL);
2114 if (!state)
2115 return;
2116
2117 finalStrLength = svalBuilder.evalBinOpNN(state, op: BO_Add, lhs: *amountCopiedNL,
2118 rhs: *dstStrLengthNL, resultTy: sizeTy);
2119 }
2120
2121 // If we couldn't get a single value for the final string length,
2122 // we can at least bound it by the individual lengths.
2123 if (finalStrLength.isUnknown()) {
2124 // Try to get a "hypothetical" string length symbol, which we can later
2125 // set as a real value if that turns out to be the case.
2126 finalStrLength =
2127 getCStringLength(C, state, Ex: Call.getOriginExpr(), Buf: DstVal, hypothetical: true);
2128 assert(!finalStrLength.isUndef());
2129
2130 if (std::optional<NonLoc> finalStrLengthNL =
2131 finalStrLength.getAs<NonLoc>()) {
2132 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2133 // we overwrite dst string with the src
2134 // finalStrLength >= srcStrLength
2135 SVal sourceInResult = svalBuilder.evalBinOpNN(
2136 state, op: BO_GE, lhs: *finalStrLengthNL, rhs: *amountCopiedNL, resultTy: cmpTy);
2137 state = state->assume(Cond: sourceInResult.castAs<DefinedOrUnknownSVal>(),
2138 Assumption: true);
2139 if (!state)
2140 return;
2141 }
2142
2143 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2144 // we extend the dst string with the src
2145 // finalStrLength >= dstStrLength
2146 SVal destInResult = svalBuilder.evalBinOpNN(state, op: BO_GE,
2147 lhs: *finalStrLengthNL,
2148 rhs: *dstStrLengthNL,
2149 resultTy: cmpTy);
2150 state =
2151 state->assume(Cond: destInResult.castAs<DefinedOrUnknownSVal>(), Assumption: true);
2152 if (!state)
2153 return;
2154 }
2155 }
2156 }
2157
2158 } else {
2159 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2160 // the final string length will match the input string length.
2161 finalStrLength = amountCopied;
2162 }
2163
2164 SVal Result;
2165
2166 if (returnPtr) {
2167 // The final result of the function will either be a pointer past the last
2168 // copied element, or a pointer to the start of the destination buffer.
2169 Result = (ReturnEnd ? UnknownVal() : DstVal);
2170 } else {
2171 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2172 //strlcpy, strlcat
2173 Result = strlRetVal;
2174 else
2175 Result = finalStrLength;
2176 }
2177
2178 assert(state);
2179
2180 // If the destination is a MemRegion, try to check for a buffer overflow and
2181 // record the new string length.
2182 if (std::optional<loc::MemRegionVal> dstRegVal =
2183 DstVal.getAs<loc::MemRegionVal>()) {
2184 QualType ptrTy = Dst.Expression->getType();
2185
2186 // If we have an exact value on a bounded copy, use that to check for
2187 // overflows, rather than our estimate about how much is actually copied.
2188 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2189 SVal maxLastElement =
2190 svalBuilder.evalBinOpLN(state, op: BO_Add, lhs: *dstRegVal, rhs: *maxLastNL, resultTy: ptrTy);
2191
2192 // Check if the first byte of the destination is writable.
2193 state = CheckLocation(C, state, Buffer: Dst, Element: DstVal, Access: AccessKind::write);
2194 if (!state)
2195 return;
2196 // Check if the last byte of the destination is writable.
2197 state = CheckLocation(C, state, Buffer: Dst, Element: maxLastElement, Access: AccessKind::write);
2198 if (!state)
2199 return;
2200 }
2201
2202 // Then, if the final length is known...
2203 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2204 SVal lastElement = svalBuilder.evalBinOpLN(state, op: BO_Add, lhs: *dstRegVal,
2205 rhs: *knownStrLength, resultTy: ptrTy);
2206
2207 // ...and we haven't checked the bound, we'll check the actual copy.
2208 if (!boundWarning) {
2209 // Check if the first byte of the destination is writable.
2210 state = CheckLocation(C, state, Buffer: Dst, Element: DstVal, Access: AccessKind::write);
2211 if (!state)
2212 return;
2213 // Check if the last byte of the destination is writable.
2214 state = CheckLocation(C, state, Buffer: Dst, Element: lastElement, Access: AccessKind::write);
2215 if (!state)
2216 return;
2217 }
2218
2219 // If this is a stpcpy-style copy, the last element is the return value.
2220 if (returnPtr && ReturnEnd)
2221 Result = lastElement;
2222 }
2223
2224 // Invalidate the destination (regular invalidation without pointer-escaping
2225 // the address of the top-level region). This must happen before we set the
2226 // C string length because invalidation will clear the length.
2227 // FIXME: Even if we can't perfectly model the copy, we should see if we
2228 // can use LazyCompoundVals to copy the source values into the destination.
2229 // This would probably remove any existing bindings past the end of the
2230 // string, but that's still an improvement over blank invalidation.
2231 state = invalidateDestinationBufferBySize(C, S: state, BufE: Dst.Expression,
2232 BufV: *dstRegVal, SizeV: amountCopied,
2233 SizeTy: C.getASTContext().getSizeType());
2234
2235 // Invalidate the source (const-invalidation without const-pointer-escaping
2236 // the address of the top-level region).
2237 state = invalidateSourceBuffer(C, S: state, BufE: srcExpr.Expression, BufV: srcVal);
2238
2239 // Set the C string length of the destination, if we know it.
2240 if (IsBounded && (appendK == ConcatFnKind::none)) {
2241 // strncpy is annoying in that it doesn't guarantee to null-terminate
2242 // the result string. If the original string didn't fit entirely inside
2243 // the bound (including the null-terminator), we don't know how long the
2244 // result is.
2245 if (amountCopied != strLength)
2246 finalStrLength = UnknownVal();
2247 }
2248 state = setCStringLength(state, MR: dstRegVal->getRegion(), strLength: finalStrLength);
2249 }
2250
2251 assert(state);
2252
2253 if (returnPtr) {
2254 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2255 // overflow, we still need a result. Conjure a return value.
2256 if (ReturnEnd && Result.isUnknown()) {
2257 Result = svalBuilder.conjureSymbolVal(symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx,
2258 count: C.blockCount());
2259 }
2260 }
2261 // Set the return value.
2262 state = state->BindExpr(S: Call.getOriginExpr(), LCtx, V: Result);
2263 C.addTransition(State: state);
2264}
2265
2266void CStringChecker::evalStrcmp(CheckerContext &C,
2267 const CallEvent &Call) const {
2268 //int strcmp(const char *s1, const char *s2);
2269 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2270}
2271
2272void CStringChecker::evalStrncmp(CheckerContext &C,
2273 const CallEvent &Call) const {
2274 //int strncmp(const char *s1, const char *s2, size_t n);
2275 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2276}
2277
2278void CStringChecker::evalStrcasecmp(CheckerContext &C,
2279 const CallEvent &Call) const {
2280 //int strcasecmp(const char *s1, const char *s2);
2281 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2282}
2283
2284void CStringChecker::evalStrncasecmp(CheckerContext &C,
2285 const CallEvent &Call) const {
2286 //int strncasecmp(const char *s1, const char *s2, size_t n);
2287 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2288}
2289
2290void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2291 bool IsBounded, bool IgnoreCase) const {
2292 CurrentFunctionDescription = "string comparison function";
2293 ProgramStateRef state = C.getState();
2294 const LocationContext *LCtx = C.getLocationContext();
2295
2296 // Check that the first string is non-null
2297 AnyArgExpr Left = {.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0};
2298 SVal LeftVal = state->getSVal(Ex: Left.Expression, LCtx);
2299 state = checkNonNull(C, State: state, Arg: Left, l: LeftVal);
2300 if (!state)
2301 return;
2302
2303 // Check that the second string is non-null.
2304 AnyArgExpr Right = {.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1};
2305 SVal RightVal = state->getSVal(Ex: Right.Expression, LCtx);
2306 state = checkNonNull(C, State: state, Arg: Right, l: RightVal);
2307 if (!state)
2308 return;
2309
2310 // Get the string length of the first string or give up.
2311 SVal LeftLength = getCStringLength(C, state, Ex: Left.Expression, Buf: LeftVal);
2312 if (LeftLength.isUndef())
2313 return;
2314
2315 // Get the string length of the second string or give up.
2316 SVal RightLength = getCStringLength(C, state, Ex: Right.Expression, Buf: RightVal);
2317 if (RightLength.isUndef())
2318 return;
2319
2320 // If we know the two buffers are the same, we know the result is 0.
2321 // First, get the two buffers' addresses. Another checker will have already
2322 // made sure they're not undefined.
2323 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2324 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2325
2326 // See if they are the same.
2327 SValBuilder &svalBuilder = C.getSValBuilder();
2328 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, lhs: LV, rhs: RV);
2329 ProgramStateRef StSameBuf, StNotSameBuf;
2330 std::tie(args&: StSameBuf, args&: StNotSameBuf) = state->assume(Cond: SameBuf);
2331
2332 // If the two arguments might be the same buffer, we know the result is 0,
2333 // and we only need to check one size.
2334 if (StSameBuf) {
2335 StSameBuf =
2336 StSameBuf->BindExpr(S: Call.getOriginExpr(), LCtx,
2337 V: svalBuilder.makeZeroVal(type: Call.getResultType()));
2338 C.addTransition(State: StSameBuf);
2339
2340 // If the two arguments are GUARANTEED to be the same, we're done!
2341 if (!StNotSameBuf)
2342 return;
2343 }
2344
2345 assert(StNotSameBuf);
2346 state = StNotSameBuf;
2347
2348 // At this point we can go about comparing the two buffers.
2349 // For now, we only do this if they're both known string literals.
2350
2351 // Attempt to extract string literals from both expressions.
2352 const StringLiteral *LeftStrLiteral =
2353 getCStringLiteral(C, state, expr: Left.Expression, val: LeftVal);
2354 const StringLiteral *RightStrLiteral =
2355 getCStringLiteral(C, state, expr: Right.Expression, val: RightVal);
2356 bool canComputeResult = false;
2357 SVal resultVal = svalBuilder.conjureSymbolVal(symbolTag: nullptr, expr: Call.getOriginExpr(),
2358 LCtx, count: C.blockCount());
2359
2360 if (LeftStrLiteral && RightStrLiteral) {
2361 StringRef LeftStrRef = LeftStrLiteral->getString();
2362 StringRef RightStrRef = RightStrLiteral->getString();
2363
2364 if (IsBounded) {
2365 // Get the max number of characters to compare.
2366 const Expr *lenExpr = Call.getArgExpr(Index: 2);
2367 SVal lenVal = state->getSVal(Ex: lenExpr, LCtx);
2368
2369 // If the length is known, we can get the right substrings.
2370 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, val: lenVal)) {
2371 // Create substrings of each to compare the prefix.
2372 LeftStrRef = LeftStrRef.substr(Start: 0, N: (size_t)len->getZExtValue());
2373 RightStrRef = RightStrRef.substr(Start: 0, N: (size_t)len->getZExtValue());
2374 canComputeResult = true;
2375 }
2376 } else {
2377 // This is a normal, unbounded strcmp.
2378 canComputeResult = true;
2379 }
2380
2381 if (canComputeResult) {
2382 // Real strcmp stops at null characters.
2383 size_t s1Term = LeftStrRef.find(C: '\0');
2384 if (s1Term != StringRef::npos)
2385 LeftStrRef = LeftStrRef.substr(Start: 0, N: s1Term);
2386
2387 size_t s2Term = RightStrRef.find(C: '\0');
2388 if (s2Term != StringRef::npos)
2389 RightStrRef = RightStrRef.substr(Start: 0, N: s2Term);
2390
2391 // Use StringRef's comparison methods to compute the actual result.
2392 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RHS: RightStrRef)
2393 : LeftStrRef.compare(RHS: RightStrRef);
2394
2395 // The strcmp function returns an integer greater than, equal to, or less
2396 // than zero, [c11, p7.24.4.2].
2397 if (compareRes == 0) {
2398 resultVal = svalBuilder.makeIntVal(integer: compareRes, type: Call.getResultType());
2399 }
2400 else {
2401 DefinedSVal zeroVal = svalBuilder.makeIntVal(integer: 0, type: Call.getResultType());
2402 // Constrain strcmp's result range based on the result of StringRef's
2403 // comparison methods.
2404 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2405 SVal compareWithZero =
2406 svalBuilder.evalBinOp(state, op, lhs: resultVal, rhs: zeroVal,
2407 type: svalBuilder.getConditionType());
2408 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2409 state = state->assume(Cond: compareWithZeroVal, Assumption: true);
2410 }
2411 }
2412 }
2413
2414 state = state->BindExpr(S: Call.getOriginExpr(), LCtx, V: resultVal);
2415
2416 // Record this as a possible path.
2417 C.addTransition(State: state);
2418}
2419
2420void CStringChecker::evalStrsep(CheckerContext &C,
2421 const CallEvent &Call) const {
2422 // char *strsep(char **stringp, const char *delim);
2423 // Verify whether the search string parameter matches the return type.
2424 SourceArgExpr SearchStrPtr = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
2425
2426 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2427 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2428 CharPtrTy.getUnqualifiedType())
2429 return;
2430
2431 CurrentFunctionDescription = "strsep()";
2432 ProgramStateRef State = C.getState();
2433 const LocationContext *LCtx = C.getLocationContext();
2434
2435 // Check that the search string pointer is non-null (though it may point to
2436 // a null string).
2437 SVal SearchStrVal = State->getSVal(Ex: SearchStrPtr.Expression, LCtx);
2438 State = checkNonNull(C, State, Arg: SearchStrPtr, l: SearchStrVal);
2439 if (!State)
2440 return;
2441
2442 // Check that the delimiter string is non-null.
2443 AnyArgExpr DelimStr = {.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1};
2444 SVal DelimStrVal = State->getSVal(Ex: DelimStr.Expression, LCtx);
2445 State = checkNonNull(C, State, Arg: DelimStr, l: DelimStrVal);
2446 if (!State)
2447 return;
2448
2449 SValBuilder &SVB = C.getSValBuilder();
2450 SVal Result;
2451 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2452 // Get the current value of the search string pointer, as a char*.
2453 Result = State->getSVal(LV: *SearchStrLoc, T: CharPtrTy);
2454
2455 // Invalidate the search string, representing the change of one delimiter
2456 // character to NUL.
2457 // As the replacement never overflows, do not invalidate its super region.
2458 State = invalidateDestinationBufferNeverOverflows(
2459 C, S: State, BufE: SearchStrPtr.Expression, BufV: Result);
2460
2461 // Overwrite the search string pointer. The new value is either an address
2462 // further along in the same string, or NULL if there are no more tokens.
2463 State =
2464 State->bindLoc(location: *SearchStrLoc,
2465 V: SVB.conjureSymbolVal(symbolTag: getTag(), expr: Call.getOriginExpr(),
2466 LCtx, type: CharPtrTy, count: C.blockCount()),
2467 LCtx);
2468 } else {
2469 assert(SearchStrVal.isUnknown());
2470 // Conjure a symbolic value. It's the best we can do.
2471 Result = SVB.conjureSymbolVal(symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx,
2472 count: C.blockCount());
2473 }
2474
2475 // Set the return value, and finish.
2476 State = State->BindExpr(S: Call.getOriginExpr(), LCtx, V: Result);
2477 C.addTransition(State);
2478}
2479
2480// These should probably be moved into a C++ standard library checker.
2481void CStringChecker::evalStdCopy(CheckerContext &C,
2482 const CallEvent &Call) const {
2483 evalStdCopyCommon(C, Call);
2484}
2485
2486void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2487 const CallEvent &Call) const {
2488 evalStdCopyCommon(C, Call);
2489}
2490
2491void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2492 const CallEvent &Call) const {
2493 if (!Call.getArgExpr(Index: 2)->getType()->isPointerType())
2494 return;
2495
2496 ProgramStateRef State = C.getState();
2497
2498 const LocationContext *LCtx = C.getLocationContext();
2499
2500 // template <class _InputIterator, class _OutputIterator>
2501 // _OutputIterator
2502 // copy(_InputIterator __first, _InputIterator __last,
2503 // _OutputIterator __result)
2504
2505 // Invalidate the destination buffer
2506 const Expr *Dst = Call.getArgExpr(Index: 2);
2507 SVal DstVal = State->getSVal(Ex: Dst, LCtx);
2508 // FIXME: As we do not know how many items are copied, we also invalidate the
2509 // super region containing the target location.
2510 State =
2511 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, S: State, BufE: Dst, BufV: DstVal);
2512
2513 SValBuilder &SVB = C.getSValBuilder();
2514
2515 SVal ResultVal =
2516 SVB.conjureSymbolVal(symbolTag: nullptr, expr: Call.getOriginExpr(), LCtx, count: C.blockCount());
2517 State = State->BindExpr(S: Call.getOriginExpr(), LCtx, V: ResultVal);
2518
2519 C.addTransition(State);
2520}
2521
2522void CStringChecker::evalMemset(CheckerContext &C,
2523 const CallEvent &Call) const {
2524 // void *memset(void *s, int c, size_t n);
2525 CurrentFunctionDescription = "memory set function";
2526
2527 DestinationArgExpr Buffer = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
2528 AnyArgExpr CharE = {.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1};
2529 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 2), .ArgumentIndex: 2}};
2530
2531 ProgramStateRef State = C.getState();
2532
2533 // See if the size argument is zero.
2534 const LocationContext *LCtx = C.getLocationContext();
2535 SVal SizeVal = C.getSVal(S: Size.Expression);
2536 QualType SizeTy = Size.Expression->getType();
2537
2538 ProgramStateRef ZeroSize, NonZeroSize;
2539 std::tie(args&: ZeroSize, args&: NonZeroSize) = assumeZero(C, State, V: SizeVal, Ty: SizeTy);
2540
2541 // Get the value of the memory area.
2542 SVal BufferPtrVal = C.getSVal(S: Buffer.Expression);
2543
2544 // If the size is zero, there won't be any actual memory access, so
2545 // just bind the return value to the buffer and return.
2546 if (ZeroSize && !NonZeroSize) {
2547 ZeroSize = ZeroSize->BindExpr(S: Call.getOriginExpr(), LCtx, V: BufferPtrVal);
2548 C.addTransition(State: ZeroSize);
2549 return;
2550 }
2551
2552 // Ensure the memory area is not null.
2553 // If it is NULL there will be a NULL pointer dereference.
2554 State = checkNonNull(C, State: NonZeroSize, Arg: Buffer, l: BufferPtrVal);
2555 if (!State)
2556 return;
2557
2558 State = CheckBufferAccess(C, State, Buffer, Size, Access: AccessKind::write);
2559 if (!State)
2560 return;
2561
2562 // According to the values of the arguments, bind the value of the second
2563 // argument to the destination buffer and set string length, or just
2564 // invalidate the destination buffer.
2565 if (!memsetAux(DstBuffer: Buffer.Expression, CharVal: C.getSVal(S: CharE.Expression),
2566 Size: Size.Expression, C, State))
2567 return;
2568
2569 State = State->BindExpr(S: Call.getOriginExpr(), LCtx, V: BufferPtrVal);
2570 C.addTransition(State);
2571}
2572
2573void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2574 CurrentFunctionDescription = "memory clearance function";
2575
2576 DestinationArgExpr Buffer = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
2577 SizeArgExpr Size = {{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}};
2578 SVal Zero = C.getSValBuilder().makeZeroVal(type: C.getASTContext().IntTy);
2579
2580 ProgramStateRef State = C.getState();
2581
2582 // See if the size argument is zero.
2583 SVal SizeVal = C.getSVal(S: Size.Expression);
2584 QualType SizeTy = Size.Expression->getType();
2585
2586 ProgramStateRef StateZeroSize, StateNonZeroSize;
2587 std::tie(args&: StateZeroSize, args&: StateNonZeroSize) =
2588 assumeZero(C, State, V: SizeVal, Ty: SizeTy);
2589
2590 // If the size is zero, there won't be any actual memory access,
2591 // In this case we just return.
2592 if (StateZeroSize && !StateNonZeroSize) {
2593 C.addTransition(State: StateZeroSize);
2594 return;
2595 }
2596
2597 // Get the value of the memory area.
2598 SVal MemVal = C.getSVal(S: Buffer.Expression);
2599
2600 // Ensure the memory area is not null.
2601 // If it is NULL there will be a NULL pointer dereference.
2602 State = checkNonNull(C, State: StateNonZeroSize, Arg: Buffer, l: MemVal);
2603 if (!State)
2604 return;
2605
2606 State = CheckBufferAccess(C, State, Buffer, Size, Access: AccessKind::write);
2607 if (!State)
2608 return;
2609
2610 if (!memsetAux(DstBuffer: Buffer.Expression, CharVal: Zero, Size: Size.Expression, C, State))
2611 return;
2612
2613 C.addTransition(State);
2614}
2615
2616void CStringChecker::evalSprintf(CheckerContext &C,
2617 const CallEvent &Call) const {
2618 CurrentFunctionDescription = "'sprintf'";
2619 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2620}
2621
2622void CStringChecker::evalSnprintf(CheckerContext &C,
2623 const CallEvent &Call) const {
2624 CurrentFunctionDescription = "'snprintf'";
2625 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2626}
2627
2628void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2629 bool IsBounded) const {
2630 ProgramStateRef State = C.getState();
2631 const auto *CE = cast<CallExpr>(Val: Call.getOriginExpr());
2632 DestinationArgExpr Dest = {{.Expression: Call.getArgExpr(Index: 0), .ArgumentIndex: 0}};
2633
2634 const auto NumParams = Call.parameters().size();
2635 if (CE->getNumArgs() < NumParams) {
2636 // This is an invalid call, let's just ignore it.
2637 return;
2638 }
2639
2640 const auto AllArguments =
2641 llvm::make_range(x: CE->getArgs(), y: CE->getArgs() + CE->getNumArgs());
2642 const auto VariadicArguments = drop_begin(RangeOrContainer: enumerate(First: AllArguments), N: NumParams);
2643
2644 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2645 // We consider only string buffers
2646 if (const QualType type = ArgExpr->getType();
2647 !type->isAnyPointerType() ||
2648 !type->getPointeeType()->isAnyCharacterType())
2649 continue;
2650 SourceArgExpr Source = {{.Expression: ArgExpr, .ArgumentIndex: unsigned(ArgIdx)}};
2651
2652 // Ensure the buffers do not overlap.
2653 SizeArgExpr SrcExprAsSizeDummy = {
2654 {.Expression: Source.Expression, .ArgumentIndex: Source.ArgumentIndex}};
2655 State = CheckOverlap(
2656 C, state: State,
2657 Size: (IsBounded ? SizeArgExpr{{.Expression: Call.getArgExpr(Index: 1), .ArgumentIndex: 1}} : SrcExprAsSizeDummy),
2658 First: Dest, Second: Source);
2659 if (!State)
2660 return;
2661 }
2662
2663 C.addTransition(State);
2664}
2665
2666//===----------------------------------------------------------------------===//
2667// The driver method, and other Checker callbacks.
2668//===----------------------------------------------------------------------===//
2669
2670CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2671 CheckerContext &C) const {
2672 const auto *CE = dyn_cast_or_null<CallExpr>(Val: Call.getOriginExpr());
2673 if (!CE)
2674 return nullptr;
2675
2676 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: Call.getDecl());
2677 if (!FD)
2678 return nullptr;
2679
2680 if (StdCopy.matches(Call))
2681 return &CStringChecker::evalStdCopy;
2682 if (StdCopyBackward.matches(Call))
2683 return &CStringChecker::evalStdCopyBackward;
2684
2685 // Pro-actively check that argument types are safe to do arithmetic upon.
2686 // We do not want to crash if someone accidentally passes a structure
2687 // into, say, a C++ overload of any of these functions. We could not check
2688 // that for std::copy because they may have arguments of other types.
2689 for (auto I : CE->arguments()) {
2690 QualType T = I->getType();
2691 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2692 return nullptr;
2693 }
2694
2695 const FnCheck *Callback = Callbacks.lookup(Call);
2696 if (Callback)
2697 return *Callback;
2698
2699 return nullptr;
2700}
2701
2702bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2703 FnCheck Callback = identifyCall(Call, C);
2704
2705 // If the callee isn't a string function, let another checker handle it.
2706 if (!Callback)
2707 return false;
2708
2709 // Check and evaluate the call.
2710 assert(isa<CallExpr>(Call.getOriginExpr()));
2711 Callback(this, C, Call);
2712
2713 // If the evaluate call resulted in no change, chain to the next eval call
2714 // handler.
2715 // Note, the custom CString evaluation calls assume that basic safety
2716 // properties are held. However, if the user chooses to turn off some of these
2717 // checks, we ignore the issues and leave the call evaluation to a generic
2718 // handler.
2719 return C.isDifferent();
2720}
2721
2722void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2723 // Record string length for char a[] = "abc";
2724 ProgramStateRef state = C.getState();
2725
2726 for (const auto *I : DS->decls()) {
2727 const VarDecl *D = dyn_cast<VarDecl>(Val: I);
2728 if (!D)
2729 continue;
2730
2731 // FIXME: Handle array fields of structs.
2732 if (!D->getType()->isArrayType())
2733 continue;
2734
2735 const Expr *Init = D->getInit();
2736 if (!Init)
2737 continue;
2738 if (!isa<StringLiteral>(Val: Init))
2739 continue;
2740
2741 Loc VarLoc = state->getLValue(VD: D, LC: C.getLocationContext());
2742 const MemRegion *MR = VarLoc.getAsRegion();
2743 if (!MR)
2744 continue;
2745
2746 SVal StrVal = C.getSVal(S: Init);
2747 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2748 DefinedOrUnknownSVal strLength =
2749 getCStringLength(C, state, Ex: Init, Buf: StrVal).castAs<DefinedOrUnknownSVal>();
2750
2751 state = state->set<CStringLength>(K: MR, E: strLength);
2752 }
2753
2754 C.addTransition(State: state);
2755}
2756
2757ProgramStateRef
2758CStringChecker::checkRegionChanges(ProgramStateRef state,
2759 const InvalidatedSymbols *,
2760 ArrayRef<const MemRegion *> ExplicitRegions,
2761 ArrayRef<const MemRegion *> Regions,
2762 const LocationContext *LCtx,
2763 const CallEvent *Call) const {
2764 CStringLengthTy Entries = state->get<CStringLength>();
2765 if (Entries.isEmpty())
2766 return state;
2767
2768 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2769 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2770
2771 // First build sets for the changed regions and their super-regions.
2772 for (const MemRegion *MR : Regions) {
2773 Invalidated.insert(Ptr: MR);
2774
2775 SuperRegions.insert(Ptr: MR);
2776 while (const SubRegion *SR = dyn_cast<SubRegion>(Val: MR)) {
2777 MR = SR->getSuperRegion();
2778 SuperRegions.insert(Ptr: MR);
2779 }
2780 }
2781
2782 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2783
2784 // Then loop over the entries in the current state.
2785 for (const MemRegion *MR : llvm::make_first_range(c&: Entries)) {
2786 // Is this entry for a super-region of a changed region?
2787 if (SuperRegions.count(Ptr: MR)) {
2788 Entries = F.remove(Old: Entries, K: MR);
2789 continue;
2790 }
2791
2792 // Is this entry for a sub-region of a changed region?
2793 const MemRegion *Super = MR;
2794 while (const SubRegion *SR = dyn_cast<SubRegion>(Val: Super)) {
2795 Super = SR->getSuperRegion();
2796 if (Invalidated.count(Ptr: Super)) {
2797 Entries = F.remove(Old: Entries, K: MR);
2798 break;
2799 }
2800 }
2801 }
2802
2803 return state->set<CStringLength>(Entries);
2804}
2805
2806void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2807 SymbolReaper &SR) const {
2808 // Mark all symbols in our string length map as valid.
2809 CStringLengthTy Entries = state->get<CStringLength>();
2810
2811 for (SVal Len : llvm::make_second_range(c&: Entries)) {
2812 for (SymbolRef Sym : Len.symbols())
2813 SR.markInUse(sym: Sym);
2814 }
2815}
2816
2817void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2818 CheckerContext &C) const {
2819 ProgramStateRef state = C.getState();
2820 CStringLengthTy Entries = state->get<CStringLength>();
2821 if (Entries.isEmpty())
2822 return;
2823
2824 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2825 for (auto [Reg, Len] : Entries) {
2826 if (SymbolRef Sym = Len.getAsSymbol()) {
2827 if (SR.isDead(sym: Sym))
2828 Entries = F.remove(Old: Entries, K: Reg);
2829 }
2830 }
2831
2832 state = state->set<CStringLength>(Entries);
2833 C.addTransition(State: state);
2834}
2835
2836void ento::registerCStringModeling(CheckerManager &Mgr) {
2837 Mgr.registerChecker<CStringChecker>();
2838}
2839
2840bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2841 return true;
2842}
2843
2844#define REGISTER_CHECKER(name) \
2845 void ento::register##name(CheckerManager &mgr) { \
2846 CStringChecker *checker = mgr.getChecker<CStringChecker>(); \
2847 checker->Filter.Check##name = true; \
2848 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \
2849 } \
2850 \
2851 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2852
2853REGISTER_CHECKER(CStringNullArg)
2854REGISTER_CHECKER(CStringOutOfBounds)
2855REGISTER_CHECKER(CStringBufferOverlap)
2856REGISTER_CHECKER(CStringNotNullTerm)
2857REGISTER_CHECKER(CStringUninitializedRead)
2858