1//===-- DereferenceChecker.cpp - Null dereference checker -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This defines DereferenceChecker, a family of builtin checks in ExprEngine
// that performs checks for null, undefined and fixed-address pointers at loads,
// stores and reference bindings, and for arithmetic on null pointers.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/AST/ExprObjC.h"
15#include "clang/Basic/TargetInfo.h"
16#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
22#include "llvm/Support/FormatVariadic.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace clang;
26using namespace ento;
27
28namespace {
29
30class DerefBugType : public BugType {
31 StringRef ArrayMsg, FieldMsg;
32
33public:
34 DerefBugType(CheckerFrontend *FE, StringRef Desc, const char *AMsg,
35 const char *FMsg = nullptr)
36 : BugType(FE, Desc), ArrayMsg(AMsg), FieldMsg(FMsg ? FMsg : AMsg) {}
37 StringRef getArrayMsg() const { return ArrayMsg; }
38 StringRef getFieldMsg() const { return FieldMsg; }
39};
40
41class DereferenceChecker
42 : public CheckerFamily<check::Location, check::Bind,
43 check::PreStmt<BinaryOperator>,
44 EventDispatcher<ImplicitNullDerefEvent>> {
45 void reportDerefBug(const DerefBugType &BT, ProgramStateRef State,
46 const Stmt *S, CheckerContext &C) const;
47
48 bool suppressReport(CheckerContext &C, const Expr *E) const;
49
50public:
51 void checkLocation(SVal location, bool isLoad, const Stmt* S,
52 CheckerContext &C) const;
53 void checkBind(SVal L, SVal V, const Stmt *S, bool AtDeclInit,
54 CheckerContext &C) const;
55 void checkPreStmt(const BinaryOperator *Op, CheckerContext &C) const;
56
57 static void AddDerefSource(raw_ostream &os,
58 SmallVectorImpl<SourceRange> &Ranges,
59 const Expr *Ex, const ProgramState *state,
60 const LocationContext *LCtx,
61 bool loadedFrom = false);
62
63 CheckerFrontend NullDerefChecker, FixedDerefChecker, NullPointerArithmChecker;
64 const DerefBugType NullBug{&NullDerefChecker, "Dereference of null pointer",
65 "a null pointer dereference",
66 "a dereference of a null pointer"};
67 const DerefBugType UndefBug{&NullDerefChecker,
68 "Dereference of undefined pointer value",
69 "an undefined pointer dereference",
70 "a dereference of an undefined pointer value"};
71 const DerefBugType LabelBug{&NullDerefChecker,
72 "Dereference of the address of a label",
73 "an undefined pointer dereference",
74 "a dereference of an address of a label"};
75 const DerefBugType FixedAddressBug{&FixedDerefChecker,
76 "Dereference of a fixed address",
77 "a dereference of a fixed address"};
78 const BugType NullPointerArithmBug{
79 &NullPointerArithmChecker,
80 "Possibly undefined arithmetic operation involving a null pointer"};
81
82 StringRef getDebugTag() const override { return "DereferenceChecker"; }
83};
84
85struct ValueDescStr {
86 SmallVectorImpl<SourceRange> &Ranges;
87 const Expr *Ex;
88 const ProgramState *State;
89 const LocationContext *LCtx;
90 bool IsPointer;
91 ConditionTruthVal IsNull;
92};
93
94} // end anonymous namespace
95
96void
97DereferenceChecker::AddDerefSource(raw_ostream &os,
98 SmallVectorImpl<SourceRange> &Ranges,
99 const Expr *Ex,
100 const ProgramState *state,
101 const LocationContext *LCtx,
102 bool loadedFrom) {
103 Ex = Ex->IgnoreParenLValueCasts();
104 switch (Ex->getStmtClass()) {
105 default:
106 break;
107 case Stmt::DeclRefExprClass: {
108 const DeclRefExpr *DR = cast<DeclRefExpr>(Val: Ex);
109 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DR->getDecl())) {
110 os << " (" << (loadedFrom ? "loaded from" : "from")
111 << " variable '" << VD->getName() << "')";
112 Ranges.push_back(Elt: DR->getSourceRange());
113 }
114 break;
115 }
116 case Stmt::MemberExprClass: {
117 const MemberExpr *ME = cast<MemberExpr>(Val: Ex);
118 os << " (" << (loadedFrom ? "loaded from" : "via")
119 << " field '" << ME->getMemberNameInfo() << "')";
120 SourceLocation L = ME->getMemberLoc();
121 Ranges.push_back(Elt: SourceRange(L, L));
122 break;
123 }
124 case Stmt::ObjCIvarRefExprClass: {
125 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: Ex);
126 os << " (" << (loadedFrom ? "loaded from" : "via")
127 << " ivar '" << IV->getDecl()->getName() << "')";
128 SourceLocation L = IV->getLocation();
129 Ranges.push_back(Elt: SourceRange(L, L));
130 break;
131 }
132 }
133}
134
135static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){
136 const Expr *E = nullptr;
137
138 // Walk through lvalue casts to get the original expression
139 // that syntactically caused the load.
140 if (const Expr *expr = dyn_cast<Expr>(Val: S))
141 E = expr->IgnoreParenLValueCasts();
142
143 if (IsBind) {
144 const VarDecl *VD;
145 const Expr *Init;
146 std::tie(args&: VD, args&: Init) = parseAssignment(S);
147 if (VD && Init)
148 E = Init;
149 }
150 return E;
151}
152
153bool DereferenceChecker::suppressReport(CheckerContext &C,
154 const Expr *E) const {
155 // Do not report dereferences on memory that use address space #256, #257,
156 // and #258. Those address spaces are used when dereferencing address spaces
157 // relative to the GS, FS, and SS segments on x86/x86-64 targets.
158 // Dereferencing a null pointer in these address spaces is not defined
159 // as an error. All other null dereferences in other address spaces
160 // are defined as an error unless explicitly defined.
161 // See https://clang.llvm.org/docs/LanguageExtensions.html, the section
162 // "X86/X86-64 Language Extensions"
163
164 QualType Ty = E->getType();
165 if (!Ty.hasAddressSpace())
166 return false;
167 if (C.getAnalysisManager()
168 .getAnalyzerOptions()
169 .ShouldSuppressAddressSpaceDereferences)
170 return true;
171
172 const llvm::Triple::ArchType Arch =
173 C.getASTContext().getTargetInfo().getTriple().getArch();
174
175 if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) {
176 switch (toTargetAddressSpace(AS: E->getType().getAddressSpace())) {
177 case 256:
178 case 257:
179 case 258:
180 return true;
181 }
182 }
183 return false;
184}
185
186static bool isDeclRefExprToReference(const Expr *E) {
187 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: E))
188 return DRE->getDecl()->getType()->isReferenceType();
189 return false;
190}
191
192void DereferenceChecker::reportDerefBug(const DerefBugType &BT,
193 ProgramStateRef State, const Stmt *S,
194 CheckerContext &C) const {
195 if (&BT == &FixedAddressBug) {
196 if (!FixedDerefChecker.isEnabled())
197 // Deliberately don't add a sink node if check is disabled.
198 // This situation may be valid in special cases.
199 return;
200 } else {
201 if (!NullDerefChecker.isEnabled()) {
202 C.addSink();
203 return;
204 }
205 }
206
207 // Generate an error node.
208 ExplodedNode *N = C.generateErrorNode(State);
209 if (!N)
210 return;
211
212 SmallString<100> Buf;
213 llvm::raw_svector_ostream Out(Buf);
214
215 SmallVector<SourceRange, 2> Ranges;
216
217 switch (S->getStmtClass()) {
218 case Stmt::ArraySubscriptExprClass: {
219 Out << "Array access";
220 const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(Val: S);
221 AddDerefSource(os&: Out, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), state: State.get(),
222 LCtx: N->getLocationContext());
223 Out << " results in " << BT.getArrayMsg();
224 break;
225 }
226 case Stmt::ArraySectionExprClass: {
227 Out << "Array access";
228 const ArraySectionExpr *AE = cast<ArraySectionExpr>(Val: S);
229 AddDerefSource(os&: Out, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), state: State.get(),
230 LCtx: N->getLocationContext());
231 Out << " results in " << BT.getArrayMsg();
232 break;
233 }
234 case Stmt::UnaryOperatorClass: {
235 Out << BT.getDescription();
236 const UnaryOperator *U = cast<UnaryOperator>(Val: S);
237 AddDerefSource(os&: Out, Ranges, Ex: U->getSubExpr()->IgnoreParens(), state: State.get(),
238 LCtx: N->getLocationContext(), loadedFrom: true);
239 break;
240 }
241 case Stmt::MemberExprClass: {
242 const MemberExpr *M = cast<MemberExpr>(Val: S);
243 if (M->isArrow() || isDeclRefExprToReference(E: M->getBase())) {
244 Out << "Access to field '" << M->getMemberNameInfo() << "' results in "
245 << BT.getFieldMsg();
246 AddDerefSource(os&: Out, Ranges, Ex: M->getBase()->IgnoreParenCasts(), state: State.get(),
247 LCtx: N->getLocationContext(), loadedFrom: true);
248 }
249 break;
250 }
251 case Stmt::ObjCIvarRefExprClass: {
252 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: S);
253 Out << "Access to instance variable '" << *IV->getDecl() << "' results in "
254 << BT.getFieldMsg();
255 AddDerefSource(os&: Out, Ranges, Ex: IV->getBase()->IgnoreParenCasts(), state: State.get(),
256 LCtx: N->getLocationContext(), loadedFrom: true);
257 break;
258 }
259 default:
260 break;
261 }
262
263 auto BR = std::make_unique<PathSensitiveBugReport>(
264 args: BT, args: Buf.empty() ? BT.getDescription() : Buf.str(), args&: N);
265
266 bugreporter::trackExpressionValue(N, E: bugreporter::getDerefExpr(S), R&: *BR);
267
268 for (const auto &R : Ranges)
269 BR->addRange(R);
270
271 C.emitReport(R: std::move(BR));
272}
273
274void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
275 CheckerContext &C) const {
276 // Check for dereference of an undefined value.
277 if (l.isUndef()) {
278 const Expr *DerefExpr = getDereferenceExpr(S);
279 if (!suppressReport(C, E: DerefExpr))
280 reportDerefBug(BT: UndefBug, State: C.getState(), S: DerefExpr, C);
281 return;
282 }
283
284 DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>();
285
286 // Check for null dereferences.
287 if (!isa<Loc>(Val: location))
288 return;
289
290 ProgramStateRef state = C.getState();
291
292 ProgramStateRef notNullState, nullState;
293 std::tie(args&: notNullState, args&: nullState) = state->assume(Cond: location);
294
295 if (nullState) {
296 if (!notNullState) {
297 // We know that 'location' can only be null. This is what
298 // we call an "explicit" null dereference.
299 const Expr *expr = getDereferenceExpr(S);
300 if (!suppressReport(C, E: expr)) {
301 reportDerefBug(BT: NullBug, State: nullState, S: expr, C);
302 return;
303 }
304 }
305
306 // Otherwise, we have the case where the location could either be
307 // null or not-null. Record the error node as an "implicit" null
308 // dereference.
309 if (ExplodedNode *N = C.generateSink(State: nullState, Pred: C.getPredecessor())) {
310 ImplicitNullDerefEvent event = {.Location: l, .IsLoad: isLoad, .SinkNode: N, .BR: &C.getBugReporter(),
311 /*IsDirectDereference=*/true};
312 dispatchEvent(event);
313 }
314 }
315
316 if (location.isConstant()) {
317 const Expr *DerefExpr = getDereferenceExpr(S, IsBind: isLoad);
318 if (!DerefExpr->getType().isVolatileQualified() &&
319 !suppressReport(C, E: DerefExpr))
320 reportDerefBug(BT: FixedAddressBug, State: notNullState, S: DerefExpr, C);
321 return;
322 }
323
324 // From this point forward, we know that the location is not null.
325 C.addTransition(State: notNullState);
326}
327
328void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S,
329 bool AtDeclInit, CheckerContext &C) const {
330 // If we're binding to a reference, check if the value is known to be null.
331 if (V.isUndef())
332 return;
333
334 // One should never write to label addresses.
335 if (auto Label = L.getAs<loc::GotoLabel>()) {
336 reportDerefBug(BT: LabelBug, State: C.getState(), S, C);
337 return;
338 }
339
340 const MemRegion *MR = L.getAsRegion();
341 const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(Val: MR);
342 if (!TVR)
343 return;
344
345 if (!TVR->getValueType()->isReferenceType())
346 return;
347
348 ProgramStateRef State = C.getState();
349
350 ProgramStateRef StNonNull, StNull;
351 std::tie(args&: StNonNull, args&: StNull) = State->assume(Cond: V.castAs<DefinedOrUnknownSVal>());
352
353 if (StNull) {
354 if (!StNonNull) {
355 const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true);
356 if (!suppressReport(C, E: expr)) {
357 reportDerefBug(BT: NullBug, State: StNull, S: expr, C);
358 return;
359 }
360 }
361
362 // At this point the value could be either null or non-null.
363 // Record this as an "implicit" null dereference.
364 if (ExplodedNode *N = C.generateSink(State: StNull, Pred: C.getPredecessor())) {
365 ImplicitNullDerefEvent event = {.Location: V, /*isLoad=*/.IsLoad: true, .SinkNode: N,
366 .BR: &C.getBugReporter(),
367 /*IsDirectDereference=*/true};
368 dispatchEvent(event);
369 }
370 }
371
372 if (V.isConstant()) {
373 const Expr *DerefExpr = getDereferenceExpr(S, IsBind: true);
374 if (!suppressReport(C, E: DerefExpr))
375 reportDerefBug(BT: FixedAddressBug, State, S: DerefExpr, C);
376 return;
377 }
378
379 // Unlike a regular null dereference, initializing a reference with a
380 // dereferenced null pointer does not actually cause a runtime exception in
381 // Clang's implementation of references.
382 //
383 // int &r = *p; // safe??
384 // if (p != NULL) return; // uh-oh
385 // r = 5; // trap here
386 //
387 // The standard says this is invalid as soon as we try to create a "null
388 // reference" (there is no such thing), but turning this into an assumption
389 // that 'p' is never null will not match our actual runtime behavior.
390 // So we do not record this assumption, allowing us to warn on the last line
391 // of this example.
392 //
393 // We do need to add a transition because we may have generated a sink for
394 // the "implicit" null dereference.
395 C.addTransition(State, Tag: this);
396}
397
398namespace llvm {
399template <> struct format_provider<ValueDescStr> {
400 static void format(const ValueDescStr &V, raw_ostream &Stream,
401 StringRef Style) {
402 static const char *ValueStr[2][3] = {
403 {"zero", "nonzero integer value", "probably nonzero integer value"},
404 {"null pointer", "non-null pointer", "probably non-null pointer"},
405 };
406 Stream
407 << ValueStr[V.IsPointer][V.IsNull.isConstrainedTrue()
408 ? 0
409 : (V.IsNull.isConstrainedFalse() ? 1 : 2)];
410 DereferenceChecker::AddDerefSource(os&: Stream, Ranges&: V.Ranges, Ex: V.Ex, state: V.State, LCtx: V.LCtx,
411 loadedFrom: false);
412 }
413};
414} // namespace llvm
415
416void DereferenceChecker::checkPreStmt(const BinaryOperator *Op,
417 CheckerContext &C) const {
418 if (!Op->isAdditiveOp() || !NullPointerArithmChecker.isEnabled())
419 return;
420 const Expr *E1 = Op->getLHS();
421 const Expr *E2 = Op->getRHS();
422 QualType T1 = E1->getType().getCanonicalType();
423 QualType T2 = E2->getType().getCanonicalType();
424 bool T1IsPointer = T1->isPointerType();
425 bool T2IsPointer = T2->isPointerType();
426 if (T1->isIntegerType() && T2->isIntegerType())
427 return;
428 if (!T1IsPointer && !T1->isIntegerType() && !T2IsPointer &&
429 !T2->isIntegerType())
430 return;
431
432 ProgramStateRef State = C.getState();
433 ConditionTruthVal V1IsNull = State->isNull(V: C.getSVal(S: E1));
434 ConditionTruthVal V2IsNull = State->isNull(V: C.getSVal(S: E2));
435 bool IsConstrained = true;
436
437 // Check cases 'NULL + x' and 'NULL - x'
438 if (T1IsPointer && !T2IsPointer) {
439 if (!V1IsNull.isConstrainedTrue() || V2IsNull.isConstrainedTrue())
440 return;
441 IsConstrained = V2IsNull.isConstrainedFalse();
442 }
443
444 // Check case 'x + NULL'
445 if (!T1IsPointer && T2IsPointer) {
446 if (V1IsNull.isConstrainedTrue() || !V2IsNull.isConstrainedTrue())
447 return;
448 IsConstrained = V1IsNull.isConstrainedFalse();
449 }
450
451 // Check case 'NULL - p' or 'p - NULL'
452 if (T1IsPointer && T2IsPointer) {
453 if (!V1IsNull.isConstrainedTrue() && !V2IsNull.isConstrainedTrue())
454 return;
455 if (V1IsNull.isConstrainedTrue() && V2IsNull.isConstrainedTrue())
456 return;
457 IsConstrained =
458 V1IsNull.isConstrainedFalse() || V2IsNull.isConstrainedFalse();
459 }
460
461 SmallVector<SourceRange, 2> Ranges;
462 const char *OpcodeStr =
463 Op->getOpcode() == BO_Add ? "Addition" : "Subtraction";
464 const char *ResultStr = IsConstrained ? "results" : "may result";
465 ValueDescStr DerefArg1{
466 .Ranges: Ranges, .Ex: E1, .State: State.get(), .LCtx: C.getLocationContext(), .IsPointer: T1IsPointer, .IsNull: V1IsNull};
467 ValueDescStr DerefArg2{
468 .Ranges: Ranges, .Ex: E2, .State: State.get(), .LCtx: C.getLocationContext(), .IsPointer: T2IsPointer, .IsNull: V2IsNull};
469 std::string Msg =
470 llvm::formatv(Fmt: "{0} of a {1} and a {2} {3} in undefined behavior",
471 Vals&: OpcodeStr, Vals&: DerefArg1, Vals&: DerefArg2, Vals&: ResultStr);
472
473 ExplodedNode *N = C.generateErrorNode(State);
474 if (!N)
475 return;
476 auto BR =
477 std::make_unique<PathSensitiveBugReport>(args: NullPointerArithmBug, args&: Msg, args&: N);
478 if (V1IsNull.isConstrainedTrue())
479 bugreporter::trackExpressionValue(N, E: E1, R&: *BR);
480 if (V2IsNull.isConstrainedTrue())
481 bugreporter::trackExpressionValue(N, E: E2, R&: *BR);
482 for (const auto &R : Ranges)
483 BR->addRange(R);
484
485 C.emitReport(R: std::move(BR));
486}
487
488void ento::registerNullDereferenceChecker(CheckerManager &Mgr) {
489 Mgr.getChecker<DereferenceChecker>()->NullDerefChecker.enable(Mgr);
490}
491
492bool ento::shouldRegisterNullDereferenceChecker(const CheckerManager &) {
493 return true;
494}
495
496void ento::registerFixedAddressDereferenceChecker(CheckerManager &Mgr) {
497 Mgr.getChecker<DereferenceChecker>()->FixedDerefChecker.enable(Mgr);
498}
499
500bool ento::shouldRegisterFixedAddressDereferenceChecker(
501 const CheckerManager &) {
502 return true;
503}
504
505void ento::registerNullPointerArithmChecker(CheckerManager &Mgr) {
506 Mgr.getChecker<DereferenceChecker>()->NullPointerArithmChecker.enable(Mgr);
507}
508
509bool ento::shouldRegisterNullPointerArithmChecker(const CheckerManager &) {
510 return true;
511}
512