1 | //===-- DereferenceChecker.cpp - Null dereference checker -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This defines DereferenceChecker, a builtin check in ExprEngine that performs
// checks for null pointers at loads and stores.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/AST/ExprObjC.h" |
15 | #include "clang/Basic/TargetInfo.h" |
16 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
17 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
18 | #include "clang/StaticAnalyzer/Core/Checker.h" |
19 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
20 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" |
22 | #include "llvm/Support/raw_ostream.h" |
23 | |
24 | using namespace clang; |
25 | using namespace ento; |
26 | |
27 | namespace { |
28 | class DereferenceChecker |
29 | : public Checker< check::Location, |
30 | check::Bind, |
31 | EventDispatcher<ImplicitNullDerefEvent> > { |
32 | enum DerefKind { |
33 | NullPointer, |
34 | UndefinedPointerValue, |
35 | AddressOfLabel, |
36 | FixedAddress, |
37 | }; |
38 | |
39 | void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S, |
40 | CheckerContext &C) const; |
41 | |
42 | bool suppressReport(CheckerContext &C, const Expr *E) const; |
43 | |
44 | public: |
45 | void checkLocation(SVal location, bool isLoad, const Stmt* S, |
46 | CheckerContext &C) const; |
47 | void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; |
48 | |
49 | static void AddDerefSource(raw_ostream &os, |
50 | SmallVectorImpl<SourceRange> &Ranges, |
51 | const Expr *Ex, const ProgramState *state, |
52 | const LocationContext *LCtx, |
53 | bool loadedFrom = false); |
54 | |
55 | bool CheckNullDereference = false; |
56 | bool CheckFixedDereference = false; |
57 | |
58 | std::unique_ptr<BugType> BT_Null; |
59 | std::unique_ptr<BugType> BT_Undef; |
60 | std::unique_ptr<BugType> BT_Label; |
61 | std::unique_ptr<BugType> BT_FixedAddress; |
62 | }; |
63 | } // end anonymous namespace |
64 | |
65 | void |
66 | DereferenceChecker::AddDerefSource(raw_ostream &os, |
67 | SmallVectorImpl<SourceRange> &Ranges, |
68 | const Expr *Ex, |
69 | const ProgramState *state, |
70 | const LocationContext *LCtx, |
71 | bool loadedFrom) { |
72 | Ex = Ex->IgnoreParenLValueCasts(); |
73 | switch (Ex->getStmtClass()) { |
74 | default: |
75 | break; |
76 | case Stmt::DeclRefExprClass: { |
77 | const DeclRefExpr *DR = cast<DeclRefExpr>(Val: Ex); |
78 | if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DR->getDecl())) { |
79 | os << " (" << (loadedFrom ? "loaded from" : "from" ) |
80 | << " variable '" << VD->getName() << "')" ; |
81 | Ranges.push_back(Elt: DR->getSourceRange()); |
82 | } |
83 | break; |
84 | } |
85 | case Stmt::MemberExprClass: { |
86 | const MemberExpr *ME = cast<MemberExpr>(Val: Ex); |
87 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
88 | << " field '" << ME->getMemberNameInfo() << "')" ; |
89 | SourceLocation L = ME->getMemberLoc(); |
90 | Ranges.push_back(Elt: SourceRange(L, L)); |
91 | break; |
92 | } |
93 | case Stmt::ObjCIvarRefExprClass: { |
94 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: Ex); |
95 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
96 | << " ivar '" << IV->getDecl()->getName() << "')" ; |
97 | SourceLocation L = IV->getLocation(); |
98 | Ranges.push_back(Elt: SourceRange(L, L)); |
99 | break; |
100 | } |
101 | } |
102 | } |
103 | |
104 | static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){ |
105 | const Expr *E = nullptr; |
106 | |
107 | // Walk through lvalue casts to get the original expression |
108 | // that syntactically caused the load. |
109 | if (const Expr *expr = dyn_cast<Expr>(Val: S)) |
110 | E = expr->IgnoreParenLValueCasts(); |
111 | |
112 | if (IsBind) { |
113 | const VarDecl *VD; |
114 | const Expr *Init; |
115 | std::tie(args&: VD, args&: Init) = parseAssignment(S); |
116 | if (VD && Init) |
117 | E = Init; |
118 | } |
119 | return E; |
120 | } |
121 | |
122 | bool DereferenceChecker::suppressReport(CheckerContext &C, |
123 | const Expr *E) const { |
124 | // Do not report dereferences on memory that use address space #256, #257, |
125 | // and #258. Those address spaces are used when dereferencing address spaces |
126 | // relative to the GS, FS, and SS segments on x86/x86-64 targets. |
127 | // Dereferencing a null pointer in these address spaces is not defined |
128 | // as an error. All other null dereferences in other address spaces |
129 | // are defined as an error unless explicitly defined. |
130 | // See https://clang.llvm.org/docs/LanguageExtensions.html, the section |
131 | // "X86/X86-64 Language Extensions" |
132 | |
133 | QualType Ty = E->getType(); |
134 | if (!Ty.hasAddressSpace()) |
135 | return false; |
136 | if (C.getAnalysisManager() |
137 | .getAnalyzerOptions() |
138 | .ShouldSuppressAddressSpaceDereferences) |
139 | return true; |
140 | |
141 | const llvm::Triple::ArchType Arch = |
142 | C.getASTContext().getTargetInfo().getTriple().getArch(); |
143 | |
144 | if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) { |
145 | switch (toTargetAddressSpace(AS: E->getType().getAddressSpace())) { |
146 | case 256: |
147 | case 257: |
148 | case 258: |
149 | return true; |
150 | } |
151 | } |
152 | return false; |
153 | } |
154 | |
155 | static bool isDeclRefExprToReference(const Expr *E) { |
156 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) |
157 | return DRE->getDecl()->getType()->isReferenceType(); |
158 | return false; |
159 | } |
160 | |
161 | void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, |
162 | const Stmt *S, CheckerContext &C) const { |
163 | const BugType *BT = nullptr; |
164 | llvm::StringRef DerefStr1; |
165 | llvm::StringRef DerefStr2; |
166 | switch (K) { |
167 | case DerefKind::NullPointer: |
168 | if (!CheckNullDereference) { |
169 | C.addSink(); |
170 | return; |
171 | } |
172 | BT = BT_Null.get(); |
173 | DerefStr1 = " results in a null pointer dereference" ; |
174 | DerefStr2 = " results in a dereference of a null pointer" ; |
175 | break; |
176 | case DerefKind::UndefinedPointerValue: |
177 | if (!CheckNullDereference) { |
178 | C.addSink(); |
179 | return; |
180 | } |
181 | BT = BT_Undef.get(); |
182 | DerefStr1 = " results in an undefined pointer dereference" ; |
183 | DerefStr2 = " results in a dereference of an undefined pointer value" ; |
184 | break; |
185 | case DerefKind::AddressOfLabel: |
186 | if (!CheckNullDereference) { |
187 | C.addSink(); |
188 | return; |
189 | } |
190 | BT = BT_Label.get(); |
191 | DerefStr1 = " results in an undefined pointer dereference" ; |
192 | DerefStr2 = " results in a dereference of an address of a label" ; |
193 | break; |
194 | case DerefKind::FixedAddress: |
195 | // Deliberately don't add a sink node if check is disabled. |
196 | // This situation may be valid in special cases. |
197 | if (!CheckFixedDereference) |
198 | return; |
199 | |
200 | BT = BT_FixedAddress.get(); |
201 | DerefStr1 = " results in a dereference of a fixed address" ; |
202 | DerefStr2 = " results in a dereference of a fixed address" ; |
203 | break; |
204 | }; |
205 | |
206 | // Generate an error node. |
207 | ExplodedNode *N = C.generateErrorNode(State); |
208 | if (!N) |
209 | return; |
210 | |
211 | SmallString<100> buf; |
212 | llvm::raw_svector_ostream os(buf); |
213 | |
214 | SmallVector<SourceRange, 2> Ranges; |
215 | |
216 | switch (S->getStmtClass()) { |
217 | case Stmt::ArraySubscriptExprClass: { |
218 | os << "Array access" ; |
219 | const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(Val: S); |
220 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
221 | state: State.get(), LCtx: N->getLocationContext()); |
222 | os << DerefStr1; |
223 | break; |
224 | } |
225 | case Stmt::ArraySectionExprClass: { |
226 | os << "Array access" ; |
227 | const ArraySectionExpr *AE = cast<ArraySectionExpr>(Val: S); |
228 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
229 | state: State.get(), LCtx: N->getLocationContext()); |
230 | os << DerefStr1; |
231 | break; |
232 | } |
233 | case Stmt::UnaryOperatorClass: { |
234 | os << BT->getDescription(); |
235 | const UnaryOperator *U = cast<UnaryOperator>(Val: S); |
236 | AddDerefSource(os, Ranges, Ex: U->getSubExpr()->IgnoreParens(), |
237 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
238 | break; |
239 | } |
240 | case Stmt::MemberExprClass: { |
241 | const MemberExpr *M = cast<MemberExpr>(Val: S); |
242 | if (M->isArrow() || isDeclRefExprToReference(E: M->getBase())) { |
243 | os << "Access to field '" << M->getMemberNameInfo() << "'" << DerefStr2; |
244 | AddDerefSource(os, Ranges, Ex: M->getBase()->IgnoreParenCasts(), |
245 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
246 | } |
247 | break; |
248 | } |
249 | case Stmt::ObjCIvarRefExprClass: { |
250 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: S); |
251 | os << "Access to instance variable '" << *IV->getDecl() << "'" << DerefStr2; |
252 | AddDerefSource(os, Ranges, Ex: IV->getBase()->IgnoreParenCasts(), |
253 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
254 | break; |
255 | } |
256 | default: |
257 | break; |
258 | } |
259 | |
260 | auto report = std::make_unique<PathSensitiveBugReport>( |
261 | args: *BT, args: buf.empty() ? BT->getDescription() : buf.str(), args&: N); |
262 | |
263 | bugreporter::trackExpressionValue(N, E: bugreporter::getDerefExpr(S), R&: *report); |
264 | |
265 | for (SmallVectorImpl<SourceRange>::iterator |
266 | I = Ranges.begin(), E = Ranges.end(); I!=E; ++I) |
267 | report->addRange(R: *I); |
268 | |
269 | C.emitReport(R: std::move(report)); |
270 | } |
271 | |
272 | void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, |
273 | CheckerContext &C) const { |
274 | // Check for dereference of an undefined value. |
275 | if (l.isUndef()) { |
276 | const Expr *DerefExpr = getDereferenceExpr(S); |
277 | if (!suppressReport(C, E: DerefExpr)) |
278 | reportBug(K: DerefKind::UndefinedPointerValue, State: C.getState(), S: DerefExpr, C); |
279 | return; |
280 | } |
281 | |
282 | DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>(); |
283 | |
284 | // Check for null dereferences. |
285 | if (!isa<Loc>(Val: location)) |
286 | return; |
287 | |
288 | ProgramStateRef state = C.getState(); |
289 | |
290 | ProgramStateRef notNullState, nullState; |
291 | std::tie(args&: notNullState, args&: nullState) = state->assume(Cond: location); |
292 | |
293 | if (nullState) { |
294 | if (!notNullState) { |
295 | // We know that 'location' can only be null. This is what |
296 | // we call an "explicit" null dereference. |
297 | const Expr *expr = getDereferenceExpr(S); |
298 | if (!suppressReport(C, E: expr)) { |
299 | reportBug(K: DerefKind::NullPointer, State: nullState, S: expr, C); |
300 | return; |
301 | } |
302 | } |
303 | |
304 | // Otherwise, we have the case where the location could either be |
305 | // null or not-null. Record the error node as an "implicit" null |
306 | // dereference. |
307 | if (ExplodedNode *N = C.generateSink(State: nullState, Pred: C.getPredecessor())) { |
308 | ImplicitNullDerefEvent event = {.Location: l, .IsLoad: isLoad, .SinkNode: N, .BR: &C.getBugReporter(), |
309 | /*IsDirectDereference=*/true}; |
310 | dispatchEvent(event); |
311 | } |
312 | } |
313 | |
314 | if (location.isConstant()) { |
315 | const Expr *DerefExpr = getDereferenceExpr(S, IsBind: isLoad); |
316 | if (!suppressReport(C, E: DerefExpr)) |
317 | reportBug(K: DerefKind::FixedAddress, State: notNullState, S: DerefExpr, C); |
318 | return; |
319 | } |
320 | |
321 | // From this point forward, we know that the location is not null. |
322 | C.addTransition(State: notNullState); |
323 | } |
324 | |
325 | void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, |
326 | CheckerContext &C) const { |
327 | // If we're binding to a reference, check if the value is known to be null. |
328 | if (V.isUndef()) |
329 | return; |
330 | |
331 | // One should never write to label addresses. |
332 | if (auto Label = L.getAs<loc::GotoLabel>()) { |
333 | reportBug(K: DerefKind::AddressOfLabel, State: C.getState(), S, C); |
334 | return; |
335 | } |
336 | |
337 | const MemRegion *MR = L.getAsRegion(); |
338 | const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(Val: MR); |
339 | if (!TVR) |
340 | return; |
341 | |
342 | if (!TVR->getValueType()->isReferenceType()) |
343 | return; |
344 | |
345 | ProgramStateRef State = C.getState(); |
346 | |
347 | ProgramStateRef StNonNull, StNull; |
348 | std::tie(args&: StNonNull, args&: StNull) = State->assume(Cond: V.castAs<DefinedOrUnknownSVal>()); |
349 | |
350 | if (StNull) { |
351 | if (!StNonNull) { |
352 | const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true); |
353 | if (!suppressReport(C, E: expr)) { |
354 | reportBug(K: DerefKind::NullPointer, State: StNull, S: expr, C); |
355 | return; |
356 | } |
357 | } |
358 | |
359 | // At this point the value could be either null or non-null. |
360 | // Record this as an "implicit" null dereference. |
361 | if (ExplodedNode *N = C.generateSink(State: StNull, Pred: C.getPredecessor())) { |
362 | ImplicitNullDerefEvent event = {.Location: V, /*isLoad=*/.IsLoad: true, .SinkNode: N, |
363 | .BR: &C.getBugReporter(), |
364 | /*IsDirectDereference=*/true}; |
365 | dispatchEvent(event); |
366 | } |
367 | } |
368 | |
369 | if (V.isConstant()) { |
370 | const Expr *DerefExpr = getDereferenceExpr(S, IsBind: true); |
371 | if (!suppressReport(C, E: DerefExpr)) |
372 | reportBug(K: DerefKind::FixedAddress, State, S: DerefExpr, C); |
373 | return; |
374 | } |
375 | |
376 | // Unlike a regular null dereference, initializing a reference with a |
377 | // dereferenced null pointer does not actually cause a runtime exception in |
378 | // Clang's implementation of references. |
379 | // |
380 | // int &r = *p; // safe?? |
381 | // if (p != NULL) return; // uh-oh |
382 | // r = 5; // trap here |
383 | // |
384 | // The standard says this is invalid as soon as we try to create a "null |
385 | // reference" (there is no such thing), but turning this into an assumption |
386 | // that 'p' is never null will not match our actual runtime behavior. |
387 | // So we do not record this assumption, allowing us to warn on the last line |
388 | // of this example. |
389 | // |
390 | // We do need to add a transition because we may have generated a sink for |
391 | // the "implicit" null dereference. |
392 | C.addTransition(State, Tag: this); |
393 | } |
394 | |
395 | void ento::registerDereferenceModeling(CheckerManager &Mgr) { |
396 | Mgr.registerChecker<DereferenceChecker>(); |
397 | } |
398 | |
399 | bool ento::shouldRegisterDereferenceModeling(const CheckerManager &) { |
400 | return true; |
401 | } |
402 | |
403 | void ento::registerNullDereferenceChecker(CheckerManager &Mgr) { |
404 | auto *Chk = Mgr.getChecker<DereferenceChecker>(); |
405 | Chk->CheckNullDereference = true; |
406 | Chk->BT_Null.reset(p: new BugType(Mgr.getCurrentCheckerName(), |
407 | "Dereference of null pointer" , |
408 | categories::LogicError)); |
409 | Chk->BT_Undef.reset(p: new BugType(Mgr.getCurrentCheckerName(), |
410 | "Dereference of undefined pointer value" , |
411 | categories::LogicError)); |
412 | Chk->BT_Label.reset(p: new BugType(Mgr.getCurrentCheckerName(), |
413 | "Dereference of the address of a label" , |
414 | categories::LogicError)); |
415 | } |
416 | |
417 | bool ento::shouldRegisterNullDereferenceChecker(const CheckerManager &) { |
418 | return true; |
419 | } |
420 | |
421 | void ento::registerFixedAddressDereferenceChecker(CheckerManager &Mgr) { |
422 | auto *Chk = Mgr.getChecker<DereferenceChecker>(); |
423 | Chk->CheckFixedDereference = true; |
424 | Chk->BT_FixedAddress.reset(p: new BugType(Mgr.getCurrentCheckerName(), |
425 | "Dereference of a fixed address" , |
426 | categories::LogicError)); |
427 | } |
428 | |
429 | bool ento::shouldRegisterFixedAddressDereferenceChecker( |
430 | const CheckerManager &) { |
431 | return true; |
432 | } |
433 | |