//===-- DereferenceChecker.cpp - Null dereference checker -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This defines DereferenceChecker, a builtin check in ExprEngine that performs
// checks for null and undefined pointers at loads and stores, for writes
// through the address of a label, and for null values bound to references.
//
//===----------------------------------------------------------------------===//
13 | |
14 | #include "clang/AST/ExprObjC.h" |
15 | #include "clang/AST/ExprOpenMP.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
18 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
19 | #include "clang/StaticAnalyzer/Core/Checker.h" |
20 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
22 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" |
23 | #include "llvm/ADT/SmallString.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | |
26 | using namespace clang; |
27 | using namespace ento; |
28 | |
29 | namespace { |
30 | class DereferenceChecker |
31 | : public Checker< check::Location, |
32 | check::Bind, |
33 | EventDispatcher<ImplicitNullDerefEvent> > { |
34 | enum DerefKind { NullPointer, UndefinedPointerValue, AddressOfLabel }; |
35 | |
36 | BugType BT_Null{this, "Dereference of null pointer" , categories::LogicError}; |
37 | BugType BT_Undef{this, "Dereference of undefined pointer value" , |
38 | categories::LogicError}; |
39 | BugType BT_Label{this, "Dereference of the address of a label" , |
40 | categories::LogicError}; |
41 | |
42 | void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S, |
43 | CheckerContext &C) const; |
44 | |
45 | bool suppressReport(CheckerContext &C, const Expr *E) const; |
46 | |
47 | public: |
48 | void checkLocation(SVal location, bool isLoad, const Stmt* S, |
49 | CheckerContext &C) const; |
50 | void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; |
51 | |
52 | static void AddDerefSource(raw_ostream &os, |
53 | SmallVectorImpl<SourceRange> &Ranges, |
54 | const Expr *Ex, const ProgramState *state, |
55 | const LocationContext *LCtx, |
56 | bool loadedFrom = false); |
57 | |
58 | bool SuppressAddressSpaces = false; |
59 | }; |
60 | } // end anonymous namespace |
61 | |
62 | void |
63 | DereferenceChecker::AddDerefSource(raw_ostream &os, |
64 | SmallVectorImpl<SourceRange> &Ranges, |
65 | const Expr *Ex, |
66 | const ProgramState *state, |
67 | const LocationContext *LCtx, |
68 | bool loadedFrom) { |
69 | Ex = Ex->IgnoreParenLValueCasts(); |
70 | switch (Ex->getStmtClass()) { |
71 | default: |
72 | break; |
73 | case Stmt::DeclRefExprClass: { |
74 | const DeclRefExpr *DR = cast<DeclRefExpr>(Val: Ex); |
75 | if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DR->getDecl())) { |
76 | os << " (" << (loadedFrom ? "loaded from" : "from" ) |
77 | << " variable '" << VD->getName() << "')" ; |
78 | Ranges.push_back(Elt: DR->getSourceRange()); |
79 | } |
80 | break; |
81 | } |
82 | case Stmt::MemberExprClass: { |
83 | const MemberExpr *ME = cast<MemberExpr>(Val: Ex); |
84 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
85 | << " field '" << ME->getMemberNameInfo() << "')" ; |
86 | SourceLocation L = ME->getMemberLoc(); |
87 | Ranges.push_back(Elt: SourceRange(L, L)); |
88 | break; |
89 | } |
90 | case Stmt::ObjCIvarRefExprClass: { |
91 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: Ex); |
92 | os << " (" << (loadedFrom ? "loaded from" : "via" ) |
93 | << " ivar '" << IV->getDecl()->getName() << "')" ; |
94 | SourceLocation L = IV->getLocation(); |
95 | Ranges.push_back(Elt: SourceRange(L, L)); |
96 | break; |
97 | } |
98 | } |
99 | } |
100 | |
101 | static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){ |
102 | const Expr *E = nullptr; |
103 | |
104 | // Walk through lvalue casts to get the original expression |
105 | // that syntactically caused the load. |
106 | if (const Expr *expr = dyn_cast<Expr>(Val: S)) |
107 | E = expr->IgnoreParenLValueCasts(); |
108 | |
109 | if (IsBind) { |
110 | const VarDecl *VD; |
111 | const Expr *Init; |
112 | std::tie(args&: VD, args&: Init) = parseAssignment(S); |
113 | if (VD && Init) |
114 | E = Init; |
115 | } |
116 | return E; |
117 | } |
118 | |
119 | bool DereferenceChecker::suppressReport(CheckerContext &C, |
120 | const Expr *E) const { |
121 | // Do not report dereferences on memory that use address space #256, #257, |
122 | // and #258. Those address spaces are used when dereferencing address spaces |
123 | // relative to the GS, FS, and SS segments on x86/x86-64 targets. |
124 | // Dereferencing a null pointer in these address spaces is not defined |
125 | // as an error. All other null dereferences in other address spaces |
126 | // are defined as an error unless explicitly defined. |
127 | // See https://clang.llvm.org/docs/LanguageExtensions.html, the section |
128 | // "X86/X86-64 Language Extensions" |
129 | |
130 | QualType Ty = E->getType(); |
131 | if (!Ty.hasAddressSpace()) |
132 | return false; |
133 | if (SuppressAddressSpaces) |
134 | return true; |
135 | |
136 | const llvm::Triple::ArchType Arch = |
137 | C.getASTContext().getTargetInfo().getTriple().getArch(); |
138 | |
139 | if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) { |
140 | switch (toTargetAddressSpace(AS: E->getType().getAddressSpace())) { |
141 | case 256: |
142 | case 257: |
143 | case 258: |
144 | return true; |
145 | } |
146 | } |
147 | return false; |
148 | } |
149 | |
150 | static bool isDeclRefExprToReference(const Expr *E) { |
151 | if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) |
152 | return DRE->getDecl()->getType()->isReferenceType(); |
153 | return false; |
154 | } |
155 | |
156 | void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, |
157 | const Stmt *S, CheckerContext &C) const { |
158 | const BugType *BT = nullptr; |
159 | llvm::StringRef DerefStr1; |
160 | llvm::StringRef DerefStr2; |
161 | switch (K) { |
162 | case DerefKind::NullPointer: |
163 | BT = &BT_Null; |
164 | DerefStr1 = " results in a null pointer dereference" ; |
165 | DerefStr2 = " results in a dereference of a null pointer" ; |
166 | break; |
167 | case DerefKind::UndefinedPointerValue: |
168 | BT = &BT_Undef; |
169 | DerefStr1 = " results in an undefined pointer dereference" ; |
170 | DerefStr2 = " results in a dereference of an undefined pointer value" ; |
171 | break; |
172 | case DerefKind::AddressOfLabel: |
173 | BT = &BT_Label; |
174 | DerefStr1 = " results in an undefined pointer dereference" ; |
175 | DerefStr2 = " results in a dereference of an address of a label" ; |
176 | break; |
177 | }; |
178 | |
179 | // Generate an error node. |
180 | ExplodedNode *N = C.generateErrorNode(State); |
181 | if (!N) |
182 | return; |
183 | |
184 | SmallString<100> buf; |
185 | llvm::raw_svector_ostream os(buf); |
186 | |
187 | SmallVector<SourceRange, 2> Ranges; |
188 | |
189 | switch (S->getStmtClass()) { |
190 | case Stmt::ArraySubscriptExprClass: { |
191 | os << "Array access" ; |
192 | const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(Val: S); |
193 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
194 | state: State.get(), LCtx: N->getLocationContext()); |
195 | os << DerefStr1; |
196 | break; |
197 | } |
198 | case Stmt::ArraySectionExprClass: { |
199 | os << "Array access" ; |
200 | const ArraySectionExpr *AE = cast<ArraySectionExpr>(Val: S); |
201 | AddDerefSource(os, Ranges, Ex: AE->getBase()->IgnoreParenCasts(), |
202 | state: State.get(), LCtx: N->getLocationContext()); |
203 | os << DerefStr1; |
204 | break; |
205 | } |
206 | case Stmt::UnaryOperatorClass: { |
207 | os << BT->getDescription(); |
208 | const UnaryOperator *U = cast<UnaryOperator>(Val: S); |
209 | AddDerefSource(os, Ranges, Ex: U->getSubExpr()->IgnoreParens(), |
210 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
211 | break; |
212 | } |
213 | case Stmt::MemberExprClass: { |
214 | const MemberExpr *M = cast<MemberExpr>(Val: S); |
215 | if (M->isArrow() || isDeclRefExprToReference(E: M->getBase())) { |
216 | os << "Access to field '" << M->getMemberNameInfo() << "'" << DerefStr2; |
217 | AddDerefSource(os, Ranges, Ex: M->getBase()->IgnoreParenCasts(), |
218 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
219 | } |
220 | break; |
221 | } |
222 | case Stmt::ObjCIvarRefExprClass: { |
223 | const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Val: S); |
224 | os << "Access to instance variable '" << *IV->getDecl() << "'" << DerefStr2; |
225 | AddDerefSource(os, Ranges, Ex: IV->getBase()->IgnoreParenCasts(), |
226 | state: State.get(), LCtx: N->getLocationContext(), loadedFrom: true); |
227 | break; |
228 | } |
229 | default: |
230 | break; |
231 | } |
232 | |
233 | auto report = std::make_unique<PathSensitiveBugReport>( |
234 | args: *BT, args: buf.empty() ? BT->getDescription() : buf.str(), args&: N); |
235 | |
236 | bugreporter::trackExpressionValue(N, E: bugreporter::getDerefExpr(S), R&: *report); |
237 | |
238 | for (SmallVectorImpl<SourceRange>::iterator |
239 | I = Ranges.begin(), E = Ranges.end(); I!=E; ++I) |
240 | report->addRange(R: *I); |
241 | |
242 | C.emitReport(R: std::move(report)); |
243 | } |
244 | |
245 | void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, |
246 | CheckerContext &C) const { |
247 | // Check for dereference of an undefined value. |
248 | if (l.isUndef()) { |
249 | const Expr *DerefExpr = getDereferenceExpr(S); |
250 | if (!suppressReport(C, E: DerefExpr)) |
251 | reportBug(K: DerefKind::UndefinedPointerValue, State: C.getState(), S: DerefExpr, C); |
252 | return; |
253 | } |
254 | |
255 | DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>(); |
256 | |
257 | // Check for null dereferences. |
258 | if (!isa<Loc>(Val: location)) |
259 | return; |
260 | |
261 | ProgramStateRef state = C.getState(); |
262 | |
263 | ProgramStateRef notNullState, nullState; |
264 | std::tie(args&: notNullState, args&: nullState) = state->assume(Cond: location); |
265 | |
266 | if (nullState) { |
267 | if (!notNullState) { |
268 | // We know that 'location' can only be null. This is what |
269 | // we call an "explicit" null dereference. |
270 | const Expr *expr = getDereferenceExpr(S); |
271 | if (!suppressReport(C, E: expr)) { |
272 | reportBug(K: DerefKind::NullPointer, State: nullState, S: expr, C); |
273 | return; |
274 | } |
275 | } |
276 | |
277 | // Otherwise, we have the case where the location could either be |
278 | // null or not-null. Record the error node as an "implicit" null |
279 | // dereference. |
280 | if (ExplodedNode *N = C.generateSink(State: nullState, Pred: C.getPredecessor())) { |
281 | ImplicitNullDerefEvent event = {.Location: l, .IsLoad: isLoad, .SinkNode: N, .BR: &C.getBugReporter(), |
282 | /*IsDirectDereference=*/true}; |
283 | dispatchEvent(event); |
284 | } |
285 | } |
286 | |
287 | // From this point forward, we know that the location is not null. |
288 | C.addTransition(State: notNullState); |
289 | } |
290 | |
291 | void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, |
292 | CheckerContext &C) const { |
293 | // If we're binding to a reference, check if the value is known to be null. |
294 | if (V.isUndef()) |
295 | return; |
296 | |
297 | // One should never write to label addresses. |
298 | if (auto Label = L.getAs<loc::GotoLabel>()) { |
299 | reportBug(K: DerefKind::AddressOfLabel, State: C.getState(), S, C); |
300 | return; |
301 | } |
302 | |
303 | const MemRegion *MR = L.getAsRegion(); |
304 | const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(Val: MR); |
305 | if (!TVR) |
306 | return; |
307 | |
308 | if (!TVR->getValueType()->isReferenceType()) |
309 | return; |
310 | |
311 | ProgramStateRef State = C.getState(); |
312 | |
313 | ProgramStateRef StNonNull, StNull; |
314 | std::tie(args&: StNonNull, args&: StNull) = State->assume(Cond: V.castAs<DefinedOrUnknownSVal>()); |
315 | |
316 | if (StNull) { |
317 | if (!StNonNull) { |
318 | const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true); |
319 | if (!suppressReport(C, E: expr)) { |
320 | reportBug(K: DerefKind::NullPointer, State: StNull, S: expr, C); |
321 | return; |
322 | } |
323 | } |
324 | |
325 | // At this point the value could be either null or non-null. |
326 | // Record this as an "implicit" null dereference. |
327 | if (ExplodedNode *N = C.generateSink(State: StNull, Pred: C.getPredecessor())) { |
328 | ImplicitNullDerefEvent event = {.Location: V, /*isLoad=*/.IsLoad: true, .SinkNode: N, |
329 | .BR: &C.getBugReporter(), |
330 | /*IsDirectDereference=*/true}; |
331 | dispatchEvent(event); |
332 | } |
333 | } |
334 | |
335 | // Unlike a regular null dereference, initializing a reference with a |
336 | // dereferenced null pointer does not actually cause a runtime exception in |
337 | // Clang's implementation of references. |
338 | // |
339 | // int &r = *p; // safe?? |
340 | // if (p != NULL) return; // uh-oh |
341 | // r = 5; // trap here |
342 | // |
343 | // The standard says this is invalid as soon as we try to create a "null |
344 | // reference" (there is no such thing), but turning this into an assumption |
345 | // that 'p' is never null will not match our actual runtime behavior. |
346 | // So we do not record this assumption, allowing us to warn on the last line |
347 | // of this example. |
348 | // |
349 | // We do need to add a transition because we may have generated a sink for |
350 | // the "implicit" null dereference. |
351 | C.addTransition(State, Tag: this); |
352 | } |
353 | |
354 | void ento::registerDereferenceChecker(CheckerManager &mgr) { |
355 | auto *Chk = mgr.registerChecker<DereferenceChecker>(); |
356 | Chk->SuppressAddressSpaces = mgr.getAnalyzerOptions().getCheckerBooleanOption( |
357 | CheckerName: mgr.getCurrentCheckerName(), OptionName: "SuppressAddressSpaces" ); |
358 | } |
359 | |
360 | bool ento::shouldRegisterDereferenceChecker(const CheckerManager &mgr) { |
361 | return true; |
362 | } |
363 | |