1//===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a checker that reports uninitialized fields in objects
10// created after a constructor call.
11//
12// To read about command line options and how the checker works, refer to the
13// top of the file and inline comments in UninitializedObject.h.
14//
15// Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16// complexity of this file.
17//
18//===----------------------------------------------------------------------===//
19
20#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21#include "UninitializedObject.h"
22#include "clang/ASTMatchers/ASTMatchFinder.h"
23#include "clang/Driver/DriverDiagnostic.h"
24#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
25#include "clang/StaticAnalyzer/Core/Checker.h"
26#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
28
29using namespace clang;
30using namespace clang::ento;
31using namespace clang::ast_matchers;
32
33/// We'll mark fields (and pointee of fields) that are confirmed to be
34/// uninitialized as already analyzed.
35REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
36
37namespace {
38
39class UninitializedObjectChecker
40 : public Checker<check::EndFunction, check::DeadSymbols> {
41 const BugType BT_uninitField{this, "Uninitialized fields"};
42
43public:
44 // The fields of this struct will be initialized when registering the checker.
45 UninitObjCheckerOptions Opts;
46
47 void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
48 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
49};
50
51/// A basic field type, that is not a pointer or a reference, it's dynamic and
52/// static type is the same.
53class RegularField final : public FieldNode {
54public:
55 RegularField(const FieldRegion *FR) : FieldNode(FR) {}
56
57 void printNoteMsg(llvm::raw_ostream &Out) const override {
58 Out << "uninitialized field ";
59 }
60
61 void printPrefix(llvm::raw_ostream &Out) const override {}
62
63 void printNode(llvm::raw_ostream &Out) const override {
64 Out << getVariableName(Field: getDecl());
65 }
66
67 void printSeparator(llvm::raw_ostream &Out) const override { Out << '.'; }
68};
69
70/// Represents that the FieldNode that comes after this is declared in a base
71/// of the previous FieldNode. As such, this descendant doesn't wrap a
72/// FieldRegion, and is purely a tool to describe a relation between two other
73/// FieldRegion wrapping descendants.
74class BaseClass final : public FieldNode {
75 const QualType BaseClassT;
76
77public:
78 BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
79 assert(!T.isNull());
80 assert(T->getAsCXXRecordDecl());
81 }
82
83 void printNoteMsg(llvm::raw_ostream &Out) const override {
84 llvm_unreachable("This node can never be the final node in the "
85 "fieldchain!");
86 }
87
88 void printPrefix(llvm::raw_ostream &Out) const override {}
89
90 void printNode(llvm::raw_ostream &Out) const override {
91 Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
92 }
93
94 void printSeparator(llvm::raw_ostream &Out) const override {}
95
96 bool isBase() const override { return true; }
97};
98
99} // end of anonymous namespace
100
101// Utility function declarations.
102
103/// Returns the region that was constructed by CtorDecl, or nullptr if that
104/// isn't possible.
105static const TypedValueRegion *
106getConstructedRegion(const CXXConstructorDecl *CtorDecl,
107 CheckerContext &Context);
108
109/// Checks whether the object constructed by \p Ctor will be analyzed later
110/// (e.g. if the object is a field of another object, in which case we'd check
111/// it multiple times).
112static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
113 CheckerContext &Context);
114
115/// Checks whether RD contains a field with a name or type name that matches
116/// \p Pattern.
117static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
118
119/// Checks _syntactically_ whether it is possible to access FD from the record
120/// that contains it without a preceding assert (even if that access happens
121/// inside a method). This is mainly used for records that act like unions, like
122/// having multiple bit fields, with only a fraction being properly initialized.
123/// If these fields are properly guarded with asserts, this method returns
124/// false.
125///
126/// Since this check is done syntactically, this method could be inaccurate.
127static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
128
129//===----------------------------------------------------------------------===//
130// Methods for UninitializedObjectChecker.
131//===----------------------------------------------------------------------===//
132
133void UninitializedObjectChecker::checkEndFunction(
134 const ReturnStmt *RS, CheckerContext &Context) const {
135
136 const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
137 Val: Context.getLocationContext()->getDecl());
138 if (!CtorDecl)
139 return;
140
141 if (!CtorDecl->isUserProvided())
142 return;
143
144 if (CtorDecl->getParent()->isUnion())
145 return;
146
147 // This avoids essentially the same error being reported multiple times.
148 if (willObjectBeAnalyzedLater(Ctor: CtorDecl, Context))
149 return;
150
151 const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
152 if (!R)
153 return;
154
155 FindUninitializedFields F(Context.getState(), R, Opts);
156
157 std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
158 F.getResults();
159
160 ProgramStateRef UpdatedState = UninitInfo.first;
161 const UninitFieldMap &UninitFields = UninitInfo.second;
162
163 if (UninitFields.empty()) {
164 Context.addTransition(State: UpdatedState);
165 return;
166 }
167
168 // There are uninitialized fields in the record.
169
170 ExplodedNode *Node = Context.generateNonFatalErrorNode(State: UpdatedState);
171 if (!Node)
172 return;
173
174 PathDiagnosticLocation LocUsedForUniqueing;
175 const Stmt *CallSite = Context.getStackFrame()->getCallSite();
176 if (CallSite)
177 LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
178 S: CallSite, SM: Context.getSourceManager(), LAC: Node->getLocationContext());
179
180 // For Plist consumers that don't support notes just yet, we'll convert notes
181 // to warnings.
182 if (Opts.ShouldConvertNotesToWarnings) {
183 for (const auto &Pair : UninitFields) {
184
185 auto Report = std::make_unique<PathSensitiveBugReport>(
186 args: BT_uninitField, args: Pair.second, args&: Node, args&: LocUsedForUniqueing,
187 args: Node->getLocationContext()->getDecl());
188 Context.emitReport(R: std::move(Report));
189 }
190 return;
191 }
192
193 SmallString<100> WarningBuf;
194 llvm::raw_svector_ostream WarningOS(WarningBuf);
195 WarningOS << UninitFields.size() << " uninitialized field"
196 << (UninitFields.size() == 1 ? "" : "s")
197 << " at the end of the constructor call";
198
199 auto Report = std::make_unique<PathSensitiveBugReport>(
200 args: BT_uninitField, args: WarningOS.str(), args&: Node, args&: LocUsedForUniqueing,
201 args: Node->getLocationContext()->getDecl());
202
203 for (const auto &Pair : UninitFields) {
204 Report->addNote(Msg: Pair.second,
205 Pos: PathDiagnosticLocation::create(D: Pair.first->getDecl(),
206 SM: Context.getSourceManager()));
207 }
208 Context.emitReport(R: std::move(Report));
209}
210
211void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
212 CheckerContext &C) const {
213 ProgramStateRef State = C.getState();
214 for (const MemRegion *R : State->get<AnalyzedRegions>()) {
215 if (!SR.isLiveRegion(region: R))
216 State = State->remove<AnalyzedRegions>(K: R);
217 }
218}
219
220//===----------------------------------------------------------------------===//
221// Methods for FindUninitializedFields.
222//===----------------------------------------------------------------------===//
223
224FindUninitializedFields::FindUninitializedFields(
225 ProgramStateRef State, const TypedValueRegion *const R,
226 const UninitObjCheckerOptions &Opts)
227 : State(State), ObjectR(R), Opts(Opts) {
228
229 isNonUnionUninit(R: ObjectR, LocalChain: FieldChainInfo(ChainFactory));
230
231 // In non-pedantic mode, if ObjectR doesn't contain a single initialized
232 // field, we'll assume that Object was intentionally left uninitialized.
233 if (!Opts.IsPedantic && !isAnyFieldInitialized())
234 UninitFields.clear();
235}
236
237bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
238 const MemRegion *PointeeR) {
239 const FieldRegion *FR = Chain.getUninitRegion();
240
241 assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
242 "One must also pass the pointee region as a parameter for "
243 "dereferenceable fields!");
244
245 if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
246 Loc: FR->getDecl()->getLocation()))
247 return false;
248
249 if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FD: FR->getDecl(), State))
250 return false;
251
252 if (State->contains<AnalyzedRegions>(key: FR))
253 return false;
254
255 if (PointeeR) {
256 if (State->contains<AnalyzedRegions>(key: PointeeR)) {
257 return false;
258 }
259 State = State->add<AnalyzedRegions>(K: PointeeR);
260 }
261
262 State = State->add<AnalyzedRegions>(K: FR);
263
264 UninitFieldMap::mapped_type NoteMsgBuf;
265 llvm::raw_svector_ostream OS(NoteMsgBuf);
266 Chain.printNoteMsg(Out&: OS);
267
268 return UninitFields.insert(x: {FR, std::move(NoteMsgBuf)}).second;
269}
270
271bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
272 FieldChainInfo LocalChain) {
273 assert(R->getValueType()->isRecordType() &&
274 !R->getValueType()->isUnionType() &&
275 "This method only checks non-union record objects!");
276
277 const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
278
279 if (!RD) {
280 IsAnyFieldInitialized = true;
281 return true;
282 }
283
284 if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
285 shouldIgnoreRecord(RD, Pattern: Opts.IgnoredRecordsWithFieldPattern)) {
286 IsAnyFieldInitialized = true;
287 return false;
288 }
289
290 bool ContainsUninitField = false;
291
292 // Are all of this non-union's fields initialized?
293 for (const FieldDecl *I : RD->fields()) {
294
295 const auto FieldVal =
296 State->getLValue(decl: I, Base: loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
297 const auto *FR = FieldVal.getRegionAs<FieldRegion>();
298 QualType T = I->getType();
299
300 // If LocalChain already contains FR, then we encountered a cyclic
301 // reference. In this case, region FR is already under checking at an
302 // earlier node in the directed tree.
303 if (LocalChain.contains(FR))
304 return false;
305
306 if (T->isStructureOrClassType()) {
307 if (isNonUnionUninit(R: FR, LocalChain: LocalChain.add(FN: RegularField(FR))))
308 ContainsUninitField = true;
309 continue;
310 }
311
312 if (T->isUnionType()) {
313 if (isUnionUninit(R: FR)) {
314 if (addFieldToUninits(Chain: LocalChain.add(FN: RegularField(FR))))
315 ContainsUninitField = true;
316 } else
317 IsAnyFieldInitialized = true;
318 continue;
319 }
320
321 if (T->isArrayType()) {
322 IsAnyFieldInitialized = true;
323 continue;
324 }
325
326 SVal V = State->getSVal(LV: FieldVal);
327
328 if (isDereferencableType(T) || isa<nonloc::LocAsInteger>(Val: V)) {
329 if (isDereferencableUninit(FR, LocalChain))
330 ContainsUninitField = true;
331 continue;
332 }
333
334 if (isPrimitiveType(T)) {
335 if (isPrimitiveUninit(V)) {
336 if (addFieldToUninits(Chain: LocalChain.add(FN: RegularField(FR))))
337 ContainsUninitField = true;
338 }
339 continue;
340 }
341
342 llvm_unreachable("All cases are handled!");
343 }
344
345 // Checking bases. The checker will regard inherited data members as direct
346 // fields.
347 const auto *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD);
348 if (!CXXRD)
349 return ContainsUninitField;
350
351 for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
352 const auto *BaseRegion = State->getLValue(BaseSpec, Super: R)
353 .castAs<loc::MemRegionVal>()
354 .getRegionAs<TypedValueRegion>();
355
356 // If the head of the list is also a BaseClass, we'll overwrite it to avoid
357 // note messages like 'this->A::B::x'.
358 if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
359 if (isNonUnionUninit(R: BaseRegion, LocalChain: LocalChain.replaceHead(
360 FN: BaseClass(BaseSpec.getType()))))
361 ContainsUninitField = true;
362 } else {
363 if (isNonUnionUninit(R: BaseRegion,
364 LocalChain: LocalChain.add(FN: BaseClass(BaseSpec.getType()))))
365 ContainsUninitField = true;
366 }
367 }
368
369 return ContainsUninitField;
370}
371
372bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
373 assert(R->getValueType()->isUnionType() &&
374 "This method only checks union objects!");
375 // TODO: Implement support for union fields.
376 return false;
377}
378
379bool FindUninitializedFields::isPrimitiveUninit(SVal V) {
380 if (V.isUndef())
381 return true;
382
383 IsAnyFieldInitialized = true;
384 return false;
385}
386
387//===----------------------------------------------------------------------===//
388// Methods for FieldChainInfo.
389//===----------------------------------------------------------------------===//
390
391bool FieldChainInfo::contains(const FieldRegion *FR) const {
392 for (const FieldNode &Node : Chain) {
393 if (Node.isSameRegion(OtherFR: FR))
394 return true;
395 }
396 return false;
397}
398
399/// Prints every element except the last to `Out`. Since ImmutableLists store
400/// elements in reverse order, and have no reverse iterators, we use a
401/// recursive function to print the fieldchain correctly. The last element in
402/// the chain is to be printed by `FieldChainInfo::print`.
403static void printTail(llvm::raw_ostream &Out,
404 const FieldChainInfo::FieldChain L);
405
406// FIXME: This function constructs an incorrect string in the following case:
407//
408// struct Base { int x; };
409// struct D1 : Base {}; struct D2 : Base {};
410//
411// struct MostDerived : D1, D2 {
412// MostDerived() {}
413// }
414//
415// A call to MostDerived::MostDerived() will cause two notes that say
416// "uninitialized field 'this->x'", but we can't refer to 'x' directly,
417// we need an explicit namespace resolution whether the uninit field was
418// 'D1::x' or 'D2::x'.
419void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
420 if (Chain.isEmpty())
421 return;
422
423 const FieldNode &LastField = getHead();
424
425 LastField.printNoteMsg(Out);
426 Out << '\'';
427
428 for (const FieldNode &Node : Chain)
429 Node.printPrefix(Out);
430
431 Out << "this->";
432 printTail(Out, L: Chain.getTail());
433 LastField.printNode(Out);
434 Out << '\'';
435}
436
437static void printTail(llvm::raw_ostream &Out,
438 const FieldChainInfo::FieldChain L) {
439 if (L.isEmpty())
440 return;
441
442 printTail(Out, L: L.getTail());
443
444 L.getHead().printNode(Out);
445 L.getHead().printSeparator(Out);
446}
447
448//===----------------------------------------------------------------------===//
449// Utility functions.
450//===----------------------------------------------------------------------===//
451
452static const TypedValueRegion *
453getConstructedRegion(const CXXConstructorDecl *CtorDecl,
454 CheckerContext &Context) {
455
456 Loc ThisLoc =
457 Context.getSValBuilder().getCXXThis(D: CtorDecl, SFC: Context.getStackFrame());
458
459 SVal ObjectV = Context.getState()->getSVal(LV: ThisLoc);
460
461 auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
462 if (R && !R->getValueType()->getAsCXXRecordDecl())
463 return nullptr;
464
465 return R;
466}
467
468static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
469 CheckerContext &Context) {
470
471 const TypedValueRegion *CurrRegion = getConstructedRegion(CtorDecl: Ctor, Context);
472 if (!CurrRegion)
473 return false;
474
475 const LocationContext *LC = Context.getLocationContext();
476 while ((LC = LC->getParent())) {
477
478 // If \p Ctor was called by another constructor.
479 const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(Val: LC->getDecl());
480 if (!OtherCtor)
481 continue;
482
483 const TypedValueRegion *OtherRegion =
484 getConstructedRegion(CtorDecl: OtherCtor, Context);
485 if (!OtherRegion)
486 continue;
487
488 // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
489 // during the analysis of OtherRegion.
490 if (CurrRegion->isSubRegionOf(R: OtherRegion))
491 return true;
492 }
493
494 return false;
495}
496
497static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
498 llvm::Regex R(Pattern);
499
500 for (const FieldDecl *FD : RD->fields()) {
501 if (R.match(String: FD->getType().getAsString()))
502 return true;
503 if (R.match(String: FD->getName()))
504 return true;
505 }
506
507 return false;
508}
509
510static const Stmt *getMethodBody(const CXXMethodDecl *M) {
511 if (isa<CXXConstructorDecl>(Val: M))
512 return nullptr;
513
514 if (!M->isDefined())
515 return nullptr;
516
517 return M->getDefinition()->getBody();
518}
519
520static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
521
522 if (FD->getAccess() == AccessSpecifier::AS_public)
523 return true;
524
525 const auto *Parent = dyn_cast<CXXRecordDecl>(Val: FD->getParent());
526
527 if (!Parent)
528 return true;
529
530 Parent = Parent->getDefinition();
531 assert(Parent && "The record's definition must be avaible if an uninitialized"
532 " field of it was found!");
533
534 ASTContext &AC = State->getStateManager().getContext();
535
536 auto FieldAccessM = memberExpr(hasDeclaration(InnerMatcher: equalsNode(Other: FD))).bind(ID: "access");
537
538 auto AssertLikeM = callExpr(callee(InnerMatcher: functionDecl(
539 hasAnyName("exit", "panic", "error", "Assert", "assert", "ziperr",
540 "assfail", "db_error", "__assert", "__assert2", "_wassert",
541 "__assert_rtn", "__assert_fail", "dtrace_assfail",
542 "yy_fatal_error", "_XCAssertionFailureHandler",
543 "_DTAssertionFailureHandler", "_TSAssertionFailureHandler"))));
544
545 auto NoReturnFuncM = callExpr(callee(InnerMatcher: functionDecl(isNoReturn())));
546
547 auto GuardM =
548 stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
549 NoReturnFuncM))
550 .bind(ID: "guard");
551
552 for (const CXXMethodDecl *M : Parent->methods()) {
553 const Stmt *MethodBody = getMethodBody(M);
554 if (!MethodBody)
555 continue;
556
557 auto Accesses = match(Matcher: stmt(hasDescendant(FieldAccessM)), Node: *MethodBody, Context&: AC);
558 if (Accesses.empty())
559 continue;
560 const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>(ID: "access");
561 assert(FirstAccess);
562
563 auto Guards = match(Matcher: stmt(hasDescendant(GuardM)), Node: *MethodBody, Context&: AC);
564 if (Guards.empty())
565 return true;
566 const auto *FirstGuard = Guards[0].getNodeAs<Stmt>(ID: "guard");
567 assert(FirstGuard);
568
569 if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
570 return true;
571 }
572
573 return false;
574}
575
576std::string clang::ento::getVariableName(const FieldDecl *Field) {
577 // If Field is a captured lambda variable, Field->getName() will return with
578 // an empty string. We can however acquire it's name from the lambda's
579 // captures.
580 const auto *CXXParent = dyn_cast<CXXRecordDecl>(Val: Field->getParent());
581
582 if (CXXParent && CXXParent->isLambda()) {
583 assert(CXXParent->captures_begin());
584 auto It = CXXParent->captures_begin() + Field->getFieldIndex();
585
586 if (It->capturesVariable())
587 return llvm::Twine("/*captured variable*/" +
588 It->getCapturedVar()->getName())
589 .str();
590
591 if (It->capturesThis())
592 return "/*'this' capture*/";
593
594 llvm_unreachable("No other capture type is expected!");
595 }
596
597 return std::string(Field->getName());
598}
599
600void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
601 auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
602
603 const AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
604 UninitObjCheckerOptions &ChOpts = Chk->Opts;
605
606 ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(C: Chk, OptionName: "Pedantic");
607 ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
608 C: Chk, OptionName: "NotesAsWarnings");
609 ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
610 C: Chk, OptionName: "CheckPointeeInitialization");
611 ChOpts.IgnoredRecordsWithFieldPattern =
612 std::string(AnOpts.getCheckerStringOption(C: Chk, OptionName: "IgnoreRecordsWithField"));
613 ChOpts.IgnoreGuardedFields =
614 AnOpts.getCheckerBooleanOption(C: Chk, OptionName: "IgnoreGuardedFields");
615
616 std::string ErrorMsg;
617 if (!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(Error&: ErrorMsg))
618 Mgr.reportInvalidCheckerOptionValue(C: Chk, OptionName: "IgnoreRecordsWithField",
619 ExpectedValueDesc: "a valid regex, building failed with error message "
620 "\"" + ErrorMsg + "\"");
621}
622
623bool ento::shouldRegisterUninitializedObjectChecker(const CheckerManager &mgr) {
624 return true;
625}
626