| 1 | //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines a check that marks a raw pointer to a C++ container's |
| 10 | // inner buffer released when the object is destroyed. This information can |
| 11 | // be used by MallocChecker to detect use-after-free problems. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AllocationState.h" |
| 16 | #include "InterCheckerAPI.h" |
| 17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
| 18 | #include "clang/StaticAnalyzer/Core/Checker.h" |
| 19 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
| 20 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
| 21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
| 22 | |
| 23 | using namespace clang; |
| 24 | using namespace ento; |
| 25 | |
| 26 | // Associate container objects with a set of raw pointer symbols. |
| 27 | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef) |
| 28 | REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet) |
| 29 | |
| 30 | |
| 31 | namespace { |
| 32 | |
| 33 | class InnerPointerChecker |
| 34 | : public Checker<check::DeadSymbols, check::PostCall> { |
| 35 | |
| 36 | CallDescriptionSet InvalidatingMemberFunctions{ |
| 37 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "append" }), |
| 38 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "assign" }), |
| 39 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "clear" }), |
| 40 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "erase" }), |
| 41 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "insert" }), |
| 42 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "pop_back" }), |
| 43 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "push_back" }), |
| 44 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "replace" }), |
| 45 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "reserve" }), |
| 46 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "resize" }), |
| 47 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "shrink_to_fit" }), |
| 48 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "swap" })}; |
| 49 | |
| 50 | CallDescriptionSet AddressofFunctions{ |
| 51 | CallDescription(CDM::SimpleFunc, {"std" , "addressof" }), |
| 52 | CallDescription(CDM::SimpleFunc, {"std" , "__addressof" })}; |
| 53 | |
| 54 | CallDescriptionSet InnerPointerAccessFunctions{ |
| 55 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "c_str" }), |
| 56 | CallDescription(CDM::SimpleFunc, {"std" , "data" }, 1), |
| 57 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "data" })}; |
| 58 | |
| 59 | public: |
| 60 | class InnerPointerBRVisitor : public BugReporterVisitor { |
| 61 | SymbolRef PtrToBuf; |
| 62 | |
| 63 | public: |
| 64 | InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} |
| 65 | |
| 66 | static void *getTag() { |
| 67 | static int Tag = 0; |
| 68 | return &Tag; |
| 69 | } |
| 70 | |
| 71 | void Profile(llvm::FoldingSetNodeID &ID) const override { |
| 72 | ID.AddPointer(Ptr: getTag()); |
| 73 | } |
| 74 | |
| 75 | PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, |
| 76 | BugReporterContext &BRC, |
| 77 | PathSensitiveBugReport &BR) override; |
| 78 | |
| 79 | // FIXME: Scan the map once in the visitor's constructor and do a direct |
| 80 | // lookup by region. |
| 81 | bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { |
| 82 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
| 83 | for (const auto &Entry : Map) { |
| 84 | if (Entry.second.contains(V: Sym)) |
| 85 | return true; |
| 86 | } |
| 87 | return false; |
| 88 | } |
| 89 | }; |
| 90 | |
| 91 | /// Check whether the called member function potentially invalidates |
| 92 | /// pointers referring to the container object's inner buffer. |
| 93 | bool isInvalidatingMemberFunction(const CallEvent &Call) const; |
| 94 | |
| 95 | /// Mark pointer symbols associated with the given memory region released |
| 96 | /// in the program state. |
| 97 | void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, |
| 98 | const MemRegion *ObjRegion, |
| 99 | CheckerContext &C) const; |
| 100 | |
| 101 | /// Standard library functions that take a non-const `basic_string` argument by |
| 102 | /// reference may invalidate its inner pointers. Check for these cases and |
| 103 | /// mark the pointers released. |
| 104 | void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, |
| 105 | CheckerContext &C) const; |
| 106 | |
| 107 | /// Record the connection between raw pointers referring to a container |
| 108 | /// object's inner buffer and the object's memory region in the program state. |
| 109 | /// Mark potentially invalidated pointers released. |
| 110 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
| 111 | |
| 112 | /// Clean up the program state map. |
| 113 | void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; |
| 114 | }; |
| 115 | |
| 116 | } // end anonymous namespace |
| 117 | |
| 118 | bool InnerPointerChecker::isInvalidatingMemberFunction( |
| 119 | const CallEvent &Call) const { |
| 120 | if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(Val: &Call)) { |
| 121 | OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); |
| 122 | if (Opc == OO_Equal || Opc == OO_PlusEqual) |
| 123 | return true; |
| 124 | return false; |
| 125 | } |
| 126 | return isa<CXXDestructorCall>(Val: Call) || |
| 127 | InvalidatingMemberFunctions.contains(Call); |
| 128 | } |
| 129 | |
| 130 | void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, |
| 131 | ProgramStateRef State, |
| 132 | const MemRegion *MR, |
| 133 | CheckerContext &C) const { |
| 134 | if (const PtrSet *PS = State->get<RawPtrMap>(key: MR)) { |
| 135 | const Expr *Origin = Call.getOriginExpr(); |
| 136 | for (const auto Symbol : *PS) { |
| 137 | // NOTE: `Origin` may be null, and will be stored so in the symbol's |
| 138 | // `RefState` in MallocChecker's `RegionState` program state map. |
| 139 | State = allocation_state::markReleased(State, Sym: Symbol, Origin); |
| 140 | } |
| 141 | State = State->remove<RawPtrMap>(K: MR); |
| 142 | C.addTransition(State); |
| 143 | return; |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, |
| 148 | ProgramStateRef State, |
| 149 | CheckerContext &C) const { |
| 150 | if (const auto *FC = dyn_cast<AnyFunctionCall>(Val: &Call)) { |
| 151 | const FunctionDecl *FD = FC->getDecl(); |
| 152 | if (!FD || !FD->isInStdNamespace()) |
| 153 | return; |
| 154 | |
| 155 | for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { |
| 156 | QualType ParamTy = FD->getParamDecl(i: I)->getType(); |
| 157 | if (!ParamTy->isReferenceType() || |
| 158 | ParamTy->getPointeeType().isConstQualified()) |
| 159 | continue; |
| 160 | |
| 161 | // In case of member operator calls, `this` is counted as an |
| 162 | // argument but not as a parameter. |
| 163 | bool isaMemberOpCall = isa<CXXMemberOperatorCall>(Val: FC); |
| 164 | unsigned ArgI = isaMemberOpCall ? I+1 : I; |
| 165 | |
| 166 | SVal Arg = FC->getArgSVal(Index: ArgI); |
| 167 | const auto *ArgRegion = |
| 168 | dyn_cast_or_null<TypedValueRegion>(Val: Arg.getAsRegion()); |
| 169 | if (!ArgRegion) |
| 170 | continue; |
| 171 | |
| 172 | // std::addressof functions accepts a non-const reference as an argument, |
| 173 | // but doesn't modify it. |
| 174 | if (AddressofFunctions.contains(Call)) |
| 175 | continue; |
| 176 | |
| 177 | markPtrSymbolsReleased(Call, State, MR: ArgRegion, C); |
| 178 | } |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | // [string.require] |
| 183 | // |
| 184 | // "References, pointers, and iterators referring to the elements of a |
| 185 | // basic_string sequence may be invalidated by the following uses of that |
| 186 | // basic_string object: |
| 187 | // |
| 188 | // -- As an argument to any standard library function taking a reference |
| 189 | // to non-const basic_string as an argument. For example, as an argument to |
| 190 | // non-member functions swap(), operator>>(), and getline(), or as an argument |
| 191 | // to basic_string::swap(). |
| 192 | // |
| 193 | // -- Calling non-const member functions, except operator[], at, front, back, |
| 194 | // begin, rbegin, end, and rend." |
| 195 | |
| 196 | void InnerPointerChecker::checkPostCall(const CallEvent &Call, |
| 197 | CheckerContext &C) const { |
| 198 | ProgramStateRef State = C.getState(); |
| 199 | |
| 200 | // TODO: Do we need these to be typed? |
| 201 | const TypedValueRegion *ObjRegion = nullptr; |
| 202 | |
| 203 | if (const auto *ICall = dyn_cast<CXXInstanceCall>(Val: &Call)) { |
| 204 | ObjRegion = dyn_cast_or_null<TypedValueRegion>( |
| 205 | Val: ICall->getCXXThisVal().getAsRegion()); |
| 206 | |
| 207 | // Check [string.require] / second point. |
| 208 | if (isInvalidatingMemberFunction(Call)) { |
| 209 | markPtrSymbolsReleased(Call, State, MR: ObjRegion, C); |
| 210 | return; |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | if (InnerPointerAccessFunctions.contains(Call)) { |
| 215 | |
| 216 | if (isa<SimpleFunctionCall>(Val: Call)) { |
| 217 | // NOTE: As of now, we only have one free access function: std::data. |
| 218 | // If we add more functions like this in the list, hardcoded |
| 219 | // argument index should be changed. |
| 220 | ObjRegion = |
| 221 | dyn_cast_or_null<TypedValueRegion>(Val: Call.getArgSVal(Index: 0).getAsRegion()); |
| 222 | } |
| 223 | |
| 224 | if (!ObjRegion) |
| 225 | return; |
| 226 | |
| 227 | SVal RawPtr = Call.getReturnValue(); |
| 228 | if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { |
| 229 | // Start tracking this raw pointer by adding it to the set of symbols |
| 230 | // associated with this container object in the program state map. |
| 231 | |
| 232 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
| 233 | const PtrSet *SetPtr = State->get<RawPtrMap>(key: ObjRegion); |
| 234 | PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); |
| 235 | assert(C.wasInlined || !Set.contains(Sym)); |
| 236 | Set = F.add(Old: Set, V: Sym); |
| 237 | |
| 238 | State = State->set<RawPtrMap>(K: ObjRegion, E: Set); |
| 239 | C.addTransition(State); |
| 240 | } |
| 241 | |
| 242 | return; |
| 243 | } |
| 244 | |
| 245 | // Check [string.require] / first point. |
| 246 | checkFunctionArguments(Call, State, C); |
| 247 | } |
| 248 | |
| 249 | void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, |
| 250 | CheckerContext &C) const { |
| 251 | ProgramStateRef State = C.getState(); |
| 252 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
| 253 | RawPtrMapTy RPM = State->get<RawPtrMap>(); |
| 254 | for (const auto &Entry : RPM) { |
| 255 | if (!SymReaper.isLiveRegion(region: Entry.first)) { |
| 256 | // Due to incomplete destructor support, some dead regions might |
| 257 | // remain in the program state map. Clean them up. |
| 258 | State = State->remove<RawPtrMap>(K: Entry.first); |
| 259 | } |
| 260 | if (const PtrSet *OldSet = State->get<RawPtrMap>(key: Entry.first)) { |
| 261 | PtrSet CleanedUpSet = *OldSet; |
| 262 | for (const auto Symbol : Entry.second) { |
| 263 | if (!SymReaper.isLive(sym: Symbol)) |
| 264 | CleanedUpSet = F.remove(Old: CleanedUpSet, V: Symbol); |
| 265 | } |
| 266 | State = CleanedUpSet.isEmpty() |
| 267 | ? State->remove<RawPtrMap>(K: Entry.first) |
| 268 | : State->set<RawPtrMap>(K: Entry.first, E: CleanedUpSet); |
| 269 | } |
| 270 | } |
| 271 | C.addTransition(State); |
| 272 | } |
| 273 | |
| 274 | namespace clang { |
| 275 | namespace ento { |
| 276 | namespace allocation_state { |
| 277 | |
| 278 | std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { |
| 279 | return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(args&: Sym); |
| 280 | } |
| 281 | |
| 282 | const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { |
| 283 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
| 284 | for (const auto &Entry : Map) { |
| 285 | if (Entry.second.contains(V: Sym)) { |
| 286 | return Entry.first; |
| 287 | } |
| 288 | } |
| 289 | return nullptr; |
| 290 | } |
| 291 | |
| 292 | } // end namespace allocation_state |
| 293 | } // end namespace ento |
| 294 | } // end namespace clang |
| 295 | |
| 296 | PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( |
| 297 | const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { |
| 298 | if (!isSymbolTracked(State: N->getState(), Sym: PtrToBuf) || |
| 299 | isSymbolTracked(State: N->getFirstPred()->getState(), Sym: PtrToBuf)) |
| 300 | return nullptr; |
| 301 | |
| 302 | const Stmt *S = N->getStmtForDiagnostics(); |
| 303 | if (!S) |
| 304 | return nullptr; |
| 305 | |
| 306 | const MemRegion *ObjRegion = |
| 307 | allocation_state::getContainerObjRegion(State: N->getState(), Sym: PtrToBuf); |
| 308 | const auto *TypedRegion = cast<TypedValueRegion>(Val: ObjRegion); |
| 309 | QualType ObjTy = TypedRegion->getValueType(); |
| 310 | |
| 311 | SmallString<256> Buf; |
| 312 | llvm::raw_svector_ostream OS(Buf); |
| 313 | OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here" ; |
| 314 | PathDiagnosticLocation Pos(S, BRC.getSourceManager(), |
| 315 | N->getLocationContext()); |
| 316 | return std::make_shared<PathDiagnosticEventPiece>(args&: Pos, args: OS.str(), args: true); |
| 317 | } |
| 318 | |
| 319 | void ento::registerInnerPointerChecker(CheckerManager &Mgr) { |
| 320 | registerInnerPointerCheckerAux(Mgr); |
| 321 | Mgr.registerChecker<InnerPointerChecker>(); |
| 322 | } |
| 323 | |
| 324 | bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { |
| 325 | return true; |
| 326 | } |
| 327 | |