1 | //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a check that marks a raw pointer to a C++ container's |
10 | // inner buffer released when the object is destroyed. This information can |
11 | // be used by MallocChecker to detect use-after-free problems. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AllocationState.h" |
16 | #include "InterCheckerAPI.h" |
17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
18 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
19 | #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" |
20 | #include "clang/StaticAnalyzer/Core/Checker.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
22 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
23 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
24 | |
25 | using namespace clang; |
26 | using namespace ento; |
27 | |
// Associate container objects with a set of raw pointer symbols.
// `PtrSet` is a set of `SymbolRef`s; `RawPtrMap` is the program-state map
// keyed by the container object's `MemRegion` whose values are `PtrSet`s.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31 | |
32 | |
33 | namespace { |
34 | |
35 | class InnerPointerChecker |
36 | : public Checker<check::DeadSymbols, check::PostCall> { |
37 | |
38 | CallDescriptionSet InvalidatingMemberFunctions{ |
39 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "append" }), |
40 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "assign" }), |
41 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "clear" }), |
42 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "erase" }), |
43 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "insert" }), |
44 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "pop_back" }), |
45 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "push_back" }), |
46 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "replace" }), |
47 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "reserve" }), |
48 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "resize" }), |
49 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "shrink_to_fit" }), |
50 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "swap" })}; |
51 | |
52 | CallDescriptionSet AddressofFunctions{ |
53 | CallDescription(CDM::SimpleFunc, {"std" , "addressof" }), |
54 | CallDescription(CDM::SimpleFunc, {"std" , "__addressof" })}; |
55 | |
56 | CallDescriptionSet InnerPointerAccessFunctions{ |
57 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "c_str" }), |
58 | CallDescription(CDM::SimpleFunc, {"std" , "data" }, 1), |
59 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "data" })}; |
60 | |
61 | public: |
62 | class InnerPointerBRVisitor : public BugReporterVisitor { |
63 | SymbolRef PtrToBuf; |
64 | |
65 | public: |
66 | InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} |
67 | |
68 | static void *getTag() { |
69 | static int Tag = 0; |
70 | return &Tag; |
71 | } |
72 | |
73 | void Profile(llvm::FoldingSetNodeID &ID) const override { |
74 | ID.AddPointer(Ptr: getTag()); |
75 | } |
76 | |
77 | PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, |
78 | BugReporterContext &BRC, |
79 | PathSensitiveBugReport &BR) override; |
80 | |
81 | // FIXME: Scan the map once in the visitor's constructor and do a direct |
82 | // lookup by region. |
83 | bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { |
84 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
85 | for (const auto &Entry : Map) { |
86 | if (Entry.second.contains(V: Sym)) |
87 | return true; |
88 | } |
89 | return false; |
90 | } |
91 | }; |
92 | |
93 | /// Check whether the called member function potentially invalidates |
94 | /// pointers referring to the container object's inner buffer. |
95 | bool isInvalidatingMemberFunction(const CallEvent &Call) const; |
96 | |
97 | /// Mark pointer symbols associated with the given memory region released |
98 | /// in the program state. |
99 | void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, |
100 | const MemRegion *ObjRegion, |
101 | CheckerContext &C) const; |
102 | |
103 | /// Standard library functions that take a non-const `basic_string` argument by |
104 | /// reference may invalidate its inner pointers. Check for these cases and |
105 | /// mark the pointers released. |
106 | void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, |
107 | CheckerContext &C) const; |
108 | |
109 | /// Record the connection between raw pointers referring to a container |
110 | /// object's inner buffer and the object's memory region in the program state. |
111 | /// Mark potentially invalidated pointers released. |
112 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
113 | |
114 | /// Clean up the program state map. |
115 | void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; |
116 | }; |
117 | |
118 | } // end anonymous namespace |
119 | |
120 | bool InnerPointerChecker::isInvalidatingMemberFunction( |
121 | const CallEvent &Call) const { |
122 | if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(Val: &Call)) { |
123 | OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); |
124 | if (Opc == OO_Equal || Opc == OO_PlusEqual) |
125 | return true; |
126 | return false; |
127 | } |
128 | return isa<CXXDestructorCall>(Val: Call) || |
129 | InvalidatingMemberFunctions.contains(Call); |
130 | } |
131 | |
132 | void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, |
133 | ProgramStateRef State, |
134 | const MemRegion *MR, |
135 | CheckerContext &C) const { |
136 | if (const PtrSet *PS = State->get<RawPtrMap>(key: MR)) { |
137 | const Expr *Origin = Call.getOriginExpr(); |
138 | for (const auto Symbol : *PS) { |
139 | // NOTE: `Origin` may be null, and will be stored so in the symbol's |
140 | // `RefState` in MallocChecker's `RegionState` program state map. |
141 | State = allocation_state::markReleased(State, Sym: Symbol, Origin); |
142 | } |
143 | State = State->remove<RawPtrMap>(K: MR); |
144 | C.addTransition(State); |
145 | return; |
146 | } |
147 | } |
148 | |
149 | void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, |
150 | ProgramStateRef State, |
151 | CheckerContext &C) const { |
152 | if (const auto *FC = dyn_cast<AnyFunctionCall>(Val: &Call)) { |
153 | const FunctionDecl *FD = FC->getDecl(); |
154 | if (!FD || !FD->isInStdNamespace()) |
155 | return; |
156 | |
157 | for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { |
158 | QualType ParamTy = FD->getParamDecl(i: I)->getType(); |
159 | if (!ParamTy->isReferenceType() || |
160 | ParamTy->getPointeeType().isConstQualified()) |
161 | continue; |
162 | |
163 | // In case of member operator calls, `this` is counted as an |
164 | // argument but not as a parameter. |
165 | bool isaMemberOpCall = isa<CXXMemberOperatorCall>(Val: FC); |
166 | unsigned ArgI = isaMemberOpCall ? I+1 : I; |
167 | |
168 | SVal Arg = FC->getArgSVal(Index: ArgI); |
169 | const auto *ArgRegion = |
170 | dyn_cast_or_null<TypedValueRegion>(Val: Arg.getAsRegion()); |
171 | if (!ArgRegion) |
172 | continue; |
173 | |
174 | // std::addressof functions accepts a non-const reference as an argument, |
175 | // but doesn't modify it. |
176 | if (AddressofFunctions.contains(Call)) |
177 | continue; |
178 | |
179 | markPtrSymbolsReleased(Call, State, MR: ArgRegion, C); |
180 | } |
181 | } |
182 | } |
183 | |
184 | // [string.require] |
185 | // |
186 | // "References, pointers, and iterators referring to the elements of a |
187 | // basic_string sequence may be invalidated by the following uses of that |
188 | // basic_string object: |
189 | // |
190 | // -- As an argument to any standard library function taking a reference |
191 | // to non-const basic_string as an argument. For example, as an argument to |
192 | // non-member functions swap(), operator>>(), and getline(), or as an argument |
193 | // to basic_string::swap(). |
194 | // |
195 | // -- Calling non-const member functions, except operator[], at, front, back, |
196 | // begin, rbegin, end, and rend." |
197 | |
198 | void InnerPointerChecker::checkPostCall(const CallEvent &Call, |
199 | CheckerContext &C) const { |
200 | ProgramStateRef State = C.getState(); |
201 | |
202 | // TODO: Do we need these to be typed? |
203 | const TypedValueRegion *ObjRegion = nullptr; |
204 | |
205 | if (const auto *ICall = dyn_cast<CXXInstanceCall>(Val: &Call)) { |
206 | ObjRegion = dyn_cast_or_null<TypedValueRegion>( |
207 | Val: ICall->getCXXThisVal().getAsRegion()); |
208 | |
209 | // Check [string.require] / second point. |
210 | if (isInvalidatingMemberFunction(Call)) { |
211 | markPtrSymbolsReleased(Call, State, MR: ObjRegion, C); |
212 | return; |
213 | } |
214 | } |
215 | |
216 | if (InnerPointerAccessFunctions.contains(Call)) { |
217 | |
218 | if (isa<SimpleFunctionCall>(Val: Call)) { |
219 | // NOTE: As of now, we only have one free access function: std::data. |
220 | // If we add more functions like this in the list, hardcoded |
221 | // argument index should be changed. |
222 | ObjRegion = |
223 | dyn_cast_or_null<TypedValueRegion>(Val: Call.getArgSVal(Index: 0).getAsRegion()); |
224 | } |
225 | |
226 | if (!ObjRegion) |
227 | return; |
228 | |
229 | SVal RawPtr = Call.getReturnValue(); |
230 | if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { |
231 | // Start tracking this raw pointer by adding it to the set of symbols |
232 | // associated with this container object in the program state map. |
233 | |
234 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
235 | const PtrSet *SetPtr = State->get<RawPtrMap>(key: ObjRegion); |
236 | PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); |
237 | assert(C.wasInlined || !Set.contains(Sym)); |
238 | Set = F.add(Old: Set, V: Sym); |
239 | |
240 | State = State->set<RawPtrMap>(K: ObjRegion, E: Set); |
241 | C.addTransition(State); |
242 | } |
243 | |
244 | return; |
245 | } |
246 | |
247 | // Check [string.require] / first point. |
248 | checkFunctionArguments(Call, State, C); |
249 | } |
250 | |
251 | void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, |
252 | CheckerContext &C) const { |
253 | ProgramStateRef State = C.getState(); |
254 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
255 | RawPtrMapTy RPM = State->get<RawPtrMap>(); |
256 | for (const auto &Entry : RPM) { |
257 | if (!SymReaper.isLiveRegion(region: Entry.first)) { |
258 | // Due to incomplete destructor support, some dead regions might |
259 | // remain in the program state map. Clean them up. |
260 | State = State->remove<RawPtrMap>(K: Entry.first); |
261 | } |
262 | if (const PtrSet *OldSet = State->get<RawPtrMap>(key: Entry.first)) { |
263 | PtrSet CleanedUpSet = *OldSet; |
264 | for (const auto Symbol : Entry.second) { |
265 | if (!SymReaper.isLive(sym: Symbol)) |
266 | CleanedUpSet = F.remove(Old: CleanedUpSet, V: Symbol); |
267 | } |
268 | State = CleanedUpSet.isEmpty() |
269 | ? State->remove<RawPtrMap>(K: Entry.first) |
270 | : State->set<RawPtrMap>(K: Entry.first, E: CleanedUpSet); |
271 | } |
272 | } |
273 | C.addTransition(State); |
274 | } |
275 | |
276 | namespace clang { |
277 | namespace ento { |
278 | namespace allocation_state { |
279 | |
280 | std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { |
281 | return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(args&: Sym); |
282 | } |
283 | |
284 | const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { |
285 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
286 | for (const auto &Entry : Map) { |
287 | if (Entry.second.contains(V: Sym)) { |
288 | return Entry.first; |
289 | } |
290 | } |
291 | return nullptr; |
292 | } |
293 | |
294 | } // end namespace allocation_state |
295 | } // end namespace ento |
296 | } // end namespace clang |
297 | |
298 | PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( |
299 | const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { |
300 | if (!isSymbolTracked(State: N->getState(), Sym: PtrToBuf) || |
301 | isSymbolTracked(State: N->getFirstPred()->getState(), Sym: PtrToBuf)) |
302 | return nullptr; |
303 | |
304 | const Stmt *S = N->getStmtForDiagnostics(); |
305 | if (!S) |
306 | return nullptr; |
307 | |
308 | const MemRegion *ObjRegion = |
309 | allocation_state::getContainerObjRegion(State: N->getState(), Sym: PtrToBuf); |
310 | const auto *TypedRegion = cast<TypedValueRegion>(Val: ObjRegion); |
311 | QualType ObjTy = TypedRegion->getValueType(); |
312 | |
313 | SmallString<256> Buf; |
314 | llvm::raw_svector_ostream OS(Buf); |
315 | OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here" ; |
316 | PathDiagnosticLocation Pos(S, BRC.getSourceManager(), |
317 | N->getLocationContext()); |
318 | return std::make_shared<PathDiagnosticEventPiece>(args&: Pos, args: OS.str(), args: true); |
319 | } |
320 | |
321 | void ento::registerInnerPointerChecker(CheckerManager &Mgr) { |
322 | registerInnerPointerCheckerAux(Mgr); |
323 | Mgr.registerChecker<InnerPointerChecker>(); |
324 | } |
325 | |
326 | bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { |
327 | return true; |
328 | } |
329 | |