1 | //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a check that marks a raw pointer to a C++ container's |
10 | // inner buffer released when the object is destroyed. This information can |
11 | // be used by MallocChecker to detect use-after-free problems. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AllocationState.h" |
16 | #include "InterCheckerAPI.h" |
17 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
18 | #include "clang/StaticAnalyzer/Core/Checker.h" |
19 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
20 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
21 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
22 | |
23 | using namespace clang; |
24 | using namespace ento; |
25 | |
// Associate container objects with a set of raw pointer symbols.
//
// `PtrSet` is a set of `SymbolRef`s registered together with its factory in
// the program state; `RawPtrMap` is a program-state map keyed by the
// container object's `MemRegion` whose values are such sets.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
29 | |
30 | |
31 | namespace { |
32 | |
33 | class InnerPointerChecker |
34 | : public Checker<check::DeadSymbols, check::PostCall> { |
35 | |
36 | CallDescriptionSet InvalidatingMemberFunctions{ |
37 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "append" }), |
38 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "assign" }), |
39 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "clear" }), |
40 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "erase" }), |
41 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "insert" }), |
42 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "pop_back" }), |
43 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "push_back" }), |
44 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "replace" }), |
45 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "reserve" }), |
46 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "resize" }), |
47 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "shrink_to_fit" }), |
48 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "swap" })}; |
49 | |
50 | CallDescriptionSet AddressofFunctions{ |
51 | CallDescription(CDM::SimpleFunc, {"std" , "addressof" }), |
52 | CallDescription(CDM::SimpleFunc, {"std" , "__addressof" })}; |
53 | |
54 | CallDescriptionSet InnerPointerAccessFunctions{ |
55 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "c_str" }), |
56 | CallDescription(CDM::SimpleFunc, {"std" , "data" }, 1), |
57 | CallDescription(CDM::CXXMethod, {"std" , "basic_string" , "data" })}; |
58 | |
59 | public: |
60 | class InnerPointerBRVisitor : public BugReporterVisitor { |
61 | SymbolRef PtrToBuf; |
62 | |
63 | public: |
64 | InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} |
65 | |
66 | static void *getTag() { |
67 | static int Tag = 0; |
68 | return &Tag; |
69 | } |
70 | |
71 | void Profile(llvm::FoldingSetNodeID &ID) const override { |
72 | ID.AddPointer(Ptr: getTag()); |
73 | } |
74 | |
75 | PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, |
76 | BugReporterContext &BRC, |
77 | PathSensitiveBugReport &BR) override; |
78 | |
79 | // FIXME: Scan the map once in the visitor's constructor and do a direct |
80 | // lookup by region. |
81 | bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { |
82 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
83 | for (const auto &Entry : Map) { |
84 | if (Entry.second.contains(V: Sym)) |
85 | return true; |
86 | } |
87 | return false; |
88 | } |
89 | }; |
90 | |
91 | /// Check whether the called member function potentially invalidates |
92 | /// pointers referring to the container object's inner buffer. |
93 | bool isInvalidatingMemberFunction(const CallEvent &Call) const; |
94 | |
95 | /// Mark pointer symbols associated with the given memory region released |
96 | /// in the program state. |
97 | void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, |
98 | const MemRegion *ObjRegion, |
99 | CheckerContext &C) const; |
100 | |
101 | /// Standard library functions that take a non-const `basic_string` argument by |
102 | /// reference may invalidate its inner pointers. Check for these cases and |
103 | /// mark the pointers released. |
104 | void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, |
105 | CheckerContext &C) const; |
106 | |
107 | /// Record the connection between raw pointers referring to a container |
108 | /// object's inner buffer and the object's memory region in the program state. |
109 | /// Mark potentially invalidated pointers released. |
110 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
111 | |
112 | /// Clean up the program state map. |
113 | void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; |
114 | }; |
115 | |
116 | } // end anonymous namespace |
117 | |
118 | bool InnerPointerChecker::isInvalidatingMemberFunction( |
119 | const CallEvent &Call) const { |
120 | if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(Val: &Call)) { |
121 | OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); |
122 | if (Opc == OO_Equal || Opc == OO_PlusEqual) |
123 | return true; |
124 | return false; |
125 | } |
126 | return isa<CXXDestructorCall>(Val: Call) || |
127 | InvalidatingMemberFunctions.contains(Call); |
128 | } |
129 | |
130 | void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, |
131 | ProgramStateRef State, |
132 | const MemRegion *MR, |
133 | CheckerContext &C) const { |
134 | if (const PtrSet *PS = State->get<RawPtrMap>(key: MR)) { |
135 | const Expr *Origin = Call.getOriginExpr(); |
136 | for (const auto Symbol : *PS) { |
137 | // NOTE: `Origin` may be null, and will be stored so in the symbol's |
138 | // `RefState` in MallocChecker's `RegionState` program state map. |
139 | State = allocation_state::markReleased(State, Sym: Symbol, Origin); |
140 | } |
141 | State = State->remove<RawPtrMap>(K: MR); |
142 | C.addTransition(State); |
143 | return; |
144 | } |
145 | } |
146 | |
147 | void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, |
148 | ProgramStateRef State, |
149 | CheckerContext &C) const { |
150 | if (const auto *FC = dyn_cast<AnyFunctionCall>(Val: &Call)) { |
151 | const FunctionDecl *FD = FC->getDecl(); |
152 | if (!FD || !FD->isInStdNamespace()) |
153 | return; |
154 | |
155 | for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { |
156 | QualType ParamTy = FD->getParamDecl(i: I)->getType(); |
157 | if (!ParamTy->isReferenceType() || |
158 | ParamTy->getPointeeType().isConstQualified()) |
159 | continue; |
160 | |
161 | // In case of member operator calls, `this` is counted as an |
162 | // argument but not as a parameter. |
163 | bool isaMemberOpCall = isa<CXXMemberOperatorCall>(Val: FC); |
164 | unsigned ArgI = isaMemberOpCall ? I+1 : I; |
165 | |
166 | SVal Arg = FC->getArgSVal(Index: ArgI); |
167 | const auto *ArgRegion = |
168 | dyn_cast_or_null<TypedValueRegion>(Val: Arg.getAsRegion()); |
169 | if (!ArgRegion) |
170 | continue; |
171 | |
172 | // std::addressof functions accepts a non-const reference as an argument, |
173 | // but doesn't modify it. |
174 | if (AddressofFunctions.contains(Call)) |
175 | continue; |
176 | |
177 | markPtrSymbolsReleased(Call, State, MR: ArgRegion, C); |
178 | } |
179 | } |
180 | } |
181 | |
182 | // [string.require] |
183 | // |
184 | // "References, pointers, and iterators referring to the elements of a |
185 | // basic_string sequence may be invalidated by the following uses of that |
186 | // basic_string object: |
187 | // |
188 | // -- As an argument to any standard library function taking a reference |
189 | // to non-const basic_string as an argument. For example, as an argument to |
190 | // non-member functions swap(), operator>>(), and getline(), or as an argument |
191 | // to basic_string::swap(). |
192 | // |
193 | // -- Calling non-const member functions, except operator[], at, front, back, |
194 | // begin, rbegin, end, and rend." |
195 | |
196 | void InnerPointerChecker::checkPostCall(const CallEvent &Call, |
197 | CheckerContext &C) const { |
198 | ProgramStateRef State = C.getState(); |
199 | |
200 | // TODO: Do we need these to be typed? |
201 | const TypedValueRegion *ObjRegion = nullptr; |
202 | |
203 | if (const auto *ICall = dyn_cast<CXXInstanceCall>(Val: &Call)) { |
204 | ObjRegion = dyn_cast_or_null<TypedValueRegion>( |
205 | Val: ICall->getCXXThisVal().getAsRegion()); |
206 | |
207 | // Check [string.require] / second point. |
208 | if (isInvalidatingMemberFunction(Call)) { |
209 | markPtrSymbolsReleased(Call, State, MR: ObjRegion, C); |
210 | return; |
211 | } |
212 | } |
213 | |
214 | if (InnerPointerAccessFunctions.contains(Call)) { |
215 | |
216 | if (isa<SimpleFunctionCall>(Val: Call)) { |
217 | // NOTE: As of now, we only have one free access function: std::data. |
218 | // If we add more functions like this in the list, hardcoded |
219 | // argument index should be changed. |
220 | ObjRegion = |
221 | dyn_cast_or_null<TypedValueRegion>(Val: Call.getArgSVal(Index: 0).getAsRegion()); |
222 | } |
223 | |
224 | if (!ObjRegion) |
225 | return; |
226 | |
227 | SVal RawPtr = Call.getReturnValue(); |
228 | if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { |
229 | // Start tracking this raw pointer by adding it to the set of symbols |
230 | // associated with this container object in the program state map. |
231 | |
232 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
233 | const PtrSet *SetPtr = State->get<RawPtrMap>(key: ObjRegion); |
234 | PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); |
235 | assert(C.wasInlined || !Set.contains(Sym)); |
236 | Set = F.add(Old: Set, V: Sym); |
237 | |
238 | State = State->set<RawPtrMap>(K: ObjRegion, E: Set); |
239 | C.addTransition(State); |
240 | } |
241 | |
242 | return; |
243 | } |
244 | |
245 | // Check [string.require] / first point. |
246 | checkFunctionArguments(Call, State, C); |
247 | } |
248 | |
249 | void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, |
250 | CheckerContext &C) const { |
251 | ProgramStateRef State = C.getState(); |
252 | PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); |
253 | RawPtrMapTy RPM = State->get<RawPtrMap>(); |
254 | for (const auto &Entry : RPM) { |
255 | if (!SymReaper.isLiveRegion(region: Entry.first)) { |
256 | // Due to incomplete destructor support, some dead regions might |
257 | // remain in the program state map. Clean them up. |
258 | State = State->remove<RawPtrMap>(K: Entry.first); |
259 | } |
260 | if (const PtrSet *OldSet = State->get<RawPtrMap>(key: Entry.first)) { |
261 | PtrSet CleanedUpSet = *OldSet; |
262 | for (const auto Symbol : Entry.second) { |
263 | if (!SymReaper.isLive(sym: Symbol)) |
264 | CleanedUpSet = F.remove(Old: CleanedUpSet, V: Symbol); |
265 | } |
266 | State = CleanedUpSet.isEmpty() |
267 | ? State->remove<RawPtrMap>(K: Entry.first) |
268 | : State->set<RawPtrMap>(K: Entry.first, E: CleanedUpSet); |
269 | } |
270 | } |
271 | C.addTransition(State); |
272 | } |
273 | |
274 | namespace clang { |
275 | namespace ento { |
276 | namespace allocation_state { |
277 | |
278 | std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { |
279 | return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(args&: Sym); |
280 | } |
281 | |
282 | const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { |
283 | RawPtrMapTy Map = State->get<RawPtrMap>(); |
284 | for (const auto &Entry : Map) { |
285 | if (Entry.second.contains(V: Sym)) { |
286 | return Entry.first; |
287 | } |
288 | } |
289 | return nullptr; |
290 | } |
291 | |
292 | } // end namespace allocation_state |
293 | } // end namespace ento |
294 | } // end namespace clang |
295 | |
296 | PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( |
297 | const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { |
298 | if (!isSymbolTracked(State: N->getState(), Sym: PtrToBuf) || |
299 | isSymbolTracked(State: N->getFirstPred()->getState(), Sym: PtrToBuf)) |
300 | return nullptr; |
301 | |
302 | const Stmt *S = N->getStmtForDiagnostics(); |
303 | if (!S) |
304 | return nullptr; |
305 | |
306 | const MemRegion *ObjRegion = |
307 | allocation_state::getContainerObjRegion(State: N->getState(), Sym: PtrToBuf); |
308 | const auto *TypedRegion = cast<TypedValueRegion>(Val: ObjRegion); |
309 | QualType ObjTy = TypedRegion->getValueType(); |
310 | |
311 | SmallString<256> Buf; |
312 | llvm::raw_svector_ostream OS(Buf); |
313 | OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here" ; |
314 | PathDiagnosticLocation Pos(S, BRC.getSourceManager(), |
315 | N->getLocationContext()); |
316 | return std::make_shared<PathDiagnosticEventPiece>(args&: Pos, args: OS.str(), args: true); |
317 | } |
318 | |
// Register InnerPointerChecker with the checker manager.
// `registerInnerPointerCheckerAux` is invoked first; it is defined outside
// this file.
// NOTE(review): presumably it enables the part of MallocChecker that reports
// the pointers released here — confirm in InterCheckerAPI.h.
void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
  registerInnerPointerCheckerAux(Mgr);
  Mgr.registerChecker<InnerPointerChecker>();
}
323 | |
324 | bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { |
325 | return true; |
326 | } |
327 | |