1//=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines a check that marks raw pointers to a C++ container's
// inner buffer as released when the container object is destroyed or is
// modified by an operation that may invalidate them. This information can
// be used by MallocChecker to detect use-after-free problems.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AllocationState.h"
16#include "InterCheckerAPI.h"
17#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22
23using namespace clang;
24using namespace ento;
25
26// Associate container objects with a set of raw pointer symbols.
27REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
28REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
29
30
31namespace {
32
33class InnerPointerChecker
34 : public Checker<check::DeadSymbols, check::PostCall> {
35
36 CallDescriptionSet InvalidatingMemberFunctions{
37 CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}),
38 CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}),
39 CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}),
40 CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}),
41 CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}),
42 CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}),
43 CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}),
44 CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}),
45 CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}),
46 CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}),
47 CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}),
48 CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})};
49
50 CallDescriptionSet AddressofFunctions{
51 CallDescription(CDM::SimpleFunc, {"std", "addressof"}),
52 CallDescription(CDM::SimpleFunc, {"std", "__addressof"})};
53
54 CallDescriptionSet InnerPointerAccessFunctions{
55 CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}),
56 CallDescription(CDM::SimpleFunc, {"std", "data"}, 1),
57 CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})};
58
59public:
60 class InnerPointerBRVisitor : public BugReporterVisitor {
61 SymbolRef PtrToBuf;
62
63 public:
64 InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
65
66 static void *getTag() {
67 static int Tag = 0;
68 return &Tag;
69 }
70
71 void Profile(llvm::FoldingSetNodeID &ID) const override {
72 ID.AddPointer(Ptr: getTag());
73 }
74
75 PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
76 BugReporterContext &BRC,
77 PathSensitiveBugReport &BR) override;
78
79 bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
80 RawPtrMapTy Map = State->get<RawPtrMap>();
81 for (const auto &Entry : Map) {
82 if (Entry.second.contains(V: Sym))
83 return true;
84 }
85 return false;
86 }
87 };
88
89 /// Check whether the called member function potentially invalidates
90 /// pointers referring to the container object's inner buffer.
91 bool isInvalidatingMemberFunction(const CallEvent &Call) const;
92
93 /// Mark pointer symbols associated with the given memory region released
94 /// in the program state.
95 void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
96 const MemRegion *ObjRegion,
97 CheckerContext &C) const;
98
99 /// Standard library functions that take a non-const `basic_string` argument by
100 /// reference may invalidate its inner pointers. Check for these cases and
101 /// mark the pointers released.
102 void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
103 CheckerContext &C) const;
104
105 /// Record the connection between raw pointers referring to a container
106 /// object's inner buffer and the object's memory region in the program state.
107 /// Mark potentially invalidated pointers released.
108 void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
109
110 /// Clean up the program state map.
111 void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
112};
113
114} // end anonymous namespace
115
116bool InnerPointerChecker::isInvalidatingMemberFunction(
117 const CallEvent &Call) const {
118 if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(Val: &Call)) {
119 OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
120 if (Opc == OO_Equal || Opc == OO_PlusEqual)
121 return true;
122 return false;
123 }
124 return isa<CXXDestructorCall>(Val: Call) ||
125 InvalidatingMemberFunctions.contains(Call);
126}
127
128void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
129 ProgramStateRef State,
130 const MemRegion *MR,
131 CheckerContext &C) const {
132 if (const PtrSet *PS = State->get<RawPtrMap>(key: MR)) {
133 const Expr *Origin = Call.getOriginExpr();
134 for (const auto Symbol : *PS) {
135 // NOTE: `Origin` may be null, and will be stored so in the symbol's
136 // `RefState` in MallocChecker's `RegionState` program state map.
137 State = allocation_state::markReleased(State, Sym: Symbol, Origin);
138 }
139 State = State->remove<RawPtrMap>(K: MR);
140 C.addTransition(State);
141 return;
142 }
143}
144
145void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
146 ProgramStateRef State,
147 CheckerContext &C) const {
148 if (const auto *FC = dyn_cast<AnyFunctionCall>(Val: &Call)) {
149 const FunctionDecl *FD = FC->getDecl();
150 if (!FD || !FD->isInStdNamespace())
151 return;
152
153 for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
154 QualType ParamTy = FD->getParamDecl(i: I)->getType();
155 if (!ParamTy->isReferenceType() ||
156 ParamTy->getPointeeType().isConstQualified())
157 continue;
158
159 // In case of member operator calls, `this` is counted as an
160 // argument but not as a parameter.
161 bool isaMemberOpCall = isa<CXXMemberOperatorCall>(Val: FC);
162 unsigned ArgI = isaMemberOpCall ? I+1 : I;
163
164 SVal Arg = FC->getArgSVal(Index: ArgI);
165 const auto *ArgRegion =
166 dyn_cast_or_null<TypedValueRegion>(Val: Arg.getAsRegion());
167 if (!ArgRegion)
168 continue;
169
170 // std::addressof functions accepts a non-const reference as an argument,
171 // but doesn't modify it.
172 if (AddressofFunctions.contains(Call))
173 continue;
174
175 markPtrSymbolsReleased(Call, State, MR: ArgRegion, C);
176 }
177 }
178}
179
180// [string.require]
181//
182// "References, pointers, and iterators referring to the elements of a
183// basic_string sequence may be invalidated by the following uses of that
184// basic_string object:
185//
186// -- As an argument to any standard library function taking a reference
187// to non-const basic_string as an argument. For example, as an argument to
188// non-member functions swap(), operator>>(), and getline(), or as an argument
189// to basic_string::swap().
190//
191// -- Calling non-const member functions, except operator[], at, front, back,
192// begin, rbegin, end, and rend."
193
194void InnerPointerChecker::checkPostCall(const CallEvent &Call,
195 CheckerContext &C) const {
196 ProgramStateRef State = C.getState();
197
198 // TODO: Do we need these to be typed?
199 const TypedValueRegion *ObjRegion = nullptr;
200
201 if (const auto *ICall = dyn_cast<CXXInstanceCall>(Val: &Call)) {
202 ObjRegion = dyn_cast_or_null<TypedValueRegion>(
203 Val: ICall->getCXXThisVal().getAsRegion());
204
205 // Check [string.require] / second point.
206 if (isInvalidatingMemberFunction(Call)) {
207 markPtrSymbolsReleased(Call, State, MR: ObjRegion, C);
208 return;
209 }
210 }
211
212 if (InnerPointerAccessFunctions.contains(Call)) {
213
214 if (isa<SimpleFunctionCall>(Val: Call)) {
215 // NOTE: As of now, we only have one free access function: std::data.
216 // If we add more functions like this in the list, hardcoded
217 // argument index should be changed.
218 ObjRegion =
219 dyn_cast_or_null<TypedValueRegion>(Val: Call.getArgSVal(Index: 0).getAsRegion());
220 }
221
222 if (!ObjRegion)
223 return;
224
225 SVal RawPtr = Call.getReturnValue();
226 if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
227 // Start tracking this raw pointer by adding it to the set of symbols
228 // associated with this container object in the program state map.
229
230 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
231 const PtrSet *SetPtr = State->get<RawPtrMap>(key: ObjRegion);
232 PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
233 assert(C.wasInlined || !Set.contains(Sym));
234 Set = F.add(Old: Set, V: Sym);
235
236 State = State->set<RawPtrMap>(K: ObjRegion, E: Set);
237 C.addTransition(State);
238 }
239
240 return;
241 }
242
243 // Check [string.require] / first point.
244 checkFunctionArguments(Call, State, C);
245}
246
247void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
248 CheckerContext &C) const {
249 ProgramStateRef State = C.getState();
250 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
251 RawPtrMapTy RPM = State->get<RawPtrMap>();
252 for (const auto &Entry : RPM) {
253 if (!SymReaper.isLiveRegion(region: Entry.first)) {
254 // Due to incomplete destructor support, some dead regions might
255 // remain in the program state map. Clean them up.
256 State = State->remove<RawPtrMap>(K: Entry.first);
257 }
258 if (const PtrSet *OldSet = State->get<RawPtrMap>(key: Entry.first)) {
259 PtrSet CleanedUpSet = *OldSet;
260 for (const auto Symbol : Entry.second) {
261 if (!SymReaper.isLive(sym: Symbol))
262 CleanedUpSet = F.remove(Old: CleanedUpSet, V: Symbol);
263 }
264 State = CleanedUpSet.isEmpty()
265 ? State->remove<RawPtrMap>(K: Entry.first)
266 : State->set<RawPtrMap>(K: Entry.first, E: CleanedUpSet);
267 }
268 }
269 C.addTransition(State);
270}
271
272namespace clang {
273namespace ento {
274namespace allocation_state {
275
276std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
277 return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(args&: Sym);
278}
279
280const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) {
281 RawPtrMapTy Map = State->get<RawPtrMap>();
282 for (const auto &Entry : Map) {
283 if (Entry.second.contains(V: Sym)) {
284 return Entry.first;
285 }
286 }
287 return nullptr;
288}
289
290} // end namespace allocation_state
291} // end namespace ento
292} // end namespace clang
293
294PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
295 const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) {
296 if (!isSymbolTracked(State: N->getState(), Sym: PtrToBuf) ||
297 isSymbolTracked(State: N->getFirstPred()->getState(), Sym: PtrToBuf))
298 return nullptr;
299
300 const Stmt *S = N->getStmtForDiagnostics();
301 if (!S)
302 return nullptr;
303
304 const MemRegion *ObjRegion =
305 allocation_state::getContainerObjRegion(State: N->getState(), Sym: PtrToBuf);
306 const auto *TypedRegion = cast<TypedValueRegion>(Val: ObjRegion);
307 QualType ObjTy = TypedRegion->getValueType();
308
309 SmallString<256> Buf;
310 llvm::raw_svector_ostream OS(Buf);
311 OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here";
312 PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
313 N->getLocationContext());
314 return std::make_shared<PathDiagnosticEventPiece>(args&: Pos, args: OS.str(), args: true);
315}
316
317void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
318 registerInnerPointerCheckerAux(Mgr);
319 Mgr.registerChecker<InnerPointerChecker>();
320}
321
322bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
323 return true;
324}
325