1//===- PointerFlowExtractor.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SSAFAnalysesCommon.h"
10#include "clang/AST/ASTContext.h"
11#include "clang/AST/ASTTypeTraits.h"
12#include "clang/AST/Decl.h"
13#include "clang/AST/DeclCXX.h"
14#include "clang/AST/Expr.h"
15#include "clang/AST/ExprCXX.h"
16#include "clang/AST/Stmt.h"
17#include "clang/AST/TypeBase.h"
18#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h"
19#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h"
20#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h"
21#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h"
22#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h"
23#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h"
24#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/STLFunctionalExtras.h"
27#include "llvm/ADT/Sequence.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/Error.h"
30#include <memory>
31#include <optional>
32
33namespace clang::ssaf {
34extern PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges);
35} // namespace clang::ssaf
36
37namespace {
38using namespace clang;
39using namespace ssaf;
40
41class PointerFlowMatcher {
42public:
43 EdgeSet Results;
44 ASTContext &Ctx;
45 TUSummaryExtractor &Extractor;
46
47 PointerFlowMatcher(ASTContext &Ctx, TUSummaryExtractor &Extractor)
48 : Ctx(Ctx), Extractor(Extractor) {}
49
50 llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl);
51
52 llvm::Error matchesInitializerList(const ValueDecl *Base,
53 const Expr *InitExpr,
54 unsigned ArrayElementIndirectLevel = 0);
55
56 llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl);
57
58 llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl);
59
60private:
61 std::function<EntityId(const EntityName &)> AddEntity;
62
63 Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N,
64 bool IsRet = false) const;
65
66 Expected<EntityPointerLevelSet> toEPL(const Expr *N) const;
67
68 llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS,
69 Expected<EntityPointerLevelSet> &&RHS);
70
71 template <typename ParmsProvider, typename ArgsProvider>
72 llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP,
73 ArgsProvider *AP) {
74 unsigned ArgIdx = ArgIdxStart;
75
76 for (unsigned ParmIdx = 0;
77 ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs();
78 ++ArgIdx, ++ParmIdx) {
79 if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx);
80 PD && hasPtrOrArrType(D: PD)) {
81 if (auto Err = addEdges(LHS: toEPL(N: PD), RHS: toEPL(AP->getArg(ArgIdx))))
82 return Err;
83 }
84 }
85 return llvm::Error::success();
86 }
87};
88
89Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N,
90 bool IsRet) const {
91 auto Ret = createEntityPointerLevel(ND: N, Extractor, IsFunRet: IsRet);
92
93 if (Ret)
94 return EntityPointerLevelSet{*Ret};
95 return Ret.takeError();
96}
97
98Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const {
99 return translateEntityPointerLevel(E: N, Ctx, Extractor);
100}
101
102llvm::Error
103PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS,
104 Expected<EntityPointerLevelSet> &&RHS) {
105 if (!LHS && !RHS)
106 return llvm::joinErrors(E1: LHS.takeError(), E2: RHS.takeError());
107 if (!LHS)
108 return LHS.takeError();
109 if (!RHS)
110 return RHS.takeError();
111 if (RHS->empty())
112 return llvm::Error::success();
113 for (auto L : *LHS)
114 Results[L].insert(first: RHS->begin(), last: RHS->end());
115 return llvm::Error::success();
116}
117
118/// Match and extract pointer flow.
119/// The extraction function 'XF' can be described by the following rules:
120///
121/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))"
122/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ...
123/// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return
124/// entity of the enclosing
125/// function
126/// XF(ctor(a, ...) : x1(y1), ... {...})
127/// := XF(Param_1 = a), ...,
128/// XF(x1 = y1), ...,
129/// ctor's body will be visited separately.
130/// XF(T var = e) := XF(var = e)
131/// XF(T var = init-list) := see \ref
132/// PointerFlowMatcher::matchInitializerList
133llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode,
134 const NamedDecl *RootDecl) {
135 if (const Stmt *S = DynNode.get<Stmt>())
136 return matchesStmt(S, RootDecl);
137 if (const Decl *D = DynNode.get<Decl>())
138 return matchesDecl(D, RootDecl);
139 return llvm::Error::success();
140}
141
142llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S,
143 const NamedDecl *RootDecl) {
144 // Match 'p = q' whenever it has pointer or array type:
145 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
146 BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(E: BO)) {
147 return addEdges(LHS: toEPL(N: BO->getLHS()), RHS: toEPL(N: BO->getRHS()));
148 }
149
150 // Match arg-to-param passing (in CallExpr) for any pointer type argument:
151 if (const auto *CE = dyn_cast<CallExpr>(Val: S)) {
152 const FunctionDecl *FD = CE->getDirectCallee();
153
154 if (!FD)
155 return llvm::Error::success();
156
157 unsigned ArgIdx = 0;
158
159 if (isa<CXXOperatorCallExpr>(Val: CE))
160 if (auto *MD = dyn_cast<CXXMethodDecl>(Val: FD);
161 MD && !MD->isExplicitObjectMemberFunction())
162 ArgIdx = 1;
163 return matchesArgsWithParams(ArgIdxStart: ArgIdx, PP: FD, AP: CE);
164 }
165 // Match arg-to-param passing (in CXXConstructExpr) for any pointer type
166 // argument:
167 if (const auto *CCE = dyn_cast<CXXConstructExpr>(Val: S)) {
168 return matchesArgsWithParams(/*ArgIdxStart=*/0, PP: CCE->getConstructor(), AP: CCE);
169 }
170 if (const auto *RS = dyn_cast<ReturnStmt>(Val: S)) {
171 const Expr *RetExpr = RS->getRetValue();
172 if (!RetExpr || !hasPtrOrArrType(E: RetExpr))
173 return llvm::Error::success();
174 return addEdges(LHS: toEPL(N: RootDecl, IsRet: true), RHS: toEPL(N: RetExpr));
175 }
176 return llvm::Error::success();
177}
178
179llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D,
180 const NamedDecl *RootDecl) {
181 const Expr *InitExpr = nullptr;
182
183 if (const auto *VD = dyn_cast<ValueDecl>(Val: D)) {
184 if (const auto *Var = dyn_cast<VarDecl>(Val: VD))
185 InitExpr = Var->getInit();
186 if (const auto *Fd = dyn_cast<FieldDecl>(Val: VD))
187 InitExpr = Fd->getInClassInitializer();
188
189 // Match initializer-list:
190 if (auto *InitLst = dyn_cast_or_null<InitListExpr>(Val: InitExpr))
191 return matchesInitializerList(Base: VD, InitExpr: InitLst);
192
193 // Match initializers to variables/fields of a pointer type:
194 if (InitExpr && hasPtrOrArrType(D: VD))
195 return addEdges(LHS: toEPL(N: VD), RHS: toEPL(N: InitExpr));
196 }
197
198 // Match C++ constructor member-initializers:
199 if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(Val: D)) {
200 for (auto *E : CtorD->inits()) {
201 if (E->isDelegatingInitializer())
202 return matches(DynNode: DynTypedNode::create(Node: *E->getInit()), RootDecl);
203 if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(D: FD)) {
204 if (auto Err = addEdges(LHS: toEPL(N: E->getMember()), RHS: toEPL(N: E->getInit())))
205 return Err;
206 }
207 }
208 }
209 return llvm::Error::success();
210}
211
212// Helper function for matchInitializerList that handles record:
213llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher,
214 const RecordDecl *RecordTy,
215 const InitListExpr *ILE) {
216 if (auto *CXXRD = dyn_cast<CXXRecordDecl>(Val: RecordTy))
217 if (CXXRD->getNumBases() != 0) {
218 // FIXME: support this:
219 return makeErrAtNode(
220 Ctx&: Matcher.Ctx, N: ILE,
221 Fmt: "attempt to create pointer assignment edges between "
222 "CXXRecordDecls with base classes and initializer-lists");
223 }
224 // Handle union:
225 if (RecordTy->isUnion()) {
226 auto *InitField = ILE->getInitializedFieldInUnion();
227
228 if (!InitField || ILE->inits().empty())
229 return llvm::Error::success();
230 return Matcher.matchesInitializerList(Base: InitField, InitExpr: ILE->getInit(Init: 0));
231 }
232 // Handle struct/class:
233 ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm();
234
235 auto FieldIter = RecordTy->field_begin();
236
237 assert(RecordTy->getNumFields() >= ILE->getNumInits());
238 for (auto *Init : ILE->inits())
239 if (auto Err = Matcher.matchesInitializerList(Base: *(FieldIter++), InitExpr: Init))
240 return Err;
241 return llvm::Error::success();
242}
243
244// Helper function for matchInitializerList that handles array:
245llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher,
246 const ValueDecl *Array,
247 const InitListExpr *ILE,
248 unsigned ArrayIndirectLevel = 0) {
249 for (auto *E : ILE->inits())
250 if (auto Err =
251 Matcher.matchesInitializerList(Base: Array, InitExpr: E, ArrayElementIndirectLevel: ArrayIndirectLevel + 1))
252 return Err;
253 return llvm::Error::success();
254}
255
256/// Match initializer lists of the form 'Var = {a, b, c, ...}':
257///
258/// If 'Var' is a struct/union:
259/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a)
260/// XF(Var.field_2 = b)
261/// ...
262/// If 'Var' is an array:
263/// XF(Var = {a, b, c, ...}) := XF(*Var = a)
264/// XF(*Var = b)
265/// ...
266///
267/// The process is recursive: 'a', 'b', 'c', ... may themselves be
268/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep
269/// track of the pointer level the left-hand side.
270llvm::Error
271PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base,
272 const Expr *InitExpr,
273 unsigned ArrayElementIndirectLevel) {
274 const InitListExpr *ILE = dyn_cast<InitListExpr>(Val: InitExpr);
275
276 if (!ILE) {
277 if (!hasPtrOrArrType(E: InitExpr))
278 return llvm::Error::success();
279
280 auto BaseEPL = toEPL(N: Base);
281
282 if (!BaseEPL)
283 return BaseEPL.takeError();
284
285 // Apply ArrayElementIndirectLevel to BaseEPL
286 auto R = llvm::map_range(C&: *BaseEPL, F: [&ArrayElementIndirectLevel](
287 const EntityPointerLevel &EPL) {
288 EntityPointerLevel Result = EPL;
289 for ([[maybe_unused]] auto Ignored : llvm::seq(Size: ArrayElementIndirectLevel))
290 Result = incrementPointerLevel(E: Result);
291 return Result;
292 });
293 return addEdges(LHS: EntityPointerLevelSet{R.begin(), R.end()}, RHS: toEPL(N: InitExpr));
294 }
295 // Note that `Base`'s type is NOT the real LHS type when
296 // ArrayElementIndirectLevel > 0:
297 QualType Type = InitExpr->getType();
298
299 if (auto *RD = Type->getAsRecordDecl())
300 return matchInitializerListForRecordDecl(Matcher&: *this, RecordTy: RD, ILE);
301 if (Type->isArrayType())
302 return matchInitializerListForArray(Matcher&: *this, Array: Base, ILE,
303 ArrayIndirectLevel: ArrayElementIndirectLevel);
304
305 // Must be the case of using a initializer-list for a scalar.
306 // The initializer-list can be either singleton or empty:
307 if (ILE->getNumInits() == 0)
308 return llvm::Error::success();
309 return matchesInitializerList(Base, InitExpr: ILE->getInit(Init: 0));
310}
311
312class PointerFlowTUSummaryExtractor : public TUSummaryExtractor {
313public:
314 using TUSummaryExtractor::TUSummaryExtractor;
315
316 /// \return a non-null unique pointer to a PointerFlowEntitySummary
317 std::unique_ptr<PointerFlowEntitySummary>
318 extractEntitySummary(const std::vector<const NamedDecl *> &ContributorDecls,
319 ASTContext &Ctx, TUSummaryExtractor &Extractor) {
320 PointerFlowMatcher Matcher(Ctx, Extractor);
321
322 for (const auto *Contrib : ContributorDecls) {
323 auto MatchAction = [&Matcher, Contrib](const DynTypedNode &Node) {
324 if (auto Err = Matcher.matches(DynNode: Node, RootDecl: Contrib))
325 logWarningFromError(Err: std::move(Err));
326 };
327
328 findMatchesIn(Contributor: Contrib, MatchActionRef: MatchAction);
329 }
330 return std::make_unique<PointerFlowEntitySummary>(
331 args: buildPointerFlowEntitySummary(Edges: std::move(Matcher.Results)));
332 }
333
334 void HandleTranslationUnit(ASTContext &Ctx) override {
335 extractAndAddSummaries(
336 Extractor&: *this, Builder&: SummaryBuilder, Ctx,
337 ExtractFn: [&](const std::vector<const NamedDecl *> &Decls) {
338 return extractEntitySummary(ContributorDecls: Decls, Ctx, Extractor&: *this);
339 },
340 ExtractorName: "PointerFlow");
341 }
342};
343} // namespace
344
345namespace clang::ssaf {
346// NOLINTNEXTLINE(misc-use-internal-linkage)
347volatile int PointerFlowExtractorAnchorSource = 0;
348} // namespace clang::ssaf
349
350static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor>
351 RegisterExtractor(PointerFlowEntitySummary::Name,
352 "Extract pointer flow information");
353