| 1 | //===- PointerFlowExtractor.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SSAFAnalysesCommon.h" |
| 10 | #include "clang/AST/ASTContext.h" |
| 11 | #include "clang/AST/ASTTypeTraits.h" |
| 12 | #include "clang/AST/Decl.h" |
| 13 | #include "clang/AST/DeclCXX.h" |
| 14 | #include "clang/AST/Expr.h" |
| 15 | #include "clang/AST/ExprCXX.h" |
| 16 | #include "clang/AST/Stmt.h" |
| 17 | #include "clang/AST/TypeBase.h" |
| 18 | #include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" |
| 19 | #include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" |
| 20 | #include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" |
| 21 | #include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" |
| 22 | #include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" |
| 23 | #include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" |
| 24 | #include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" |
| 25 | #include "llvm/ADT/STLExtras.h" |
| 26 | #include "llvm/ADT/STLFunctionalExtras.h" |
| 27 | #include "llvm/ADT/Sequence.h" |
| 28 | #include "llvm/Support/Debug.h" |
| 29 | #include "llvm/Support/Error.h" |
| 30 | #include <memory> |
| 31 | #include <optional> |
| 32 | |
| 33 | namespace clang::ssaf { |
| 34 | extern PointerFlowEntitySummary buildPointerFlowEntitySummary(EdgeSet Edges); |
| 35 | } // namespace clang::ssaf |
| 36 | |
| 37 | namespace { |
| 38 | using namespace clang; |
| 39 | using namespace ssaf; |
| 40 | |
| 41 | class PointerFlowMatcher { |
| 42 | public: |
| 43 | EdgeSet Results; |
| 44 | ASTContext &Ctx; |
| 45 | TUSummaryExtractor &; |
| 46 | |
| 47 | (ASTContext &Ctx, TUSummaryExtractor &) |
| 48 | : Ctx(Ctx), Extractor(Extractor) {} |
| 49 | |
| 50 | llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl); |
| 51 | |
| 52 | llvm::Error matchesInitializerList(const ValueDecl *Base, |
| 53 | const Expr *InitExpr, |
| 54 | unsigned ArrayElementIndirectLevel = 0); |
| 55 | |
| 56 | llvm::Error matchesStmt(const Stmt *S, const NamedDecl *RootDecl); |
| 57 | |
| 58 | llvm::Error matchesDecl(const Decl *D, const NamedDecl *RootDecl); |
| 59 | |
| 60 | private: |
| 61 | std::function<EntityId(const EntityName &)> AddEntity; |
| 62 | |
| 63 | Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N, |
| 64 | bool IsRet = false) const; |
| 65 | |
| 66 | Expected<EntityPointerLevelSet> toEPL(const Expr *N) const; |
| 67 | |
| 68 | llvm::Error addEdges(Expected<EntityPointerLevelSet> &&LHS, |
| 69 | Expected<EntityPointerLevelSet> &&RHS); |
| 70 | |
| 71 | template <typename ParmsProvider, typename ArgsProvider> |
| 72 | llvm::Error matchesArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP, |
| 73 | ArgsProvider *AP) { |
| 74 | unsigned ArgIdx = ArgIdxStart; |
| 75 | |
| 76 | for (unsigned ParmIdx = 0; |
| 77 | ParmIdx < PP->getNumParams() && ArgIdx < AP->getNumArgs(); |
| 78 | ++ArgIdx, ++ParmIdx) { |
| 79 | if (const ParmVarDecl *PD = PP->getParamDecl(ParmIdx); |
| 80 | PD && hasPtrOrArrType(D: PD)) { |
| 81 | if (auto Err = addEdges(LHS: toEPL(N: PD), RHS: toEPL(AP->getArg(ArgIdx)))) |
| 82 | return Err; |
| 83 | } |
| 84 | } |
| 85 | return llvm::Error::success(); |
| 86 | } |
| 87 | }; |
| 88 | |
| 89 | Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const NamedDecl *N, |
| 90 | bool IsRet) const { |
| 91 | auto Ret = createEntityPointerLevel(ND: N, Extractor, IsFunRet: IsRet); |
| 92 | |
| 93 | if (Ret) |
| 94 | return EntityPointerLevelSet{*Ret}; |
| 95 | return Ret.takeError(); |
| 96 | } |
| 97 | |
| 98 | Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) const { |
| 99 | return translateEntityPointerLevel(E: N, Ctx, Extractor); |
| 100 | } |
| 101 | |
| 102 | llvm::Error |
| 103 | PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS, |
| 104 | Expected<EntityPointerLevelSet> &&RHS) { |
| 105 | if (!LHS && !RHS) |
| 106 | return llvm::joinErrors(E1: LHS.takeError(), E2: RHS.takeError()); |
| 107 | if (!LHS) |
| 108 | return LHS.takeError(); |
| 109 | if (!RHS) |
| 110 | return RHS.takeError(); |
| 111 | if (RHS->empty()) |
| 112 | return llvm::Error::success(); |
| 113 | for (auto L : *LHS) |
| 114 | Results[L].insert(first: RHS->begin(), last: RHS->end()); |
| 115 | return llvm::Error::success(); |
| 116 | } |
| 117 | |
| 118 | /// Match and extract pointer flow. |
| 119 | /// The extraction function 'XF' can be described by the following rules: |
| 120 | /// |
| 121 | /// XF(l = r) := add edge "toEPL(l) -> toEPL(r))" |
| 122 | /// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ... |
| 123 | /// XF(return e;) := XF(FunRet = e), where 'FunRet' is the return |
| 124 | /// entity of the enclosing |
| 125 | /// function |
| 126 | /// XF(ctor(a, ...) : x1(y1), ... {...}) |
| 127 | /// := XF(Param_1 = a), ..., |
| 128 | /// XF(x1 = y1), ..., |
| 129 | /// ctor's body will be visited separately. |
| 130 | /// XF(T var = e) := XF(var = e) |
| 131 | /// XF(T var = init-list) := see \ref |
| 132 | /// PointerFlowMatcher::matchInitializerList |
| 133 | llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode, |
| 134 | const NamedDecl *RootDecl) { |
| 135 | if (const Stmt *S = DynNode.get<Stmt>()) |
| 136 | return matchesStmt(S, RootDecl); |
| 137 | if (const Decl *D = DynNode.get<Decl>()) |
| 138 | return matchesDecl(D, RootDecl); |
| 139 | return llvm::Error::success(); |
| 140 | } |
| 141 | |
| 142 | llvm::Error PointerFlowMatcher::matchesStmt(const Stmt *S, |
| 143 | const NamedDecl *RootDecl) { |
| 144 | // Match 'p = q' whenever it has pointer or array type: |
| 145 | if (const auto *BO = dyn_cast<BinaryOperator>(Val: S); |
| 146 | BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(E: BO)) { |
| 147 | return addEdges(LHS: toEPL(N: BO->getLHS()), RHS: toEPL(N: BO->getRHS())); |
| 148 | } |
| 149 | |
| 150 | // Match arg-to-param passing (in CallExpr) for any pointer type argument: |
| 151 | if (const auto *CE = dyn_cast<CallExpr>(Val: S)) { |
| 152 | const FunctionDecl *FD = CE->getDirectCallee(); |
| 153 | |
| 154 | if (!FD) |
| 155 | return llvm::Error::success(); |
| 156 | |
| 157 | unsigned ArgIdx = 0; |
| 158 | |
| 159 | if (isa<CXXOperatorCallExpr>(Val: CE)) |
| 160 | if (auto *MD = dyn_cast<CXXMethodDecl>(Val: FD); |
| 161 | MD && !MD->isExplicitObjectMemberFunction()) |
| 162 | ArgIdx = 1; |
| 163 | return matchesArgsWithParams(ArgIdxStart: ArgIdx, PP: FD, AP: CE); |
| 164 | } |
| 165 | // Match arg-to-param passing (in CXXConstructExpr) for any pointer type |
| 166 | // argument: |
| 167 | if (const auto *CCE = dyn_cast<CXXConstructExpr>(Val: S)) { |
| 168 | return matchesArgsWithParams(/*ArgIdxStart=*/0, PP: CCE->getConstructor(), AP: CCE); |
| 169 | } |
| 170 | if (const auto *RS = dyn_cast<ReturnStmt>(Val: S)) { |
| 171 | const Expr *RetExpr = RS->getRetValue(); |
| 172 | if (!RetExpr || !hasPtrOrArrType(E: RetExpr)) |
| 173 | return llvm::Error::success(); |
| 174 | return addEdges(LHS: toEPL(N: RootDecl, IsRet: true), RHS: toEPL(N: RetExpr)); |
| 175 | } |
| 176 | return llvm::Error::success(); |
| 177 | } |
| 178 | |
| 179 | llvm::Error PointerFlowMatcher::matchesDecl(const Decl *D, |
| 180 | const NamedDecl *RootDecl) { |
| 181 | const Expr *InitExpr = nullptr; |
| 182 | |
| 183 | if (const auto *VD = dyn_cast<ValueDecl>(Val: D)) { |
| 184 | if (const auto *Var = dyn_cast<VarDecl>(Val: VD)) |
| 185 | InitExpr = Var->getInit(); |
| 186 | if (const auto *Fd = dyn_cast<FieldDecl>(Val: VD)) |
| 187 | InitExpr = Fd->getInClassInitializer(); |
| 188 | |
| 189 | // Match initializer-list: |
| 190 | if (auto *InitLst = dyn_cast_or_null<InitListExpr>(Val: InitExpr)) |
| 191 | return matchesInitializerList(Base: VD, InitExpr: InitLst); |
| 192 | |
| 193 | // Match initializers to variables/fields of a pointer type: |
| 194 | if (InitExpr && hasPtrOrArrType(D: VD)) |
| 195 | return addEdges(LHS: toEPL(N: VD), RHS: toEPL(N: InitExpr)); |
| 196 | } |
| 197 | |
| 198 | // Match C++ constructor member-initializers: |
| 199 | if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(Val: D)) { |
| 200 | for (auto *E : CtorD->inits()) { |
| 201 | if (E->isDelegatingInitializer()) |
| 202 | return matches(DynNode: DynTypedNode::create(Node: *E->getInit()), RootDecl); |
| 203 | if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(D: FD)) { |
| 204 | if (auto Err = addEdges(LHS: toEPL(N: E->getMember()), RHS: toEPL(N: E->getInit()))) |
| 205 | return Err; |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | return llvm::Error::success(); |
| 210 | } |
| 211 | |
| 212 | // Helper function for matchInitializerList that handles record: |
| 213 | llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher, |
| 214 | const RecordDecl *RecordTy, |
| 215 | const InitListExpr *ILE) { |
| 216 | if (auto *CXXRD = dyn_cast<CXXRecordDecl>(Val: RecordTy)) |
| 217 | if (CXXRD->getNumBases() != 0) { |
| 218 | // FIXME: support this: |
| 219 | return makeErrAtNode( |
| 220 | Ctx&: Matcher.Ctx, N: ILE, |
| 221 | Fmt: "attempt to create pointer assignment edges between " |
| 222 | "CXXRecordDecls with base classes and initializer-lists" ); |
| 223 | } |
| 224 | // Handle union: |
| 225 | if (RecordTy->isUnion()) { |
| 226 | auto *InitField = ILE->getInitializedFieldInUnion(); |
| 227 | |
| 228 | if (!InitField || ILE->inits().empty()) |
| 229 | return llvm::Error::success(); |
| 230 | return Matcher.matchesInitializerList(Base: InitField, InitExpr: ILE->getInit(Init: 0)); |
| 231 | } |
| 232 | // Handle struct/class: |
| 233 | ILE = ILE->isSemanticForm() ? ILE : ILE->getSemanticForm(); |
| 234 | |
| 235 | auto FieldIter = RecordTy->field_begin(); |
| 236 | |
| 237 | assert(RecordTy->getNumFields() >= ILE->getNumInits()); |
| 238 | for (auto *Init : ILE->inits()) |
| 239 | if (auto Err = Matcher.matchesInitializerList(Base: *(FieldIter++), InitExpr: Init)) |
| 240 | return Err; |
| 241 | return llvm::Error::success(); |
| 242 | } |
| 243 | |
| 244 | // Helper function for matchInitializerList that handles array: |
| 245 | llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher, |
| 246 | const ValueDecl *Array, |
| 247 | const InitListExpr *ILE, |
| 248 | unsigned ArrayIndirectLevel = 0) { |
| 249 | for (auto *E : ILE->inits()) |
| 250 | if (auto Err = |
| 251 | Matcher.matchesInitializerList(Base: Array, InitExpr: E, ArrayElementIndirectLevel: ArrayIndirectLevel + 1)) |
| 252 | return Err; |
| 253 | return llvm::Error::success(); |
| 254 | } |
| 255 | |
| 256 | /// Match initializer lists of the form 'Var = {a, b, c, ...}': |
| 257 | /// |
| 258 | /// If 'Var' is a struct/union: |
| 259 | /// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a) |
| 260 | /// XF(Var.field_2 = b) |
| 261 | /// ... |
| 262 | /// If 'Var' is an array: |
| 263 | /// XF(Var = {a, b, c, ...}) := XF(*Var = a) |
| 264 | /// XF(*Var = b) |
| 265 | /// ... |
| 266 | /// |
| 267 | /// The process is recursive: 'a', 'b', 'c', ... may themselves be |
| 268 | /// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep |
| 269 | /// track of the pointer level the left-hand side. |
| 270 | llvm::Error |
| 271 | PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base, |
| 272 | const Expr *InitExpr, |
| 273 | unsigned ArrayElementIndirectLevel) { |
| 274 | const InitListExpr *ILE = dyn_cast<InitListExpr>(Val: InitExpr); |
| 275 | |
| 276 | if (!ILE) { |
| 277 | if (!hasPtrOrArrType(E: InitExpr)) |
| 278 | return llvm::Error::success(); |
| 279 | |
| 280 | auto BaseEPL = toEPL(N: Base); |
| 281 | |
| 282 | if (!BaseEPL) |
| 283 | return BaseEPL.takeError(); |
| 284 | |
| 285 | // Apply ArrayElementIndirectLevel to BaseEPL |
| 286 | auto R = llvm::map_range(C&: *BaseEPL, F: [&ArrayElementIndirectLevel]( |
| 287 | const EntityPointerLevel &EPL) { |
| 288 | EntityPointerLevel Result = EPL; |
| 289 | for ([[maybe_unused]] auto Ignored : llvm::seq(Size: ArrayElementIndirectLevel)) |
| 290 | Result = incrementPointerLevel(E: Result); |
| 291 | return Result; |
| 292 | }); |
| 293 | return addEdges(LHS: EntityPointerLevelSet{R.begin(), R.end()}, RHS: toEPL(N: InitExpr)); |
| 294 | } |
| 295 | // Note that `Base`'s type is NOT the real LHS type when |
| 296 | // ArrayElementIndirectLevel > 0: |
| 297 | QualType Type = InitExpr->getType(); |
| 298 | |
| 299 | if (auto *RD = Type->getAsRecordDecl()) |
| 300 | return matchInitializerListForRecordDecl(Matcher&: *this, RecordTy: RD, ILE); |
| 301 | if (Type->isArrayType()) |
| 302 | return matchInitializerListForArray(Matcher&: *this, Array: Base, ILE, |
| 303 | ArrayIndirectLevel: ArrayElementIndirectLevel); |
| 304 | |
| 305 | // Must be the case of using a initializer-list for a scalar. |
| 306 | // The initializer-list can be either singleton or empty: |
| 307 | if (ILE->getNumInits() == 0) |
| 308 | return llvm::Error::success(); |
| 309 | return matchesInitializerList(Base, InitExpr: ILE->getInit(Init: 0)); |
| 310 | } |
| 311 | |
| 312 | class : public TUSummaryExtractor { |
| 313 | public: |
| 314 | using TUSummaryExtractor::TUSummaryExtractor; |
| 315 | |
| 316 | /// \return a non-null unique pointer to a PointerFlowEntitySummary |
| 317 | std::unique_ptr<PointerFlowEntitySummary> |
| 318 | (const std::vector<const NamedDecl *> &ContributorDecls, |
| 319 | ASTContext &Ctx, TUSummaryExtractor &) { |
| 320 | PointerFlowMatcher Matcher(Ctx, Extractor); |
| 321 | |
| 322 | for (const auto *Contrib : ContributorDecls) { |
| 323 | auto MatchAction = [&Matcher, Contrib](const DynTypedNode &Node) { |
| 324 | if (auto Err = Matcher.matches(DynNode: Node, RootDecl: Contrib)) |
| 325 | logWarningFromError(Err: std::move(Err)); |
| 326 | }; |
| 327 | |
| 328 | findMatchesIn(Contributor: Contrib, MatchActionRef: MatchAction); |
| 329 | } |
| 330 | return std::make_unique<PointerFlowEntitySummary>( |
| 331 | args: buildPointerFlowEntitySummary(Edges: std::move(Matcher.Results))); |
| 332 | } |
| 333 | |
| 334 | void HandleTranslationUnit(ASTContext &Ctx) override { |
| 335 | extractAndAddSummaries( |
| 336 | Extractor&: *this, Builder&: SummaryBuilder, Ctx, |
| 337 | ExtractFn: [&](const std::vector<const NamedDecl *> &Decls) { |
| 338 | return extractEntitySummary(ContributorDecls: Decls, Ctx, Extractor&: *this); |
| 339 | }, |
| 340 | ExtractorName: "PointerFlow" ); |
| 341 | } |
| 342 | }; |
| 343 | } // namespace |
| 344 | |
| 345 | namespace clang::ssaf { |
| 346 | // NOLINTNEXTLINE(misc-use-internal-linkage) |
| 347 | volatile int = 0; |
| 348 | } // namespace clang::ssaf |
| 349 | |
| 350 | static TUSummaryExtractorRegistry::Add<PointerFlowTUSummaryExtractor> |
| 351 | (PointerFlowEntitySummary::Name, |
| 352 | "Extract pointer flow information" ); |
| 353 | |