1//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10#include "clang/AST/APValue.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/ASTTypeTraits.h"
13#include "clang/AST/Attr.h"
14#include "clang/AST/Decl.h"
15#include "clang/AST/DeclCXX.h"
16#include "clang/AST/DynamicRecursiveASTVisitor.h"
17#include "clang/AST/Expr.h"
18#include "clang/AST/FormatString.h"
19#include "clang/AST/ParentMapContext.h"
20#include "clang/AST/Stmt.h"
21#include "clang/AST/StmtVisitor.h"
22#include "clang/AST/Type.h"
23#include "clang/ASTMatchers/LowLevelHelpers.h"
24#include "clang/Analysis/Support/FixitUtil.h"
25#include "clang/Basic/SourceLocation.h"
26#include "clang/Lex/Lexer.h"
27#include "clang/Lex/Preprocessor.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/STLFunctionalExtras.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/StringRef.h"
33#include <cstddef>
34#include <optional>
35#include <queue>
36#include <set>
37#include <sstream>
38
39using namespace clang;
40
41#ifndef NDEBUG
42namespace {
43class StmtDebugPrinter
44 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
45public:
46 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
47
48 std::string VisitBinaryOperator(const BinaryOperator *BO) {
49 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
50 }
51
52 std::string VisitUnaryOperator(const UnaryOperator *UO) {
53 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
54 }
55
56 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
57 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
58 }
59};
60
61// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
62// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
63static std::string getDREAncestorString(const DeclRefExpr *DRE,
64 ASTContext &Ctx) {
65 std::stringstream SS;
66 const Stmt *St = DRE;
67 StmtDebugPrinter StmtPriner;
68
69 do {
70 SS << StmtPriner.Visit(St);
71
72 DynTypedNodeList StParents = Ctx.getParents(*St);
73
74 if (StParents.size() > 1)
75 return "unavailable due to multiple parents";
76 if (StParents.empty())
77 break;
78 St = StParents.begin()->get<Stmt>();
79 if (St)
80 SS << " ==> ";
81 } while (St);
82 return SS.str();
83}
84
85} // namespace
86#endif /* NDEBUG */
87
88namespace {
89// Using a custom `FastMatcher` instead of ASTMatchers to achieve better
90// performance. FastMatcher uses simple function `matches` to find if a node
91// is a match, avoiding the dependency on the ASTMatchers framework which
92// provide a nice abstraction, but incur big performance costs.
93class FastMatcher {
94public:
95 virtual bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
96 const UnsafeBufferUsageHandler &Handler) = 0;
97 virtual ~FastMatcher() = default;
98};
99
100class MatchResult {
101
102public:
103 template <typename T> const T *getNodeAs(StringRef ID) const {
104 auto It = Nodes.find(Key: ID);
105 if (It == Nodes.end()) {
106 return nullptr;
107 }
108 return It->second.get<T>();
109 }
110
111 void addNode(StringRef ID, const DynTypedNode &Node) { Nodes[ID] = Node; }
112
113private:
114 llvm::StringMap<DynTypedNode> Nodes;
115};
116} // namespace
117
// Comma-separated (and comma-terminated) list of record names that are
// treated as sized containers or views. It is meant to be expanded inside a
// braced list, e.g. `{SIZED_CONTAINER_OR_VIEW_LIST}`.
#define SIZED_CONTAINER_OR_VIEW_LIST                                           \
  "span",                                                                      \
  "array",                                                                     \
  "vector",                                                                    \
  "basic_string_view",                                                         \
  "basic_string",                                                              \
  "initializer_list",
121
122// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
123// except for those belonging to a different callable of "n".
124class MatchDescendantVisitor : public DynamicRecursiveASTVisitor {
125public:
126 // Creates an AST visitor that matches `Matcher` on all
127 // descendants of a given node "n" except for the ones
128 // belonging to a different callable of "n".
129 MatchDescendantVisitor(ASTContext &Context, FastMatcher &Matcher,
130 bool FindAll, bool ignoreUnevaluatedContext,
131 const UnsafeBufferUsageHandler &NewHandler)
132 : Matcher(&Matcher), FindAll(FindAll), Matches(false),
133 ignoreUnevaluatedContext(ignoreUnevaluatedContext),
134 ActiveASTContext(&Context), Handler(&NewHandler) {
135 ShouldVisitTemplateInstantiations = true;
136 ShouldVisitImplicitCode = false; // TODO: let's ignore implicit code for now
137 }
138
139 // Returns true if a match is found in a subtree of `DynNode`, which belongs
140 // to the same callable of `DynNode`.
141 bool findMatch(const DynTypedNode &DynNode) {
142 Matches = false;
143 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
144 TraverseStmt(Node: const_cast<Stmt *>(StmtNode));
145 return Matches;
146 }
147 return false;
148 }
149
150 // The following are overriding methods from the base visitor class.
151 // They are public only to allow CRTP to work. They are *not *part
152 // of the public API of this class.
153
154 // For the matchers so far used in safe buffers, we only need to match
155 // `Stmt`s. To override more as needed.
156
157 bool TraverseDecl(Decl *Node) override {
158 if (!Node)
159 return true;
160 if (!match(Node: *Node))
161 return false;
162 // To skip callables:
163 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Val: Node))
164 return true;
165 // Traverse descendants
166 return DynamicRecursiveASTVisitor::TraverseDecl(D: Node);
167 }
168
169 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) override {
170 // These are unevaluated, except the result expression.
171 if (ignoreUnevaluatedContext)
172 return TraverseStmt(Node: Node->getResultExpr());
173 return DynamicRecursiveASTVisitor::TraverseGenericSelectionExpr(S: Node);
174 }
175
176 bool
177 TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) override {
178 // Unevaluated context.
179 if (ignoreUnevaluatedContext)
180 return true;
181 return DynamicRecursiveASTVisitor::TraverseUnaryExprOrTypeTraitExpr(S: Node);
182 }
183
184 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) override {
185 // Unevaluated context.
186 if (ignoreUnevaluatedContext)
187 return true;
188 return DynamicRecursiveASTVisitor::TraverseTypeOfExprTypeLoc(TL: Node);
189 }
190
191 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) override {
192 // Unevaluated context.
193 if (ignoreUnevaluatedContext)
194 return true;
195 return DynamicRecursiveASTVisitor::TraverseDecltypeTypeLoc(TL: Node);
196 }
197
198 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) override {
199 // Unevaluated context.
200 if (ignoreUnevaluatedContext)
201 return true;
202 return DynamicRecursiveASTVisitor::TraverseCXXNoexceptExpr(S: Node);
203 }
204
205 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) override {
206 // Unevaluated context.
207 if (ignoreUnevaluatedContext)
208 return true;
209 return DynamicRecursiveASTVisitor::TraverseCXXTypeidExpr(S: Node);
210 }
211
212 bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) override {
213 if (!TraverseStmt(Node: Node->getExpr()))
214 return false;
215 return DynamicRecursiveASTVisitor::TraverseCXXDefaultInitExpr(S: Node);
216 }
217
218 bool TraverseStmt(Stmt *Node) override {
219 if (!Node)
220 return true;
221 if (!match(Node: *Node))
222 return false;
223 return DynamicRecursiveASTVisitor::TraverseStmt(S: Node);
224 }
225
226private:
227 // Sets 'Matched' to true if 'Matcher' matches 'Node'
228 //
229 // Returns 'true' if traversal should continue after this function
230 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
231 template <typename T> bool match(const T &Node) {
232 if (Matcher->matches(DynNode: DynTypedNode::create(Node), Ctx&: *ActiveASTContext,
233 Handler: *Handler)) {
234 Matches = true;
235 if (!FindAll)
236 return false; // Abort as soon as a match is found.
237 }
238 return true;
239 }
240
241 FastMatcher *const Matcher;
242 // When true, finds all matches. When false, finds the first match and stops.
243 const bool FindAll;
244 bool Matches;
245 bool ignoreUnevaluatedContext;
246 ASTContext *ActiveASTContext;
247 const UnsafeBufferUsageHandler *Handler;
248};
249
250// Because we're dealing with raw pointers, let's define what we mean by that.
251static bool hasPointerType(const Expr &E) {
252 return isa<PointerType>(Val: E.getType().getCanonicalType());
253}
254
255static bool hasArrayType(const Expr &E) {
256 return isa<ArrayType>(Val: E.getType().getCanonicalType());
257}
258
259static void
260forEachDescendantEvaluatedStmt(const Stmt *S, ASTContext &Ctx,
261 const UnsafeBufferUsageHandler &Handler,
262 FastMatcher &Matcher) {
263 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
264 /*ignoreUnevaluatedContext=*/true, Handler);
265 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
266}
267
268static void forEachDescendantStmt(const Stmt *S, ASTContext &Ctx,
269 const UnsafeBufferUsageHandler &Handler,
270 FastMatcher &Matcher) {
271 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
272 /*ignoreUnevaluatedContext=*/false, Handler);
273 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
274}
275
276// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
277static bool notInSafeBufferOptOut(const Stmt &Node,
278 const UnsafeBufferUsageHandler *Handler) {
279 return !Handler->isSafeBufferOptOut(Loc: Node.getBeginLoc());
280}
281
282static bool
283ignoreUnsafeBufferInContainer(const Stmt &Node,
284 const UnsafeBufferUsageHandler *Handler) {
285 return Handler->ignoreUnsafeBufferInContainer(Loc: Node.getBeginLoc());
286}
287
288static bool ignoreUnsafeLibcCall(const ASTContext &Ctx, const Stmt &Node,
289 const UnsafeBufferUsageHandler *Handler) {
290 if (Ctx.getLangOpts().CPlusPlus)
291 return Handler->ignoreUnsafeBufferInLibcCall(Loc: Node.getBeginLoc());
292 return true; /* Only warn about libc calls for C++ */
293}
294
295// Finds any expression 'e' such that `OnResult`
296// matches 'e' and 'e' is in an Unspecified Lvalue Context.
297static void findStmtsInUnspecifiedLvalueContext(
298 const Stmt *S, const llvm::function_ref<void(const Expr *)> OnResult) {
299 if (const auto *CE = dyn_cast<ImplicitCastExpr>(Val: S);
300 CE && CE->getCastKind() == CastKind::CK_LValueToRValue)
301 OnResult(CE->getSubExpr());
302 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
303 BO && BO->getOpcode() == BO_Assign)
304 OnResult(BO->getLHS());
305}
306
307// Finds any expression `e` such that `InnerMatcher` matches `e` and
308// `e` is in an Unspecified Pointer Context (UPC).
309static void findStmtsInUnspecifiedPointerContext(
310 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
311 // A UPC can be
312 // 1. an argument of a function call (except the callee has [[unsafe_...]]
313 // attribute), or
314 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
315 // 3. the operand of a comparator operation; or
316 // 4. the operand of a pointer subtraction operation
317 // (i.e., computing the distance between two pointers); or ...
318
319 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
320 if (const auto *FnDecl = CE->getDirectCallee();
321 FnDecl && FnDecl->hasAttr<UnsafeBufferUsageAttr>())
322 return;
323 ast_matchers::matchEachArgumentWithParamType(
324 Node: *CE, OnParamAndArg: [&InnerMatcher](QualType Type, const Expr *Arg) {
325 if (Type->isAnyPointerType())
326 InnerMatcher(Arg);
327 });
328 }
329
330 if (auto *CE = dyn_cast<CastExpr>(Val: S)) {
331 if (CE->getCastKind() != CastKind::CK_PointerToIntegral &&
332 CE->getCastKind() != CastKind::CK_PointerToBoolean)
333 return;
334 if (!hasPointerType(E: *CE->getSubExpr()))
335 return;
336 InnerMatcher(CE->getSubExpr());
337 }
338
339 // Pointer comparison operator.
340 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
341 BO && (BO->getOpcode() == BO_EQ || BO->getOpcode() == BO_NE ||
342 BO->getOpcode() == BO_LT || BO->getOpcode() == BO_LE ||
343 BO->getOpcode() == BO_GT || BO->getOpcode() == BO_GE)) {
344 auto *LHS = BO->getLHS();
345 if (hasPointerType(E: *LHS))
346 InnerMatcher(LHS);
347
348 auto *RHS = BO->getRHS();
349 if (hasPointerType(E: *RHS))
350 InnerMatcher(RHS);
351 }
352
353 // Pointer subtractions.
354 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
355 BO && BO->getOpcode() == BO_Sub && hasPointerType(E: *BO->getLHS()) &&
356 hasPointerType(E: *BO->getRHS())) {
357 // Note that here we need both LHS and RHS to be
358 // pointer. Then the inner matcher can match any of
359 // them:
360 InnerMatcher(BO->getLHS());
361 InnerMatcher(BO->getRHS());
362 }
363 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now
364 // we don't have to check that.)
365}
366
367// Finds statements in unspecified untyped context i.e. any expression 'e' such
368// that `InnerMatcher` matches 'e' and 'e' is in an unspecified untyped context
369// (i.e the expression 'e' isn't evaluated to an RValue). For example, consider
370// the following code:
371// int *p = new int[4];
372// int *q = new int[4];
373// if ((p = q)) {}
374// p = q;
375// The expression `p = q` in the conditional of the `if` statement
376// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
377// in the assignment statement is in an untyped context.
378static void findStmtsInUnspecifiedUntypedContext(
379 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
380 // An unspecified context can be
381 // 1. A compound statement,
382 // 2. The body of an if statement
383 // 3. Body of a loop
384 if (auto *CS = dyn_cast<CompoundStmt>(Val: S)) {
385 for (auto *Child : CS->body())
386 InnerMatcher(Child);
387 }
388 if (auto *IfS = dyn_cast<IfStmt>(Val: S)) {
389 if (IfS->getThen())
390 InnerMatcher(IfS->getThen());
391 if (IfS->getElse())
392 InnerMatcher(IfS->getElse());
393 }
394 // FIXME: Handle loop bodies.
395}
396
397// Returns true iff integer E1 is equivalent to integer E2.
398//
399// For now we only support such expressions:
400// expr := DRE | const-value | expr BO expr
401// BO := '*' | '+'
402//
403// FIXME: We can reuse the expression comparator of the interop analysis after
404// it has been upstreamed.
405static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx);
406static bool areEqualIntegralBinaryOperators(const BinaryOperator *E1,
407 const Expr *E2_LHS,
408 BinaryOperatorKind BOP,
409 const Expr *E2_RHS,
410 ASTContext &Ctx) {
411 if (E1->getOpcode() == BOP) {
412 switch (BOP) {
413 // Commutative operators:
414 case BO_Mul:
415 case BO_Add:
416 return (areEqualIntegers(E1: E1->getLHS(), E2: E2_LHS, Ctx) &&
417 areEqualIntegers(E1: E1->getRHS(), E2: E2_RHS, Ctx)) ||
418 (areEqualIntegers(E1: E1->getLHS(), E2: E2_RHS, Ctx) &&
419 areEqualIntegers(E1: E1->getRHS(), E2: E2_LHS, Ctx));
420 default:
421 return false;
422 }
423 }
424 return false;
425}
426
427static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
428 E1 = E1->IgnoreParenImpCasts();
429 E2 = E2->IgnoreParenImpCasts();
430 if (!E1->getType()->isIntegerType() || E1->getType() != E2->getType())
431 return false;
432
433 Expr::EvalResult ER1, ER2;
434
435 // If both are constants:
436 if (E1->EvaluateAsInt(Result&: ER1, Ctx) && E2->EvaluateAsInt(Result&: ER2, Ctx))
437 return ER1.Val.getInt() == ER2.Val.getInt();
438
439 // Otherwise, they should have identical stmt kind:
440 if (E1->getStmtClass() != E2->getStmtClass())
441 return false;
442 switch (E1->getStmtClass()) {
443 case Stmt::DeclRefExprClass:
444 return cast<DeclRefExpr>(Val: E1)->getDecl() == cast<DeclRefExpr>(Val: E2)->getDecl();
445 case Stmt::BinaryOperatorClass: {
446 auto BO2 = cast<BinaryOperator>(Val: E2);
447 return areEqualIntegralBinaryOperators(E1: cast<BinaryOperator>(Val: E1),
448 E2_LHS: BO2->getLHS(), BOP: BO2->getOpcode(),
449 E2_RHS: BO2->getRHS(), Ctx);
450 }
451 default:
452 return false;
453 }
454}
455
456// Given a two-param std::span construct call, matches iff the call has the
457// following forms:
458// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
459// 2. `std::span<T>{new T, 1}`
460// 3. `std::span<T>{&var, 1}` or `std::span<T>{std::addressof(...), 1}`
461// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
462// `n`
463// 5. `std::span<T>{any, 0}`
464// 6. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
465// `f` is a function with attribute `alloc_size(N, M)`;
466// `args` represents the list of arguments;
467// `N, M` are parameter indexes to the allocating element number and size.
468// Sometimes, there is only one parameter index representing the total
469// size.
470// 7. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the
471// SIZED_CONTAINER_OR_VIEW_LIST.
472static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
473 ASTContext &Ctx) {
474 assert(Node.getNumArgs() == 2 &&
475 "expecting a two-parameter std::span constructor");
476 const Expr *Arg0 = Node.getArg(Arg: 0)->IgnoreParenImpCasts();
477 const Expr *Arg1 = Node.getArg(Arg: 1)->IgnoreParenImpCasts();
478 auto HaveEqualConstantValues = [&Ctx](const Expr *E0, const Expr *E1) {
479 if (auto E0CV = E0->getIntegerConstantExpr(Ctx))
480 if (auto E1CV = E1->getIntegerConstantExpr(Ctx)) {
481 return llvm::APSInt::compareValues(I1: *E0CV, I2: *E1CV) == 0;
482 }
483 return false;
484 };
485 auto AreSameDRE = [](const Expr *E0, const Expr *E1) {
486 if (auto *DRE0 = dyn_cast<DeclRefExpr>(Val: E0))
487 if (auto *DRE1 = dyn_cast<DeclRefExpr>(Val: E1)) {
488 return DRE0->getDecl() == DRE1->getDecl();
489 }
490 return false;
491 };
492 std::optional<llvm::APSInt> Arg1CV = Arg1->getIntegerConstantExpr(Ctx);
493
494 if (Arg1CV && Arg1CV->isZero())
495 // Check form 5:
496 return true;
497
498 // Check forms 1-3:
499 switch (Arg0->getStmtClass()) {
500 case Stmt::CXXNewExprClass:
501 if (auto Size = cast<CXXNewExpr>(Val: Arg0)->getArraySize()) {
502 // Check form 1:
503 return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) ||
504 HaveEqualConstantValues(*Size, Arg1);
505 }
506 // TODO: what's placeholder type? avoid it for now.
507 if (!cast<CXXNewExpr>(Val: Arg0)->hasPlaceholderType()) {
508 // Check form 2:
509 return Arg1CV && Arg1CV->isOne();
510 }
511 break;
512 case Stmt::UnaryOperatorClass:
513 if (cast<UnaryOperator>(Val: Arg0)->getOpcode() ==
514 UnaryOperator::Opcode::UO_AddrOf)
515 // Check form 3:
516 return Arg1CV && Arg1CV->isOne();
517 break;
518 case Stmt::CallExprClass:
519 // Check form 3:
520 if (const auto *CE = dyn_cast<CallExpr>(Val: Arg0)) {
521 const auto FnDecl = CE->getDirectCallee();
522 if (FnDecl && FnDecl->getNameAsString() == "addressof" &&
523 FnDecl->isInStdNamespace()) {
524 return Arg1CV && Arg1CV->isOne();
525 }
526 }
527 break;
528 default:
529 break;
530 }
531
532 QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
533
534 if (auto *ConstArrTy = Ctx.getAsConstantArrayType(T: Arg0Ty)) {
535 const llvm::APSInt ConstArrSize = llvm::APSInt(ConstArrTy->getSize());
536
537 // Check form 4:
538 return Arg1CV && llvm::APSInt::compareValues(I1: ConstArrSize, I2: *Arg1CV) == 0;
539 }
540 // Check form 6:
541 if (auto CCast = dyn_cast<CStyleCastExpr>(Val: Arg0)) {
542 if (!CCast->getType()->isPointerType())
543 return false;
544
545 QualType PteTy = CCast->getType()->getPointeeType();
546
547 if (!(PteTy->isConstantSizeType() && Ctx.getTypeSizeInChars(T: PteTy).isOne()))
548 return false;
549
550 if (const auto *Call = dyn_cast<CallExpr>(Val: CCast->getSubExpr())) {
551 if (const FunctionDecl *FD = Call->getDirectCallee())
552 if (auto *AllocAttr = FD->getAttr<AllocSizeAttr>()) {
553 const Expr *EleSizeExpr =
554 Call->getArg(Arg: AllocAttr->getElemSizeParam().getASTIndex());
555 // NumElemIdx is invalid if AllocSizeAttr has 1 argument:
556 ParamIdx NumElemIdx = AllocAttr->getNumElemsParam();
557
558 if (!NumElemIdx.isValid())
559 return areEqualIntegers(E1: Arg1, E2: EleSizeExpr, Ctx);
560
561 const Expr *NumElesExpr = Call->getArg(Arg: NumElemIdx.getASTIndex());
562
563 if (auto BO = dyn_cast<BinaryOperator>(Val: Arg1))
564 return areEqualIntegralBinaryOperators(E1: BO, E2_LHS: NumElesExpr, BOP: BO_Mul,
565 E2_RHS: EleSizeExpr, Ctx);
566 }
567 }
568 }
569 // Check form 7:
570 auto IsMethodCallToSizedObject = [](const Stmt *Node, StringRef MethodName) {
571 if (const auto *MC = dyn_cast<CXXMemberCallExpr>(Val: Node)) {
572 const auto *MD = MC->getMethodDecl();
573 const auto *RD = MC->getRecordDecl();
574
575 if (RD && MD)
576 if (auto *II = RD->getDeclName().getAsIdentifierInfo();
577 II && RD->isInStdNamespace())
578 return llvm::is_contained(Set: {SIZED_CONTAINER_OR_VIEW_LIST},
579 Element: II->getName()) &&
580 MD->getName() == MethodName;
581 }
582 return false;
583 };
584
585 if (IsMethodCallToSizedObject(Arg0, "begin") &&
586 IsMethodCallToSizedObject(Arg1, "end"))
587 return AreSameDRE(
588 // We know Arg0 and Arg1 are `CXXMemberCallExpr`s:
589 cast<CXXMemberCallExpr>(Val: Arg0)
590 ->getImplicitObjectArgument()
591 ->IgnoreParenImpCasts(),
592 cast<CXXMemberCallExpr>(Val: Arg1)
593 ->getImplicitObjectArgument()
594 ->IgnoreParenImpCasts());
595 return false;
596}
597
598static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
599 const ASTContext &Ctx) {
600 // FIXME: Proper solution:
601 // - refactor Sema::CheckArrayAccess
602 // - split safe/OOB/unknown decision logic from diagnostics emitting code
603 // - e. g. "Try harder to find a NamedDecl to point at in the note."
604 // already duplicated
605 // - call both from Sema and from here
606
607 uint64_t limit;
608 if (const auto *CATy =
609 dyn_cast<ConstantArrayType>(Val: Node.getBase()
610 ->IgnoreParenImpCasts()
611 ->getType()
612 ->getUnqualifiedDesugaredType())) {
613 limit = CATy->getLimitedSize();
614 } else if (const auto *SLiteral = dyn_cast<clang::StringLiteral>(
615 Val: Node.getBase()->IgnoreParenImpCasts())) {
616 limit = SLiteral->getLength() + 1;
617 } else {
618 return false;
619 }
620
621 Expr::EvalResult EVResult;
622 const Expr *IndexExpr = Node.getIdx();
623 if (!IndexExpr->isValueDependent() &&
624 IndexExpr->EvaluateAsInt(Result&: EVResult, Ctx)) {
625 llvm::APSInt ArrIdx = EVResult.Val.getInt();
626 // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
627 // bug
628 if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < limit)
629 return true;
630 } else if (const auto *BE = dyn_cast<BinaryOperator>(Val: IndexExpr)) {
631 // For an integer expression `e` and an integer constant `n`, `e & n` and
632 // `n & e` are bounded by `n`:
633 if (BE->getOpcode() != BO_And && BE->getOpcode() != BO_Rem)
634 return false;
635
636 const Expr *LHS = BE->getLHS();
637 const Expr *RHS = BE->getRHS();
638
639 if (BE->getOpcode() == BO_Rem) {
640 // If n is a negative number, then n % const can be greater than const
641 if (!LHS->getType()->isUnsignedIntegerType()) {
642 return false;
643 }
644
645 if (!RHS->isValueDependent() && RHS->EvaluateAsInt(Result&: EVResult, Ctx)) {
646 llvm::APSInt result = EVResult.Val.getInt();
647 if (result.isNonNegative() && result.getLimitedValue() <= limit)
648 return true;
649 }
650
651 return false;
652 }
653
654 if ((!LHS->isValueDependent() &&
655 LHS->EvaluateAsInt(Result&: EVResult, Ctx)) || // case: `n & e`
656 (!RHS->isValueDependent() &&
657 RHS->EvaluateAsInt(Result&: EVResult, Ctx))) { // `e & n`
658 llvm::APSInt result = EVResult.Val.getInt();
659 if (result.isNonNegative() && result.getLimitedValue() < limit)
660 return true;
661 }
662 return false;
663 }
664 return false;
665}
666
667namespace libc_func_matchers {
668// Under `libc_func_matchers`, define a set of matchers that match unsafe
669// functions in libc and unsafe calls to them.
670
671// A tiny parser to strip off common prefix and suffix of libc function names
672// in real code.
673//
674// Given a function name, `matchName` returns `CoreName` according to the
675// following grammar:
676//
677// LibcName := CoreName | CoreName + "_s"
678// MatchingName := "__builtin_" + LibcName |
679// "__builtin___" + LibcName + "_chk" |
680// "__asan_" + LibcName
681//
682struct LibcFunNamePrefixSuffixParser {
683 StringRef matchName(StringRef FunName, bool isBuiltin) {
684 // Try to match __builtin_:
685 if (isBuiltin && FunName.starts_with(Prefix: "__builtin_"))
686 // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
687 // no match:
688 return matchLibcNameOrBuiltinChk(
689 Name: FunName.drop_front(N: 10 /* truncate "__builtin_" */));
690 // Try to match __asan_:
691 if (FunName.starts_with(Prefix: "__asan_"))
692 return matchLibcName(Name: FunName.drop_front(N: 7 /* truncate of "__asan_" */));
693 return matchLibcName(Name: FunName);
694 }
695
696 // Parameter `Name` is the substring after stripping off the prefix
697 // "__builtin_".
698 StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
699 if (Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "_chk"))
700 return matchLibcName(
701 Name: Name.drop_front(N: 2).drop_back(N: 4) /* truncate "__" and "_chk" */);
702 return matchLibcName(Name);
703 }
704
705 StringRef matchLibcName(StringRef Name) {
706 if (Name.ends_with(Suffix: "_s"))
707 return Name.drop_back(N: 2 /* truncate "_s" */);
708 return Name;
709 }
710};
711
712// A pointer type expression is known to be null-terminated, if it has the
713// form: E.c_str(), for any expression E of `std::string` type.
714static bool isNullTermPointer(const Expr *Ptr) {
715 if (isa<clang::StringLiteral>(Val: Ptr->IgnoreParenImpCasts()))
716 return true;
717 if (isa<PredefinedExpr>(Val: Ptr->IgnoreParenImpCasts()))
718 return true;
719 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Ptr->IgnoreParenImpCasts())) {
720 const CXXMethodDecl *MD = MCE->getMethodDecl();
721 const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
722
723 if (MD && RD && RD->isInStdNamespace() && MD->getIdentifier())
724 if (MD->getName() == "c_str" && RD->getName() == "basic_string")
725 return true;
726 }
727 return false;
728}
729
730// Return true iff at least one of following cases holds:
731// 1. Format string is a literal and there is an unsafe pointer argument
732// corresponding to an `s` specifier;
733// 2. Format string is not a literal and there is least an unsafe pointer
734// argument (including the formatter argument).
735//
736// `UnsafeArg` is the output argument that will be set only if this function
737// returns true.
738static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
739 const unsigned FmtArgIdx, ASTContext &Ctx,
740 bool isKprintf = false) {
741 class StringFormatStringHandler
742 : public analyze_format_string::FormatStringHandler {
743 const CallExpr *Call;
744 unsigned FmtArgIdx;
745 const Expr *&UnsafeArg;
746
747 public:
748 StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
749 const Expr *&UnsafeArg)
750 : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
751
752 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
753 const char *startSpecifier,
754 unsigned specifierLen,
755 const TargetInfo &Target) override {
756 if (FS.getConversionSpecifier().getKind() ==
757 analyze_printf::PrintfConversionSpecifier::sArg) {
758 unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
759
760 if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
761 if (!isNullTermPointer(Ptr: Call->getArg(Arg: ArgIdx))) {
762 UnsafeArg = Call->getArg(Arg: ArgIdx); // output
763 // returning false stops parsing immediately
764 return false;
765 }
766 }
767 return true; // continue parsing
768 }
769 };
770
771 const Expr *Fmt = Call->getArg(Arg: FmtArgIdx);
772
773 if (auto *SL = dyn_cast<clang::StringLiteral>(Val: Fmt->IgnoreParenImpCasts())) {
774 StringRef FmtStr;
775
776 if (SL->getCharByteWidth() == 1)
777 FmtStr = SL->getString();
778 else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx))
779 FmtStr = *EvaledFmtStr;
780 else
781 goto CHECK_UNSAFE_PTR;
782
783 StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
784
785 return analyze_format_string::ParsePrintfString(
786 H&: Handler, beg: FmtStr.begin(), end: FmtStr.end(), LO: Ctx.getLangOpts(),
787 Target: Ctx.getTargetInfo(), isFreeBSDKPrintf: isKprintf);
788 }
789CHECK_UNSAFE_PTR:
790 // If format is not a string literal, we cannot analyze the format string.
791 // In this case, this call is considered unsafe if at least one argument
792 // (including the format argument) is unsafe pointer.
793 return llvm::any_of(
794 Range: llvm::make_range(x: Call->arg_begin() + FmtArgIdx, y: Call->arg_end()),
795 P: [&UnsafeArg](const Expr *Arg) -> bool {
796 if (Arg->getType()->isPointerType() && !isNullTermPointer(Ptr: Arg)) {
797 UnsafeArg = Arg;
798 return true;
799 }
800 return false;
801 });
802}
803
804// Matches a FunctionDecl node such that
805// 1. It's name, after stripping off predefined prefix and suffix, is
806// `CoreName`; and
807// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
808// is a set of libc function names.
809//
810// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
811// The notation `CoreName[str/wcs]` means a new name obtained from replace
812// string "wcs" with "str" in `CoreName`.
813static bool isPredefinedUnsafeLibcFunc(const FunctionDecl &Node) {
814 static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
815 if (!PredefinedNames)
816 PredefinedNames =
817 std::make_unique<std::set<StringRef>, std::set<StringRef>>(args: {
818 // numeric conversion:
819 "atof",
820 "atoi",
821 "atol",
822 "atoll",
823 "strtol",
824 "strtoll",
825 "strtoul",
826 "strtoull",
827 "strtof",
828 "strtod",
829 "strtold",
830 "strtoimax",
831 "strtoumax",
832 // "strfromf", "strfromd", "strfroml", // C23?
833 // string manipulation:
834 "strcpy",
835 "strncpy",
836 "strlcpy",
837 "strcat",
838 "strncat",
839 "strlcat",
840 "strxfrm",
841 "strdup",
842 "strndup",
843 // string examination:
844 "strlen",
845 "strnlen",
846 "strcmp",
847 "strncmp",
848 "stricmp",
849 "strcasecmp",
850 "strcoll",
851 "strchr",
852 "strrchr",
853 "strspn",
854 "strcspn",
855 "strpbrk",
856 "strstr",
857 "strtok",
858 // "mem-" functions
859 "memchr",
860 "wmemchr",
861 "memcmp",
862 "wmemcmp",
863 "memcpy",
864 "memccpy",
865 "mempcpy",
866 "wmemcpy",
867 "memmove",
868 "wmemmove",
869 "memset",
870 "wmemset",
871 // IO:
872 "fread",
873 "fwrite",
874 "fgets",
875 "fgetws",
876 "gets",
877 "fputs",
878 "fputws",
879 "puts",
880 // others
881 "strerror_s",
882 "strerror_r",
883 "bcopy",
884 "bzero",
885 "bsearch",
886 "qsort",
887 });
888
889 auto *II = Node.getIdentifier();
890
891 if (!II)
892 return false;
893
894 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
895 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
896
897 // Match predefined names:
898 if (PredefinedNames->find(x: Name) != PredefinedNames->end())
899 return true;
900
901 std::string NameWCS = Name.str();
902 size_t WcsPos = NameWCS.find(s: "wcs");
903
904 while (WcsPos != std::string::npos) {
905 NameWCS[WcsPos++] = 's';
906 NameWCS[WcsPos++] = 't';
907 NameWCS[WcsPos++] = 'r';
908 WcsPos = NameWCS.find(s: "wcs", pos: WcsPos);
909 }
910 if (PredefinedNames->find(x: NameWCS) != PredefinedNames->end())
911 return true;
912 // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
913 // all should end with "scanf"):
914 return Name.ends_with(Suffix: "scanf");
915}
916
917// Match a call to one of the `v*printf` functions taking `va_list`. We cannot
918// check safety for these functions so they should be changed to their
919// non-va_list versions.
920static bool isUnsafeVaListPrintfFunc(const FunctionDecl &Node) {
921 auto *II = Node.getIdentifier();
922
923 if (!II)
924 return false;
925
926 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
927 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
928
929 if (!Name.ends_with(Suffix: "printf"))
930 return false; // neither printf nor scanf
931 return Name.starts_with(Prefix: "v");
932}
933
934// Matches a call to one of the `sprintf` functions as they are always unsafe
935// and should be changed to `snprintf`.
936static bool isUnsafeSprintfFunc(const FunctionDecl &Node) {
937 auto *II = Node.getIdentifier();
938
939 if (!II)
940 return false;
941
942 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
943 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
944
945 if (!Name.ends_with(Suffix: "printf") ||
946 // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
947 Name.starts_with(Prefix: "v"))
948 return false;
949
950 StringRef Prefix = Name.drop_back(N: 6);
951
952 if (Prefix.ends_with(Suffix: "w"))
953 Prefix = Prefix.drop_back(N: 1);
954 return Prefix == "s";
955}
956
957// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
958// character versions. Calls to these functions can be safe if their arguments
959// are carefully made safe.
960static bool isNormalPrintfFunc(const FunctionDecl &Node) {
961 auto *II = Node.getIdentifier();
962
963 if (!II)
964 return false;
965
966 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
967 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
968
969 if (!Name.ends_with(Suffix: "printf") || Name.starts_with(Prefix: "v"))
970 return false;
971
972 StringRef Prefix = Name.drop_back(N: 6);
973
974 if (Prefix.ends_with(Suffix: "w"))
975 Prefix = Prefix.drop_back(N: 1);
976
977 return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
978}
979
980// This matcher requires that it is known that the callee `isNormalPrintf`.
981// Then if the format string is a string literal, this matcher matches when at
982// least one string argument is unsafe. If the format is not a string literal,
983// this matcher matches when at least one pointer type argument is unsafe.
984static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
985 MatchResult &Result, llvm::StringRef Tag) {
986 // Determine what printf it is by examining formal parameters:
987 const FunctionDecl *FD = Node.getDirectCallee();
988
989 assert(FD && "It should have been checked that FD is non-null.");
990
991 unsigned NumParms = FD->getNumParams();
992
993 if (NumParms < 1)
994 return false; // possibly some user-defined printf function
995
996 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
997
998 if (!FirstParmTy->isPointerType())
999 return false; // possibly some user-defined printf function
1000
1001 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
1002
1003 if (!Ctx.getFILEType()
1004 .isNull() && //`FILE *` must be in the context if it is fprintf
1005 FirstPteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
1006 // It is a fprintf:
1007 const Expr *UnsafeArg;
1008
1009 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 1, Ctx, isKprintf: false)) {
1010 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1011 return true;
1012 }
1013 return false;
1014 }
1015
1016 if (FirstPteTy.isConstQualified()) {
1017 // If the first parameter is a `const char *`, it is a printf/kprintf:
1018 bool isKprintf = false;
1019 const Expr *UnsafeArg;
1020
1021 if (auto *II = FD->getIdentifier())
1022 isKprintf = II->getName() == "kprintf";
1023 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 0, Ctx, isKprintf)) {
1024 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1025 return true;
1026 }
1027 return false;
1028 }
1029
1030 if (NumParms > 2) {
1031 QualType SecondParmTy = FD->getParamDecl(i: 1)->getType();
1032
1033 if (!FirstPteTy.isConstQualified() && SecondParmTy->isIntegerType()) {
1034 // If the first parameter type is non-const qualified `char *` and the
1035 // second is an integer, it is a snprintf:
1036 const Expr *UnsafeArg;
1037
1038 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 2, Ctx, isKprintf: false)) {
1039 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1040 return true;
1041 }
1042 return false;
1043 }
1044 }
1045 // We don't really recognize this "normal" printf, the only thing we
1046 // can do is to require all pointers to be null-terminated:
1047 for (const auto *Arg : Node.arguments())
1048 if (Arg->getType()->isPointerType() && !isNullTermPointer(Ptr: Arg)) {
1049 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *Arg));
1050 return true;
1051 }
1052 return false;
1053}
1054
1055// This matcher requires that it is known that the callee `isNormalPrintf`.
1056// Then it matches if the first two arguments of the call is a pointer and an
1057// integer and they are not in a safe pattern.
1058//
1059// For the first two arguments: `ptr` and `size`, they are safe if in the
1060// following patterns:
1061//
1062// Pattern 1:
1063// ptr := DRE.data();
1064// size:= DRE.size()/DRE.size_bytes()
1065// And DRE is a hardened container or view.
1066//
1067// Pattern 2:
1068// ptr := Constant-Array-DRE;
1069// size:= any expression that has compile-time constant value equivalent to
1070// sizeof (Constant-Array-DRE)
1071static bool hasUnsafeSnprintfBuffer(const CallExpr &Node,
1072 const ASTContext &Ctx) {
1073 const FunctionDecl *FD = Node.getDirectCallee();
1074
1075 assert(FD && "It should have been checked that FD is non-null.");
1076
1077 if (FD->getNumParams() < 3)
1078 return false; // Not an snprint
1079
1080 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
1081
1082 if (!FirstParmTy->isPointerType())
1083 return false; // Not an snprint
1084
1085 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
1086 const Expr *Buf = Node.getArg(Arg: 0), *Size = Node.getArg(Arg: 1);
1087
1088 if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
1089 !Size->getType()->isIntegerType())
1090 return false; // not an snprintf call
1091
1092 // Pattern 1:
1093 static StringRef SizedObjs[] = {SIZED_CONTAINER_OR_VIEW_LIST};
1094 Buf = Buf->IgnoreParenImpCasts();
1095 Size = Size->IgnoreParenImpCasts();
1096 if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Val: Buf))
1097 if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Val: Size)) {
1098 auto *DREOfPtr = dyn_cast<DeclRefExpr>(
1099 Val: MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
1100 auto *DREOfSize = dyn_cast<DeclRefExpr>(
1101 Val: MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
1102
1103 if (!DREOfPtr || !DREOfSize)
1104 return true; // not in safe pattern
1105 if (DREOfPtr->getDecl() != DREOfSize->getDecl())
1106 return true; // not in safe pattern
1107 if (MCEPtr->getMethodDecl()->getName() != "data")
1108 return true; // not in safe pattern
1109
1110 if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
1111 // Note here the pointer must be a pointer-to-char type unless there
1112 // is explicit casting. If there is explicit casting, this branch
1113 // is unreachable. Thus, at this branch "size" and "size_bytes" are
1114 // equivalent as the pointer is a char pointer:
1115 MCESize->getMethodDecl()->getName() == "size")
1116 for (StringRef SizedObj : SizedObjs)
1117 if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
1118 MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
1119 SizedObj)
1120 return false; // It is in fact safe
1121 }
1122
1123 // Pattern 2:
1124 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Buf->IgnoreParenImpCasts())) {
1125 if (auto *CAT = Ctx.getAsConstantArrayType(T: DRE->getType())) {
1126 Expr::EvalResult ER;
1127 // The array element type must be compatible with `char` otherwise an
1128 // explicit cast will be needed, which will make this check unreachable.
1129 // Therefore, the array extent is same as its' bytewise size.
1130 if (Size->EvaluateAsInt(Result&: ER, Ctx)) {
1131 llvm::APSInt EVal = ER.Val.getInt(); // Size must have integer type
1132
1133 return llvm::APSInt::compareValues(
1134 I1: EVal, I2: llvm::APSInt(CAT->getSize(), true)) != 0;
1135 }
1136 }
1137 }
1138 return true; // ptr and size are not in safe pattern
1139}
1140} // namespace libc_func_matchers
1141
1142namespace {
1143// Because the analysis revolves around variables and their types, we'll need to
1144// track uses of variables (aka DeclRefExprs).
1145using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
1146
1147// Convenience typedef.
1148using FixItList = SmallVector<FixItHint, 4>;
1149} // namespace
1150
1151namespace {
1152/// Gadget is an individual operation in the code that may be of interest to
1153/// this analysis. Each (non-abstract) subclass corresponds to a specific
1154/// rigid AST structure that constitutes an operation on a pointer-type object.
1155/// Discovery of a gadget in the code corresponds to claiming that we understand
1156/// what this part of code is doing well enough to potentially improve it.
1157/// Gadgets can be warning (immediately deserving a warning) or fixable (not
1158/// always deserving a warning per se, but requires our attention to identify
1159/// it warrants a fixit).
1160class Gadget {
1161public:
1162 enum class Kind {
1163#define GADGET(x) x,
1164#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1165 };
1166
1167 Gadget(Kind K) : K(K) {}
1168
1169 Kind getKind() const { return K; }
1170
1171#ifndef NDEBUG
1172 StringRef getDebugName() const {
1173 switch (K) {
1174#define GADGET(x) \
1175 case Kind::x: \
1176 return #x;
1177#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1178 }
1179 llvm_unreachable("Unhandled Gadget::Kind enum");
1180 }
1181#endif
1182
1183 virtual bool isWarningGadget() const = 0;
1184 // TODO remove this method from WarningGadget interface. It's only used for
1185 // debug prints in FixableGadget.
1186 virtual SourceLocation getSourceLoc() const = 0;
1187
1188 /// Returns the list of pointer-type variables on which this gadget performs
1189 /// its operation. Typically, there's only one variable. This isn't a list
1190 /// of all DeclRefExprs in the gadget's AST!
1191 virtual DeclUseList getClaimedVarUseSites() const = 0;
1192
1193 virtual ~Gadget() = default;
1194
1195private:
1196 Kind K;
1197};
1198
1199/// Warning gadgets correspond to unsafe code patterns that warrants
1200/// an immediate warning.
1201class WarningGadget : public Gadget {
1202public:
1203 WarningGadget(Kind K) : Gadget(K) {}
1204
1205 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
1206 bool isWarningGadget() const final { return true; }
1207
1208 virtual void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1209 bool IsRelatedToDecl,
1210 ASTContext &Ctx) const = 0;
1211
1212 virtual SmallVector<const Expr *, 1> getUnsafePtrs() const = 0;
1213};
1214
1215/// Fixable gadgets correspond to code patterns that aren't always unsafe but
1216/// need to be properly recognized in order to emit fixes. For example, if a raw
1217/// pointer-type variable is replaced by a safe C++ container, every use of such
1218/// variable must be carefully considered and possibly updated.
1219class FixableGadget : public Gadget {
1220public:
1221 FixableGadget(Kind K) : Gadget(K) {}
1222
1223 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
1224 bool isWarningGadget() const final { return false; }
1225
1226 /// Returns a fixit that would fix the current gadget according to
1227 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
1228 /// returns an empty list if no fixes are necessary.
1229 virtual std::optional<FixItList> getFixits(const FixitStrategy &) const {
1230 return std::nullopt;
1231 }
1232
1233 /// Returns a list of two elements where the first element is the LHS of a
1234 /// pointer assignment statement and the second element is the RHS. This
1235 /// two-element list represents the fact that the LHS buffer gets its bounds
1236 /// information from the RHS buffer. This information will be used later to
1237 /// group all those variables whose types must be modified together to prevent
1238 /// type mismatches.
1239 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1240 getStrategyImplications() const {
1241 return std::nullopt;
1242 }
1243};
1244
1245static bool isSupportedVariable(const DeclRefExpr &Node) {
1246 const Decl *D = Node.getDecl();
1247 return D != nullptr && isa<VarDecl>(Val: D);
1248}
1249
1250using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
1251using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
1252
1253/// An increment of a pointer-type value is unsafe as it may run the pointer
1254/// out of bounds.
1255class IncrementGadget : public WarningGadget {
1256 static constexpr const char *const OpTag = "op";
1257 const UnaryOperator *Op;
1258
1259public:
1260 IncrementGadget(const MatchResult &Result)
1261 : WarningGadget(Kind::Increment),
1262 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1263
1264 static bool classof(const Gadget *G) {
1265 return G->getKind() == Kind::Increment;
1266 }
1267
1268 static bool matches(const Stmt *S, const ASTContext &Ctx,
1269 MatchResult &Result) {
1270 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1271 if (!UO || !UO->isIncrementOp())
1272 return false;
1273 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1274 return false;
1275 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1276 return true;
1277 }
1278
1279 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1280 bool IsRelatedToDecl,
1281 ASTContext &Ctx) const override {
1282 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1283 }
1284 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1285
1286 DeclUseList getClaimedVarUseSites() const override {
1287 SmallVector<const DeclRefExpr *, 2> Uses;
1288 if (const auto *DRE =
1289 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1290 Uses.push_back(Elt: DRE);
1291 }
1292
1293 return std::move(Uses);
1294 }
1295
1296 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1297 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1298 }
1299};
1300
1301/// A decrement of a pointer-type value is unsafe as it may run the pointer
1302/// out of bounds.
1303class DecrementGadget : public WarningGadget {
1304 static constexpr const char *const OpTag = "op";
1305 const UnaryOperator *Op;
1306
1307public:
1308 DecrementGadget(const MatchResult &Result)
1309 : WarningGadget(Kind::Decrement),
1310 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1311
1312 static bool classof(const Gadget *G) {
1313 return G->getKind() == Kind::Decrement;
1314 }
1315
1316 static bool matches(const Stmt *S, const ASTContext &Ctx,
1317 MatchResult &Result) {
1318 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1319 if (!UO || !UO->isDecrementOp())
1320 return false;
1321 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1322 return false;
1323 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1324 return true;
1325 }
1326
1327 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1328 bool IsRelatedToDecl,
1329 ASTContext &Ctx) const override {
1330 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1331 }
1332 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1333
1334 DeclUseList getClaimedVarUseSites() const override {
1335 if (const auto *DRE =
1336 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1337 return {DRE};
1338 }
1339
1340 return {};
1341 }
1342
1343 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1344 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1345 }
1346};
1347
1348/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
1349/// it doesn't have any bounds checks for the array.
1350class ArraySubscriptGadget : public WarningGadget {
1351 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
1352 const ArraySubscriptExpr *ASE;
1353
1354public:
1355 ArraySubscriptGadget(const MatchResult &Result)
1356 : WarningGadget(Kind::ArraySubscript),
1357 ASE(Result.getNodeAs<ArraySubscriptExpr>(ID: ArraySubscrTag)) {}
1358
1359 static bool classof(const Gadget *G) {
1360 return G->getKind() == Kind::ArraySubscript;
1361 }
1362
1363 static bool matches(const Stmt *S, const ASTContext &Ctx,
1364 MatchResult &Result) {
1365 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: S);
1366 if (!ASE)
1367 return false;
1368 const auto *const Base = ASE->getBase()->IgnoreParenImpCasts();
1369 if (!hasPointerType(E: *Base) && !hasArrayType(E: *Base))
1370 return false;
1371 const auto *Idx = dyn_cast<IntegerLiteral>(Val: ASE->getIdx());
1372 bool IsSafeIndex = (Idx && Idx->getValue().isZero()) ||
1373 isa<ArrayInitIndexExpr>(Val: ASE->getIdx());
1374 if (IsSafeIndex || isSafeArraySubscript(Node: *ASE, Ctx))
1375 return false;
1376 Result.addNode(ID: ArraySubscrTag, Node: DynTypedNode::create(Node: *ASE));
1377 return true;
1378 }
1379
1380 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1381 bool IsRelatedToDecl,
1382 ASTContext &Ctx) const override {
1383 Handler.handleUnsafeOperation(Operation: ASE, IsRelatedToDecl, Ctx);
1384 }
1385 SourceLocation getSourceLoc() const override { return ASE->getBeginLoc(); }
1386
1387 DeclUseList getClaimedVarUseSites() const override {
1388 if (const auto *DRE =
1389 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts())) {
1390 return {DRE};
1391 }
1392
1393 return {};
1394 }
1395
1396 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1397 return {ASE->getBase()->IgnoreParenImpCasts()};
1398 }
1399};
1400
1401/// A pointer arithmetic expression of one of the forms:
1402/// \code
1403/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
1404/// \endcode
1405class PointerArithmeticGadget : public WarningGadget {
1406 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
1407 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
1408 const BinaryOperator *PA; // pointer arithmetic expression
1409 const Expr *Ptr; // the pointer expression in `PA`
1410
1411public:
1412 PointerArithmeticGadget(const MatchResult &Result)
1413 : WarningGadget(Kind::PointerArithmetic),
1414 PA(Result.getNodeAs<BinaryOperator>(ID: PointerArithmeticTag)),
1415 Ptr(Result.getNodeAs<Expr>(ID: PointerArithmeticPointerTag)) {}
1416
1417 static bool classof(const Gadget *G) {
1418 return G->getKind() == Kind::PointerArithmetic;
1419 }
1420
1421 static bool matches(const Stmt *S, const ASTContext &Ctx,
1422 MatchResult &Result) {
1423 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1424 if (!BO)
1425 return false;
1426 const auto *LHS = BO->getLHS();
1427 const auto *RHS = BO->getRHS();
1428 // ptr at left
1429 if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub ||
1430 BO->getOpcode() == BO_AddAssign || BO->getOpcode() == BO_SubAssign) {
1431 if (hasPointerType(E: *LHS) && (RHS->getType()->isIntegerType() ||
1432 RHS->getType()->isEnumeralType())) {
1433 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *LHS));
1434 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1435 return true;
1436 }
1437 }
1438 // ptr at right
1439 if (BO->getOpcode() == BO_Add && hasPointerType(E: *RHS) &&
1440 (LHS->getType()->isIntegerType() || LHS->getType()->isEnumeralType())) {
1441 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *RHS));
1442 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1443 return true;
1444 }
1445 return false;
1446 }
1447
1448 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1449 bool IsRelatedToDecl,
1450 ASTContext &Ctx) const override {
1451 Handler.handleUnsafeOperation(Operation: PA, IsRelatedToDecl, Ctx);
1452 }
1453 SourceLocation getSourceLoc() const override { return PA->getBeginLoc(); }
1454
1455 DeclUseList getClaimedVarUseSites() const override {
1456 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ptr->IgnoreParenImpCasts())) {
1457 return {DRE};
1458 }
1459
1460 return {};
1461 }
1462
1463 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1464 return {Ptr->IgnoreParenImpCasts()};
1465 }
1466
1467 // FIXME: pointer adding zero should be fine
1468 // FIXME: this gadge will need a fix-it
1469};
1470
1471class SpanTwoParamConstructorGadget : public WarningGadget {
1472 static constexpr const char *const SpanTwoParamConstructorTag =
1473 "spanTwoParamConstructor";
1474 const CXXConstructExpr *Ctor; // the span constructor expression
1475
1476public:
1477 SpanTwoParamConstructorGadget(const MatchResult &Result)
1478 : WarningGadget(Kind::SpanTwoParamConstructor),
1479 Ctor(Result.getNodeAs<CXXConstructExpr>(ID: SpanTwoParamConstructorTag)) {}
1480
1481 static bool classof(const Gadget *G) {
1482 return G->getKind() == Kind::SpanTwoParamConstructor;
1483 }
1484
1485 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1486 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1487 if (!CE)
1488 return false;
1489 const auto *CDecl = CE->getConstructor();
1490 const auto *CRecordDecl = CDecl->getParent();
1491 auto HasTwoParamSpanCtorDecl =
1492 CRecordDecl->isInStdNamespace() &&
1493 CDecl->getDeclName().getAsString() == "span" && CE->getNumArgs() == 2;
1494 if (!HasTwoParamSpanCtorDecl || isSafeSpanTwoParamConstruct(Node: *CE, Ctx))
1495 return false;
1496 Result.addNode(ID: SpanTwoParamConstructorTag, Node: DynTypedNode::create(Node: *CE));
1497 return true;
1498 }
1499
1500 static bool matches(const Stmt *S, ASTContext &Ctx,
1501 const UnsafeBufferUsageHandler *Handler,
1502 MatchResult &Result) {
1503 if (ignoreUnsafeBufferInContainer(Node: *S, Handler))
1504 return false;
1505 return matches(S, Ctx, Result);
1506 }
1507
1508 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1509 bool IsRelatedToDecl,
1510 ASTContext &Ctx) const override {
1511 Handler.handleUnsafeOperationInContainer(Operation: Ctor, IsRelatedToDecl, Ctx);
1512 }
1513 SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); }
1514
1515 DeclUseList getClaimedVarUseSites() const override {
1516 // If the constructor call is of the form `std::span{var, n}`, `var` is
1517 // considered an unsafe variable.
1518 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Ctor->getArg(Arg: 0))) {
1519 if (isa<VarDecl>(Val: DRE->getDecl()))
1520 return {DRE};
1521 }
1522 return {};
1523 }
1524
1525 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1526};
1527
1528/// A pointer initialization expression of the form:
1529/// \code
1530/// int *p = q;
1531/// \endcode
1532class PointerInitGadget : public FixableGadget {
1533private:
1534 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
1535 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
1536 const VarDecl *PtrInitLHS; // the LHS pointer expression in `PI`
1537 const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI`
1538
1539public:
1540 PointerInitGadget(const MatchResult &Result)
1541 : FixableGadget(Kind::PointerInit),
1542 PtrInitLHS(Result.getNodeAs<VarDecl>(ID: PointerInitLHSTag)),
1543 PtrInitRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerInitRHSTag)) {}
1544
1545 static bool classof(const Gadget *G) {
1546 return G->getKind() == Kind::PointerInit;
1547 }
1548
1549 static bool matches(const Stmt *S,
1550 llvm::SmallVectorImpl<MatchResult> &Results) {
1551 const DeclStmt *DS = dyn_cast<DeclStmt>(Val: S);
1552 if (!DS || !DS->isSingleDecl())
1553 return false;
1554 const VarDecl *VD = dyn_cast<VarDecl>(Val: DS->getSingleDecl());
1555 if (!VD)
1556 return false;
1557 const Expr *Init = VD->getAnyInitializer();
1558 if (!Init)
1559 return false;
1560 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Init->IgnoreImpCasts());
1561 if (!DRE || !hasPointerType(E: *DRE) || !isSupportedVariable(Node: *DRE)) {
1562 return false;
1563 }
1564 MatchResult R;
1565 R.addNode(ID: PointerInitLHSTag, Node: DynTypedNode::create(Node: *VD));
1566 R.addNode(ID: PointerInitRHSTag, Node: DynTypedNode::create(Node: *DRE));
1567 Results.emplace_back(Args: std::move(R));
1568 return true;
1569 }
1570
1571 virtual std::optional<FixItList>
1572 getFixits(const FixitStrategy &S) const override;
1573 SourceLocation getSourceLoc() const override {
1574 return PtrInitRHS->getBeginLoc();
1575 }
1576
1577 virtual DeclUseList getClaimedVarUseSites() const override {
1578 return DeclUseList{PtrInitRHS};
1579 }
1580
1581 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1582 getStrategyImplications() const override {
1583 return std::make_pair(x: PtrInitLHS, y: cast<VarDecl>(Val: PtrInitRHS->getDecl()));
1584 }
1585};
1586
1587/// A pointer assignment expression of the form:
1588/// \code
1589/// p = q;
1590/// \endcode
1591/// where both `p` and `q` are pointers.
1592class PtrToPtrAssignmentGadget : public FixableGadget {
1593private:
1594 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1595 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1596 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1597 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1598
1599public:
1600 PtrToPtrAssignmentGadget(const MatchResult &Result)
1601 : FixableGadget(Kind::PtrToPtrAssignment),
1602 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1603 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1604
1605 static bool classof(const Gadget *G) {
1606 return G->getKind() == Kind::PtrToPtrAssignment;
1607 }
1608
1609 static bool matches(const Stmt *S,
1610 llvm::SmallVectorImpl<MatchResult> &Results) {
1611 size_t SizeBefore = Results.size();
1612 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1613 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1614 if (!BO || BO->getOpcode() != BO_Assign)
1615 return;
1616 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1617 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1618 !RHSRef || !hasPointerType(E: *RHSRef) ||
1619 !isSupportedVariable(Node: *RHSRef)) {
1620 return;
1621 }
1622 const auto *LHS = BO->getLHS();
1623 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1624 !LHSRef || !hasPointerType(E: *LHSRef) ||
1625 !isSupportedVariable(Node: *LHSRef)) {
1626 return;
1627 }
1628 MatchResult R;
1629 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1630 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1631 Results.emplace_back(Args: std::move(R));
1632 });
1633 return SizeBefore != Results.size();
1634 }
1635
1636 virtual std::optional<FixItList>
1637 getFixits(const FixitStrategy &S) const override;
1638 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1639
1640 virtual DeclUseList getClaimedVarUseSites() const override {
1641 return DeclUseList{PtrLHS, PtrRHS};
1642 }
1643
1644 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1645 getStrategyImplications() const override {
1646 return std::make_pair(x: cast<VarDecl>(Val: PtrLHS->getDecl()),
1647 y: cast<VarDecl>(Val: PtrRHS->getDecl()));
1648 }
1649};
1650
1651/// An assignment expression of the form:
1652/// \code
1653/// ptr = array;
1654/// \endcode
1655/// where `p` is a pointer and `array` is a constant size array.
1656class CArrayToPtrAssignmentGadget : public FixableGadget {
1657private:
1658 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1659 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1660 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1661 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1662
1663public:
1664 CArrayToPtrAssignmentGadget(const MatchResult &Result)
1665 : FixableGadget(Kind::CArrayToPtrAssignment),
1666 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1667 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1668
1669 static bool classof(const Gadget *G) {
1670 return G->getKind() == Kind::CArrayToPtrAssignment;
1671 }
1672
1673 static bool matches(const Stmt *S,
1674 llvm::SmallVectorImpl<MatchResult> &Results) {
1675 size_t SizeBefore = Results.size();
1676 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1677 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1678 if (!BO || BO->getOpcode() != BO_Assign)
1679 return;
1680 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1681 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1682 !RHSRef ||
1683 !isa<ConstantArrayType>(Val: RHSRef->getType().getCanonicalType()) ||
1684 !isSupportedVariable(Node: *RHSRef)) {
1685 return;
1686 }
1687 const auto *LHS = BO->getLHS();
1688 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1689 !LHSRef || !hasPointerType(E: *LHSRef) ||
1690 !isSupportedVariable(Node: *LHSRef)) {
1691 return;
1692 }
1693 MatchResult R;
1694 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1695 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1696 Results.emplace_back(Args: std::move(R));
1697 });
1698 return SizeBefore != Results.size();
1699 }
1700
1701 virtual std::optional<FixItList>
1702 getFixits(const FixitStrategy &S) const override;
1703 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1704
1705 virtual DeclUseList getClaimedVarUseSites() const override {
1706 return DeclUseList{PtrLHS, PtrRHS};
1707 }
1708
1709 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1710 getStrategyImplications() const override {
1711 return {};
1712 }
1713};
1714
1715/// A call of a function or method that performs unchecked buffer operations
1716/// over one of its pointer parameters.
1717class UnsafeBufferUsageAttrGadget : public WarningGadget {
1718 constexpr static const char *const OpTag = "attr_expr";
1719 const Expr *Op;
1720
1721public:
1722 UnsafeBufferUsageAttrGadget(const MatchResult &Result)
1723 : WarningGadget(Kind::UnsafeBufferUsageAttr),
1724 Op(Result.getNodeAs<Expr>(ID: OpTag)) {}
1725
1726 static bool classof(const Gadget *G) {
1727 return G->getKind() == Kind::UnsafeBufferUsageAttr;
1728 }
1729
1730 static bool matches(const Stmt *S, const ASTContext &Ctx,
1731 MatchResult &Result) {
1732 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
1733 if (CE->getDirectCallee() &&
1734 CE->getDirectCallee()->hasAttr<UnsafeBufferUsageAttr>()) {
1735 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1736 return true;
1737 }
1738 }
1739 if (auto *ME = dyn_cast<MemberExpr>(Val: S)) {
1740 if (!isa<FieldDecl>(Val: ME->getMemberDecl()))
1741 return false;
1742 if (ME->getMemberDecl()->hasAttr<UnsafeBufferUsageAttr>()) {
1743 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *ME));
1744 return true;
1745 }
1746 }
1747 return false;
1748 }
1749
1750 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1751 bool IsRelatedToDecl,
1752 ASTContext &Ctx) const override {
1753 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1754 }
1755 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1756
1757 DeclUseList getClaimedVarUseSites() const override { return {}; }
1758
1759 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1760};
1761
1762/// A call of a constructor that performs unchecked buffer operations
1763/// over one of its pointer parameters, or constructs a class object that will
1764/// perform buffer operations that depend on the correctness of the parameters.
1765class UnsafeBufferUsageCtorAttrGadget : public WarningGadget {
1766 constexpr static const char *const OpTag = "cxx_construct_expr";
1767 const CXXConstructExpr *Op;
1768
1769public:
1770 UnsafeBufferUsageCtorAttrGadget(const MatchResult &Result)
1771 : WarningGadget(Kind::UnsafeBufferUsageCtorAttr),
1772 Op(Result.getNodeAs<CXXConstructExpr>(ID: OpTag)) {}
1773
1774 static bool classof(const Gadget *G) {
1775 return G->getKind() == Kind::UnsafeBufferUsageCtorAttr;
1776 }
1777
1778 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1779 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1780 if (!CE || !CE->getConstructor()->hasAttr<UnsafeBufferUsageAttr>())
1781 return false;
1782 // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget.
1783 MatchResult Tmp;
1784 if (SpanTwoParamConstructorGadget::matches(S: CE, Ctx, Result&: Tmp))
1785 return false;
1786 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1787 return true;
1788 }
1789
1790 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1791 bool IsRelatedToDecl,
1792 ASTContext &Ctx) const override {
1793 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1794 }
1795 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1796
1797 DeclUseList getClaimedVarUseSites() const override { return {}; }
1798
1799 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1800};
1801
1802// Warning gadget for unsafe invocation of span::data method.
1803// Triggers when the pointer returned by the invocation is immediately
1804// cast to a larger type.
1805
1806class DataInvocationGadget : public WarningGadget {
1807 constexpr static const char *const OpTag = "data_invocation_expr";
1808 const ExplicitCastExpr *Op;
1809
1810public:
1811 DataInvocationGadget(const MatchResult &Result)
1812 : WarningGadget(Kind::DataInvocation),
1813 Op(Result.getNodeAs<ExplicitCastExpr>(ID: OpTag)) {}
1814
1815 static bool classof(const Gadget *G) {
1816 return G->getKind() == Kind::DataInvocation;
1817 }
1818
1819 static bool matches(const Stmt *S, const ASTContext &Ctx,
1820 MatchResult &Result) {
1821 auto *CE = dyn_cast<ExplicitCastExpr>(Val: S);
1822 if (!CE)
1823 return false;
1824 for (auto *Child : CE->children()) {
1825 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Child);
1826 MCE && isDataFunction(call: MCE)) {
1827 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1828 return true;
1829 }
1830 if (auto *Paren = dyn_cast<ParenExpr>(Val: Child)) {
1831 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Paren->getSubExpr());
1832 MCE && isDataFunction(call: MCE)) {
1833 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1834 return true;
1835 }
1836 }
1837 }
1838 return false;
1839 }
1840
1841 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1842 bool IsRelatedToDecl,
1843 ASTContext &Ctx) const override {
1844 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1845 }
1846 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1847
1848 DeclUseList getClaimedVarUseSites() const override { return {}; }
1849
1850private:
1851 static bool isDataFunction(const CXXMemberCallExpr *call) {
1852 if (!call)
1853 return false;
1854 auto *callee = call->getDirectCallee();
1855 if (!callee || !isa<CXXMethodDecl>(Val: callee))
1856 return false;
1857 auto *method = cast<CXXMethodDecl>(Val: callee);
1858 if (method->getNameAsString() == "data" &&
1859 method->getParent()->isInStdNamespace() &&
1860 llvm::is_contained(Set: {SIZED_CONTAINER_OR_VIEW_LIST},
1861 Element: method->getParent()->getName()))
1862 return true;
1863 return false;
1864 }
1865
1866 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1867};
1868
1869class UnsafeLibcFunctionCallGadget : public WarningGadget {
1870 const CallExpr *const Call;
1871 const Expr *UnsafeArg = nullptr;
1872 constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
1873 // Extra tags for additional information:
1874 constexpr static const char *const UnsafeSprintfTag =
1875 "UnsafeLibcFunctionCall_sprintf";
1876 constexpr static const char *const UnsafeSizedByTag =
1877 "UnsafeLibcFunctionCall_sized_by";
1878 constexpr static const char *const UnsafeStringTag =
1879 "UnsafeLibcFunctionCall_string";
1880 constexpr static const char *const UnsafeVaListTag =
1881 "UnsafeLibcFunctionCall_va_list";
1882
1883 enum UnsafeKind {
1884 OTHERS = 0, // no specific information, the callee function is unsafe
1885 SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
1886 SIZED_BY =
1887 2, // the first two arguments of `snprintf` function have
1888 // "__sized_by" relation but they do not conform to safe patterns
1889 STRING = 3, // an argument is a pointer-to-char-as-string but does not
1890 // guarantee null-termination
1891 VA_LIST = 4, // one of the `-printf`s function that take va_list, which is
1892 // considered unsafe as it is not compile-time check
1893 } WarnedFunKind = OTHERS;
1894
1895public:
1896 UnsafeLibcFunctionCallGadget(const MatchResult &Result)
1897 : WarningGadget(Kind::UnsafeLibcFunctionCall),
1898 Call(Result.getNodeAs<CallExpr>(ID: Tag)) {
1899 if (Result.getNodeAs<Decl>(ID: UnsafeSprintfTag))
1900 WarnedFunKind = SPRINTF;
1901 else if (auto *E = Result.getNodeAs<Expr>(ID: UnsafeStringTag)) {
1902 WarnedFunKind = STRING;
1903 UnsafeArg = E;
1904 } else if (Result.getNodeAs<CallExpr>(ID: UnsafeSizedByTag)) {
1905 WarnedFunKind = SIZED_BY;
1906 UnsafeArg = Call->getArg(Arg: 0);
1907 } else if (Result.getNodeAs<Decl>(ID: UnsafeVaListTag))
1908 WarnedFunKind = VA_LIST;
1909 }
1910
1911 static bool matches(const Stmt *S, ASTContext &Ctx,
1912 const UnsafeBufferUsageHandler *Handler,
1913 MatchResult &Result) {
1914 if (ignoreUnsafeLibcCall(Ctx, Node: *S, Handler))
1915 return false;
1916 auto *CE = dyn_cast<CallExpr>(Val: S);
1917 if (!CE || !CE->getDirectCallee())
1918 return false;
1919 const auto *FD = dyn_cast<FunctionDecl>(Val: CE->getDirectCallee());
1920 if (!FD)
1921 return false;
1922 auto isSingleStringLiteralArg = false;
1923 if (CE->getNumArgs() == 1) {
1924 isSingleStringLiteralArg =
1925 isa<clang::StringLiteral>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts());
1926 }
1927 if (!isSingleStringLiteralArg) {
1928 // (unless the call has a sole string literal argument):
1929 if (libc_func_matchers::isPredefinedUnsafeLibcFunc(Node: *FD)) {
1930 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1931 return true;
1932 }
1933 if (libc_func_matchers::isUnsafeVaListPrintfFunc(Node: *FD)) {
1934 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1935 Result.addNode(ID: UnsafeVaListTag, Node: DynTypedNode::create(Node: *FD));
1936 return true;
1937 }
1938 if (libc_func_matchers::isUnsafeSprintfFunc(Node: *FD)) {
1939 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1940 Result.addNode(ID: UnsafeSprintfTag, Node: DynTypedNode::create(Node: *FD));
1941 return true;
1942 }
1943 }
1944 if (libc_func_matchers::isNormalPrintfFunc(Node: *FD)) {
1945 if (libc_func_matchers::hasUnsafeSnprintfBuffer(Node: *CE, Ctx)) {
1946 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1947 Result.addNode(ID: UnsafeSizedByTag, Node: DynTypedNode::create(Node: *CE));
1948 return true;
1949 }
1950 if (libc_func_matchers::hasUnsafePrintfStringArg(Node: *CE, Ctx, Result,
1951 Tag: UnsafeStringTag)) {
1952 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1953 return true;
1954 }
1955 }
1956 return false;
1957 }
1958
1959 const Stmt *getBaseStmt() const { return Call; }
1960
1961 SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
1962
1963 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1964 bool IsRelatedToDecl,
1965 ASTContext &Ctx) const override {
1966 Handler.handleUnsafeLibcCall(Call, PrintfInfo: WarnedFunKind, Ctx, UnsafeArg);
1967 }
1968
1969 DeclUseList getClaimedVarUseSites() const override { return {}; }
1970
1971 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1972};
1973
1974// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
1975// Context (see `findStmtsInUnspecifiedLvalueContext`).
1976// Note here `[]` is the built-in subscript operator.
1977class ULCArraySubscriptGadget : public FixableGadget {
1978private:
1979 static constexpr const char *const ULCArraySubscriptTag =
1980 "ArraySubscriptUnderULC";
1981 const ArraySubscriptExpr *Node;
1982
1983public:
1984 ULCArraySubscriptGadget(const MatchResult &Result)
1985 : FixableGadget(Kind::ULCArraySubscript),
1986 Node(Result.getNodeAs<ArraySubscriptExpr>(ID: ULCArraySubscriptTag)) {
1987 assert(Node != nullptr && "Expecting a non-null matching result");
1988 }
1989
1990 static bool classof(const Gadget *G) {
1991 return G->getKind() == Kind::ULCArraySubscript;
1992 }
1993
1994 static bool matches(const Stmt *S,
1995 llvm::SmallVectorImpl<MatchResult> &Results) {
1996 size_t SizeBefore = Results.size();
1997 findStmtsInUnspecifiedLvalueContext(S, OnResult: [&Results](const Expr *E) {
1998 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: E);
1999 if (!ASE)
2000 return;
2001 const auto *DRE =
2002 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
2003 if (!DRE || !(hasPointerType(E: *DRE) || hasArrayType(E: *DRE)) ||
2004 !isSupportedVariable(Node: *DRE))
2005 return;
2006 MatchResult R;
2007 R.addNode(ID: ULCArraySubscriptTag, Node: DynTypedNode::create(Node: *ASE));
2008 Results.emplace_back(Args: std::move(R));
2009 });
2010 return SizeBefore != Results.size();
2011 }
2012
2013 virtual std::optional<FixItList>
2014 getFixits(const FixitStrategy &S) const override;
2015 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2016
2017 virtual DeclUseList getClaimedVarUseSites() const override {
2018 if (const auto *DRE =
2019 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts())) {
2020 return {DRE};
2021 }
2022 return {};
2023 }
2024};
2025
2026// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
2027// unspecified pointer context (findStmtsInUnspecifiedPointerContext). The
2028// gadget emits fixit of the form `UPC(DRE.data())`.
2029class UPCStandalonePointerGadget : public FixableGadget {
2030private:
2031 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
2032 const DeclRefExpr *Node;
2033
2034public:
2035 UPCStandalonePointerGadget(const MatchResult &Result)
2036 : FixableGadget(Kind::UPCStandalonePointer),
2037 Node(Result.getNodeAs<DeclRefExpr>(ID: DeclRefExprTag)) {
2038 assert(Node != nullptr && "Expecting a non-null matching result");
2039 }
2040
2041 static bool classof(const Gadget *G) {
2042 return G->getKind() == Kind::UPCStandalonePointer;
2043 }
2044
2045 static bool matches(const Stmt *S,
2046 llvm::SmallVectorImpl<MatchResult> &Results) {
2047 size_t SizeBefore = Results.size();
2048 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2049 auto *E = dyn_cast<Expr>(Val: S);
2050 if (!E)
2051 return;
2052 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreParenImpCasts());
2053 if (!DRE || (!hasPointerType(E: *DRE) && !hasArrayType(E: *DRE)) ||
2054 !isSupportedVariable(Node: *DRE))
2055 return;
2056 MatchResult R;
2057 R.addNode(ID: DeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2058 Results.emplace_back(Args: std::move(R));
2059 });
2060 return SizeBefore != Results.size();
2061 }
2062
2063 virtual std::optional<FixItList>
2064 getFixits(const FixitStrategy &S) const override;
2065 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2066
2067 virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; }
2068};
2069
2070class PointerDereferenceGadget : public FixableGadget {
2071 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2072 static constexpr const char *const OperatorTag = "op";
2073
2074 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2075 const UnaryOperator *Op = nullptr;
2076
2077public:
2078 PointerDereferenceGadget(const MatchResult &Result)
2079 : FixableGadget(Kind::PointerDereference),
2080 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2081 Op(Result.getNodeAs<UnaryOperator>(ID: OperatorTag)) {}
2082
2083 static bool classof(const Gadget *G) {
2084 return G->getKind() == Kind::PointerDereference;
2085 }
2086
2087 static bool matches(const Stmt *S,
2088 llvm::SmallVectorImpl<MatchResult> &Results) {
2089 size_t SizeBefore = Results.size();
2090 findStmtsInUnspecifiedLvalueContext(S, OnResult: [&Results](const Stmt *S) {
2091 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
2092 if (!UO || UO->getOpcode() != UO_Deref)
2093 return;
2094 const auto *CE = dyn_cast<Expr>(Val: UO->getSubExpr());
2095 if (!CE)
2096 return;
2097 CE = CE->IgnoreParenImpCasts();
2098 const auto *DRE = dyn_cast<DeclRefExpr>(Val: CE);
2099 if (!DRE || !isSupportedVariable(Node: *DRE))
2100 return;
2101 MatchResult R;
2102 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2103 R.addNode(ID: OperatorTag, Node: DynTypedNode::create(Node: *UO));
2104 Results.emplace_back(Args: std::move(R));
2105 });
2106 return SizeBefore != Results.size();
2107 }
2108
2109 DeclUseList getClaimedVarUseSites() const override {
2110 return {BaseDeclRefExpr};
2111 }
2112
2113 virtual std::optional<FixItList>
2114 getFixits(const FixitStrategy &S) const override;
2115 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
2116};
2117
2118// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
2119// Context (see `findStmtsInUnspecifiedPointerContext`).
2120// Note here `[]` is the built-in subscript operator.
2121class UPCAddressofArraySubscriptGadget : public FixableGadget {
2122private:
2123 static constexpr const char *const UPCAddressofArraySubscriptTag =
2124 "AddressofArraySubscriptUnderUPC";
2125 const UnaryOperator *Node; // the `&DRE[any]` node
2126
2127public:
2128 UPCAddressofArraySubscriptGadget(const MatchResult &Result)
2129 : FixableGadget(Kind::ULCArraySubscript),
2130 Node(Result.getNodeAs<UnaryOperator>(ID: UPCAddressofArraySubscriptTag)) {
2131 assert(Node != nullptr && "Expecting a non-null matching result");
2132 }
2133
2134 static bool classof(const Gadget *G) {
2135 return G->getKind() == Kind::UPCAddressofArraySubscript;
2136 }
2137
2138 static bool matches(const Stmt *S,
2139 llvm::SmallVectorImpl<MatchResult> &Results) {
2140 size_t SizeBefore = Results.size();
2141 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2142 auto *E = dyn_cast<Expr>(Val: S);
2143 if (!E)
2144 return;
2145 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2146 if (!UO || UO->getOpcode() != UO_AddrOf)
2147 return;
2148 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: UO->getSubExpr());
2149 if (!ASE)
2150 return;
2151 const auto *DRE =
2152 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
2153 if (!DRE || !isSupportedVariable(Node: *DRE))
2154 return;
2155 MatchResult R;
2156 R.addNode(ID: UPCAddressofArraySubscriptTag, Node: DynTypedNode::create(Node: *UO));
2157 Results.emplace_back(Args: std::move(R));
2158 });
2159 return SizeBefore != Results.size();
2160 }
2161
2162 virtual std::optional<FixItList>
2163 getFixits(const FixitStrategy &) const override;
2164 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2165
2166 virtual DeclUseList getClaimedVarUseSites() const override {
2167 const auto *ArraySubst = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
2168 const auto *DRE =
2169 cast<DeclRefExpr>(Val: ArraySubst->getBase()->IgnoreParenImpCasts());
2170 return {DRE};
2171 }
2172};
2173} // namespace
2174
2175namespace {
2176// An auxiliary tracking facility for the fixit analysis. It helps connect
2177// declarations to its uses and make sure we've covered all uses with our
2178// analysis before we try to fix the declaration.
2179class DeclUseTracker {
2180 using UseSetTy = llvm::SmallSet<const DeclRefExpr *, 16>;
2181 using DefMapTy = llvm::DenseMap<const VarDecl *, const DeclStmt *>;
2182
2183 // Allocate on the heap for easier move.
2184 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
2185 DefMapTy Defs{};
2186
2187public:
2188 DeclUseTracker() = default;
2189 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
2190 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
2191 DeclUseTracker(DeclUseTracker &&) = default;
2192 DeclUseTracker &operator=(DeclUseTracker &&) = default;
2193
2194 // Start tracking a freshly discovered DRE.
2195 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(Ptr: DRE); }
2196
2197 // Stop tracking the DRE as it's been fully figured out.
2198 void claimUse(const DeclRefExpr *DRE) {
2199 assert(Uses->count(DRE) &&
2200 "DRE not found or claimed by multiple matchers!");
2201 Uses->erase(Ptr: DRE);
2202 }
2203
2204 // A variable is unclaimed if at least one use is unclaimed.
2205 bool hasUnclaimedUses(const VarDecl *VD) const {
2206 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
2207 return any_of(Range&: *Uses, P: [VD](const DeclRefExpr *DRE) {
2208 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
2209 });
2210 }
2211
2212 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
2213 UseSetTy ReturnSet;
2214 for (auto use : *Uses) {
2215 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
2216 ReturnSet.insert(Ptr: use);
2217 }
2218 }
2219 return ReturnSet;
2220 }
2221
2222 void discoverDecl(const DeclStmt *DS) {
2223 for (const Decl *D : DS->decls()) {
2224 if (const auto *VD = dyn_cast<VarDecl>(Val: D)) {
2225 // FIXME: Assertion temporarily disabled due to a bug in
2226 // ASTMatcher internal behavior in presence of GNU
2227 // statement-expressions. We need to properly investigate this
2228 // because it can screw up our algorithm in other ways.
2229 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
2230 Defs[VD] = DS;
2231 }
2232 }
2233 }
2234
2235 const DeclStmt *lookupDecl(const VarDecl *VD) const {
2236 return Defs.lookup(Val: VD);
2237 }
2238};
2239} // namespace
2240
2241// Representing a pointer type expression of the form `++Ptr` in an Unspecified
2242// Pointer Context (UPC):
2243class UPCPreIncrementGadget : public FixableGadget {
2244private:
2245 static constexpr const char *const UPCPreIncrementTag =
2246 "PointerPreIncrementUnderUPC";
2247 const UnaryOperator *Node; // the `++Ptr` node
2248
2249public:
2250 UPCPreIncrementGadget(const MatchResult &Result)
2251 : FixableGadget(Kind::UPCPreIncrement),
2252 Node(Result.getNodeAs<UnaryOperator>(ID: UPCPreIncrementTag)) {
2253 assert(Node != nullptr && "Expecting a non-null matching result");
2254 }
2255
2256 static bool classof(const Gadget *G) {
2257 return G->getKind() == Kind::UPCPreIncrement;
2258 }
2259
2260 static bool matches(const Stmt *S,
2261 llvm::SmallVectorImpl<MatchResult> &Results) {
2262 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
2263 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
2264 // can have the matcher be general, so long as `getClaimedVarUseSites` does
2265 // things right.
2266 size_t SizeBefore = Results.size();
2267 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2268 auto *E = dyn_cast<Expr>(Val: S);
2269 if (!E)
2270 return;
2271 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2272 if (!UO || UO->getOpcode() != UO_PreInc)
2273 return;
2274 const auto *DRE = dyn_cast<DeclRefExpr>(Val: UO->getSubExpr());
2275 if (!DRE || !isSupportedVariable(Node: *DRE))
2276 return;
2277 MatchResult R;
2278 R.addNode(ID: UPCPreIncrementTag, Node: DynTypedNode::create(Node: *UO));
2279 Results.emplace_back(Args: std::move(R));
2280 });
2281 return SizeBefore != Results.size();
2282 }
2283
2284 virtual std::optional<FixItList>
2285 getFixits(const FixitStrategy &S) const override;
2286 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2287
2288 virtual DeclUseList getClaimedVarUseSites() const override {
2289 return {dyn_cast<DeclRefExpr>(Val: Node->getSubExpr())};
2290 }
2291};
2292
2293// Representing a pointer type expression of the form `Ptr += n` in an
2294// Unspecified Untyped Context (UUC):
2295class UUCAddAssignGadget : public FixableGadget {
2296private:
2297 static constexpr const char *const UUCAddAssignTag =
2298 "PointerAddAssignUnderUUC";
2299 static constexpr const char *const OffsetTag = "Offset";
2300
2301 const BinaryOperator *Node; // the `Ptr += n` node
2302 const Expr *Offset = nullptr;
2303
2304public:
2305 UUCAddAssignGadget(const MatchResult &Result)
2306 : FixableGadget(Kind::UUCAddAssign),
2307 Node(Result.getNodeAs<BinaryOperator>(ID: UUCAddAssignTag)),
2308 Offset(Result.getNodeAs<Expr>(ID: OffsetTag)) {
2309 assert(Node != nullptr && "Expecting a non-null matching result");
2310 }
2311
2312 static bool classof(const Gadget *G) {
2313 return G->getKind() == Kind::UUCAddAssign;
2314 }
2315
2316 static bool matches(const Stmt *S,
2317 llvm::SmallVectorImpl<MatchResult> &Results) {
2318 size_t SizeBefore = Results.size();
2319 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
2320 const auto *E = dyn_cast<Expr>(Val: S);
2321 if (!E)
2322 return;
2323 const auto *BO = dyn_cast<BinaryOperator>(Val: E->IgnoreImpCasts());
2324 if (!BO || BO->getOpcode() != BO_AddAssign)
2325 return;
2326 const auto *DRE = dyn_cast<DeclRefExpr>(Val: BO->getLHS());
2327 if (!DRE || !hasPointerType(E: *DRE) || !isSupportedVariable(Node: *DRE))
2328 return;
2329 MatchResult R;
2330 R.addNode(ID: UUCAddAssignTag, Node: DynTypedNode::create(Node: *BO));
2331 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *BO->getRHS()));
2332 Results.emplace_back(Args: std::move(R));
2333 });
2334 return SizeBefore != Results.size();
2335 }
2336
2337 virtual std::optional<FixItList>
2338 getFixits(const FixitStrategy &S) const override;
2339 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2340
2341 virtual DeclUseList getClaimedVarUseSites() const override {
2342 return {dyn_cast<DeclRefExpr>(Val: Node->getLHS())};
2343 }
2344};
2345
2346// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
2347// ptr)`:
2348class DerefSimplePtrArithFixableGadget : public FixableGadget {
2349 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2350 static constexpr const char *const DerefOpTag = "DerefOp";
2351 static constexpr const char *const AddOpTag = "AddOp";
2352 static constexpr const char *const OffsetTag = "Offset";
2353
2354 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2355 const UnaryOperator *DerefOp = nullptr;
2356 const BinaryOperator *AddOp = nullptr;
2357 const IntegerLiteral *Offset = nullptr;
2358
2359public:
2360 DerefSimplePtrArithFixableGadget(const MatchResult &Result)
2361 : FixableGadget(Kind::DerefSimplePtrArithFixable),
2362 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2363 DerefOp(Result.getNodeAs<UnaryOperator>(ID: DerefOpTag)),
2364 AddOp(Result.getNodeAs<BinaryOperator>(ID: AddOpTag)),
2365 Offset(Result.getNodeAs<IntegerLiteral>(ID: OffsetTag)) {}
2366
2367 static bool matches(const Stmt *S,
2368 llvm::SmallVectorImpl<MatchResult> &Results) {
2369 auto IsPtr = [](const Expr *E, MatchResult &R) {
2370 if (!E || !hasPointerType(E: *E))
2371 return false;
2372 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreImpCasts());
2373 if (!DRE || !isSupportedVariable(Node: *DRE))
2374 return false;
2375 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2376 return true;
2377 };
2378 const auto IsPlusOverPtrAndInteger = [&IsPtr](const Expr *E,
2379 MatchResult &R) {
2380 const auto *BO = dyn_cast<BinaryOperator>(Val: E);
2381 if (!BO || BO->getOpcode() != BO_Add)
2382 return false;
2383
2384 const auto *LHS = BO->getLHS();
2385 const auto *RHS = BO->getRHS();
2386 if (isa<IntegerLiteral>(Val: RHS) && IsPtr(LHS, R)) {
2387 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *RHS));
2388 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2389 return true;
2390 }
2391 if (isa<IntegerLiteral>(Val: LHS) && IsPtr(RHS, R)) {
2392 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *LHS));
2393 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2394 return true;
2395 }
2396 return false;
2397 };
2398 size_t SizeBefore = Results.size();
2399 const auto InnerMatcher = [&IsPlusOverPtrAndInteger,
2400 &Results](const Expr *E) {
2401 const auto *UO = dyn_cast<UnaryOperator>(Val: E);
2402 if (!UO || UO->getOpcode() != UO_Deref)
2403 return;
2404
2405 const auto *Operand = UO->getSubExpr()->IgnoreParens();
2406 MatchResult R;
2407 if (IsPlusOverPtrAndInteger(Operand, R)) {
2408 R.addNode(ID: DerefOpTag, Node: DynTypedNode::create(Node: *UO));
2409 Results.emplace_back(Args: std::move(R));
2410 }
2411 };
2412 findStmtsInUnspecifiedLvalueContext(S, OnResult: InnerMatcher);
2413 return SizeBefore != Results.size();
2414 }
2415
2416 virtual std::optional<FixItList>
2417 getFixits(const FixitStrategy &s) const final;
2418 SourceLocation getSourceLoc() const override {
2419 return DerefOp->getBeginLoc();
2420 }
2421
2422 virtual DeclUseList getClaimedVarUseSites() const final {
2423 return {BaseDeclRefExpr};
2424 }
2425};
2426
// A `FastMatcher` that tries every warning gadget listed in
// UnsafeBufferUsageGadgets.def against a node and appends the first gadget
// that matches to the list it was constructed with.
class WarningGadgetMatcher : public FastMatcher {

public:
  WarningGadgetMatcher(WarningGadgetList &WarningGadgets)
      : WarningGadgets(WarningGadgets) {}

  // Returns true when `DynNode` is a statement that some warning gadget
  // matches and `notInSafeBufferOptOut` holds for it; at most one gadget is
  // recorded per node. Nodes that are not statements never match.
  bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
               const UnsafeBufferUsageHandler &Handler) override {
    const Stmt *S = DynNode.get<Stmt>();
    if (!S)
      return false;

    // Shared by all expansions below; only the first successful gadget gets to
    // construct itself from it because each expansion returns on success.
    MatchResult Result;
    // The two macros differ only in whether the gadget's `matches` also takes
    // the handler. The .def file is expanded in place, so gadgets are tried in
    // the order they are listed there.
#define WARNING_GADGET(name)                                                   \
  if (name##Gadget::matches(S, Ctx, Result) &&                                 \
      notInSafeBufferOptOut(*S, &Handler)) {                                   \
    WarningGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
    return true;                                                               \
  }
#define WARNING_OPTIONAL_GADGET(name)                                          \
  if (name##Gadget::matches(S, Ctx, &Handler, Result) &&                       \
      notInSafeBufferOptOut(*S, &Handler)) {                                   \
    WarningGadgets.push_back(std::make_unique<name##Gadget>(Result));          \
    return true;                                                               \
  }
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
    return false;
  }

private:
  // Output list; owned by the caller.
  WarningGadgetList &WarningGadgets;
};
2459
2460class FixableGadgetMatcher : public FastMatcher {
2461
2462public:
2463 FixableGadgetMatcher(FixableGadgetList &FixableGadgets,
2464 DeclUseTracker &Tracker)
2465 : FixableGadgets(FixableGadgets), Tracker(Tracker) {}
2466
2467 bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
2468 const UnsafeBufferUsageHandler &Handler) override {
2469 bool matchFound = false;
2470 const Stmt *S = DynNode.get<Stmt>();
2471 if (!S) {
2472 return matchFound;
2473 }
2474
2475 llvm::SmallVector<MatchResult> Results;
2476#define FIXABLE_GADGET(name) \
2477 if (name##Gadget::matches(S, Results)) { \
2478 for (const auto &R : Results) { \
2479 FixableGadgets.push_back(std::make_unique<name##Gadget>(R)); \
2480 matchFound = true; \
2481 } \
2482 Results = {}; \
2483 }
2484#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2485 // In parallel, match all DeclRefExprs so that to find out
2486 // whether there are any uncovered by gadgets.
2487 if (auto *DRE = findDeclRefExpr(S); DRE) {
2488 Tracker.discoverUse(DRE);
2489 matchFound = true;
2490 }
2491 // Also match DeclStmts because we'll need them when fixing
2492 // their underlying VarDecls that otherwise don't have
2493 // any backreferences to DeclStmts.
2494 if (auto *DS = findDeclStmt(S); DS) {
2495 Tracker.discoverDecl(DS);
2496 matchFound = true;
2497 }
2498 return matchFound;
2499 }
2500
2501private:
2502 const DeclRefExpr *findDeclRefExpr(const Stmt *S) {
2503 const auto *DRE = dyn_cast<DeclRefExpr>(Val: S);
2504 if (!DRE || (!hasPointerType(E: *DRE) && !hasArrayType(E: *DRE)))
2505 return nullptr;
2506 const Decl *D = DRE->getDecl();
2507 if (!D || (!isa<VarDecl>(Val: D) && !isa<BindingDecl>(Val: D)))
2508 return nullptr;
2509 return DRE;
2510 }
2511 const DeclStmt *findDeclStmt(const Stmt *S) {
2512 const auto *DS = dyn_cast<DeclStmt>(Val: S);
2513 if (!DS)
2514 return nullptr;
2515 return DS;
2516 }
2517 FixableGadgetList &FixableGadgets;
2518 DeclUseTracker &Tracker;
2519};
2520
2521// Scan the function and return a list of gadgets found with provided kits.
2522static void findGadgets(const Stmt *S, ASTContext &Ctx,
2523 const UnsafeBufferUsageHandler &Handler,
2524 bool EmitSuggestions, FixableGadgetList &FixableGadgets,
2525 WarningGadgetList &WarningGadgets,
2526 DeclUseTracker &Tracker) {
2527 WarningGadgetMatcher WMatcher{WarningGadgets};
2528 forEachDescendantEvaluatedStmt(S, Ctx, Handler, Matcher&: WMatcher);
2529 if (EmitSuggestions) {
2530 FixableGadgetMatcher FMatcher{FixableGadgets, Tracker};
2531 forEachDescendantStmt(S, Ctx, Handler, Matcher&: FMatcher);
2532 }
2533}
2534
// Strict ordering of AST nodes by the raw encoding of their begin location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstKey = N1->getBeginLoc().getRawEncoding();
    const auto SecondKey = N2->getBeginLoc().getRawEncoding();
    return FirstKey < SecondKey;
  }
};
2542
2543std::set<const Expr *> clang::findUnsafePointers(const FunctionDecl *FD) {
2544 class MockReporter : public UnsafeBufferUsageHandler {
2545 public:
2546 MockReporter() {}
2547 void handleUnsafeOperation(const Stmt *, bool, ASTContext &) override {}
2548 void handleUnsafeLibcCall(const CallExpr *, unsigned, ASTContext &,
2549 const Expr *UnsafeArg = nullptr) override {}
2550 void handleUnsafeOperationInContainer(const Stmt *, bool,
2551 ASTContext &) override {}
2552 void handleUnsafeVariableGroup(const VarDecl *,
2553 const VariableGroupsManager &, FixItList &&,
2554 const Decl *,
2555 const FixitStrategy &) override {}
2556 bool isSafeBufferOptOut(const SourceLocation &) const override {
2557 return false;
2558 }
2559 bool ignoreUnsafeBufferInContainer(const SourceLocation &) const override {
2560 return false;
2561 }
2562 bool ignoreUnsafeBufferInLibcCall(const SourceLocation &) const override {
2563 return false;
2564 }
2565 std::string getUnsafeBufferUsageAttributeTextAt(
2566 SourceLocation, StringRef WSSuffix = "") const override {
2567 return "";
2568 }
2569 };
2570
2571 FixableGadgetList FixableGadgets;
2572 WarningGadgetList WarningGadgets;
2573 DeclUseTracker Tracker;
2574 MockReporter IgnoreHandler;
2575
2576 findGadgets(S: FD->getBody(), Ctx&: FD->getASTContext(), Handler: IgnoreHandler, EmitSuggestions: false,
2577 FixableGadgets, WarningGadgets, Tracker);
2578
2579 std::set<const Expr *> Result;
2580 for (auto &G : WarningGadgets) {
2581 for (const Expr *E : G->getUnsafePtrs()) {
2582 Result.insert(x: E);
2583 }
2584 }
2585
2586 return Result;
2587}
2588
// Warning gadgets grouped by the variable they are associated with.
struct WarningGadgetSets {
  // Gadgets keyed by variable. A gadget appears under every variable it is
  // associated with.
  std::map<const VarDecl *, std::set<const WarningGadget *>,
           // Keys are ordered with `CompareNode`, i.e. by the raw encoding of
           // their begin locations, so that the iteration order is
           // deterministic:
           CompareNode<VarDecl>>
      byVar;
  // These Gadgets are not related to pointer variables (e.g. temporaries).
  llvm::SmallVector<const WarningGadget *, 16> noVar;
};
2598
2599static WarningGadgetSets
2600groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
2601 WarningGadgetSets result;
2602 // If some gadgets cover more than one
2603 // variable, they'll appear more than once in the map.
2604 for (auto &G : AllUnsafeOperations) {
2605 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
2606
2607 bool AssociatedWithVarDecl = false;
2608 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
2609 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2610 result.byVar[VD].insert(x: G.get());
2611 AssociatedWithVarDecl = true;
2612 }
2613 }
2614
2615 if (!AssociatedWithVarDecl) {
2616 result.noVar.push_back(Elt: G.get());
2617 continue;
2618 }
2619 }
2620 return result;
2621}
2622
2623struct FixableGadgetSets {
2624 std::map<const VarDecl *, std::set<const FixableGadget *>,
2625 // To keep keys sorted by their locations in the map so that the
2626 // order is deterministic:
2627 CompareNode<VarDecl>>
2628 byVar;
2629};
2630
2631static FixableGadgetSets
2632groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
2633 FixableGadgetSets FixablesForUnsafeVars;
2634 for (auto &F : AllFixableOperations) {
2635 DeclUseList DREs = F->getClaimedVarUseSites();
2636
2637 for (const DeclRefExpr *DRE : DREs) {
2638 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2639 FixablesForUnsafeVars.byVar[VD].insert(x: F.get());
2640 }
2641 }
2642 }
2643 return FixablesForUnsafeVars;
2644}
2645
2646bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
2647 const SourceManager &SM) {
2648 // A simple interval overlap detection algorithm. Sorts all ranges by their
2649 // begin location then finds the first overlap in one pass.
2650 std::vector<const FixItHint *> All; // a copy of `FixIts`
2651
2652 for (const FixItHint &H : FixIts)
2653 All.push_back(x: &H);
2654 std::sort(first: All.begin(), last: All.end(),
2655 comp: [&SM](const FixItHint *H1, const FixItHint *H2) {
2656 return SM.isBeforeInTranslationUnit(LHS: H1->RemoveRange.getBegin(),
2657 RHS: H2->RemoveRange.getBegin());
2658 });
2659
2660 const FixItHint *CurrHint = nullptr;
2661
2662 for (const FixItHint *Hint : All) {
2663 if (!CurrHint ||
2664 SM.isBeforeInTranslationUnit(LHS: CurrHint->RemoveRange.getEnd(),
2665 RHS: Hint->RemoveRange.getBegin())) {
2666 // Either to initialize `CurrHint` or `CurrHint` does not
2667 // overlap with `Hint`:
2668 CurrHint = Hint;
2669 } else
2670 // In case `Hint` overlaps the `CurrHint`, we found at least one
2671 // conflict:
2672 return true;
2673 }
2674 return false;
2675}
2676
2677std::optional<FixItList>
2678PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2679 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2680 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2681 switch (S.lookup(VD: LeftVD)) {
2682 case FixitStrategy::Kind::Span:
2683 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2684 return FixItList{};
2685 return std::nullopt;
2686 case FixitStrategy::Kind::Wontfix:
2687 return std::nullopt;
2688 case FixitStrategy::Kind::Iterator:
2689 case FixitStrategy::Kind::Array:
2690 return std::nullopt;
2691 case FixitStrategy::Kind::Vector:
2692 llvm_unreachable("unsupported strategies for FixableGadgets");
2693 }
2694 return std::nullopt;
2695}
2696
/// \returns a fix-it that adds a `.data()` call after \p DRE.
2698static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
2699 const DeclRefExpr *DRE);
2700
2701std::optional<FixItList>
2702CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2703 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2704 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2705 // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is
2706 // non-trivial.
2707 //
2708 // CArrayToPtrAssignmentGadget doesn't have strategy implications because
2709 // constant size array propagates its bounds. Because of that LHS and RHS are
2710 // addressed by two different fixits.
2711 //
2712 // At the same time FixitStrategy S doesn't reflect what group a fixit belongs
2713 // to and can't be generally relied on in multi-variable Fixables!
2714 //
2715 // E. g. If an instance of this gadget is fixing variable on LHS then the
2716 // variable on RHS is fixed by a different fixit and its strategy for LHS
2717 // fixit is as if Wontfix.
2718 //
2719 // The only exception is Wontfix strategy for a given variable as that is
2720 // valid for any fixit produced for the given input source code.
2721 if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Span) {
2722 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Wontfix) {
2723 return FixItList{};
2724 }
2725 } else if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Wontfix) {
2726 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Array) {
2727 return createDataFixit(Ctx: RightVD->getASTContext(), DRE: PtrRHS);
2728 }
2729 }
2730 return std::nullopt;
2731}
2732
2733std::optional<FixItList>
2734PointerInitGadget::getFixits(const FixitStrategy &S) const {
2735 const auto *LeftVD = PtrInitLHS;
2736 const auto *RightVD = cast<VarDecl>(Val: PtrInitRHS->getDecl());
2737 switch (S.lookup(VD: LeftVD)) {
2738 case FixitStrategy::Kind::Span:
2739 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2740 return FixItList{};
2741 return std::nullopt;
2742 case FixitStrategy::Kind::Wontfix:
2743 return std::nullopt;
2744 case FixitStrategy::Kind::Iterator:
2745 case FixitStrategy::Kind::Array:
2746 return std::nullopt;
2747 case FixitStrategy::Kind::Vector:
2748 llvm_unreachable("unsupported strategies for FixableGadgets");
2749 }
2750 return std::nullopt;
2751}
2752
2753static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
2754 const ASTContext &Ctx) {
2755 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
2756 if (ConstVal->isNegative())
2757 return false;
2758 } else if (!Expr->getType()->isUnsignedIntegerType())
2759 return false;
2760 return true;
2761}
2762
2763std::optional<FixItList>
2764ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2765 if (const auto *DRE =
2766 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts()))
2767 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2768 switch (S.lookup(VD)) {
2769 case FixitStrategy::Kind::Span: {
2770
2771 // If the index has a negative constant value, we give up as no valid
2772 // fix-it can be generated:
2773 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
2774 VD->getASTContext();
2775 if (!isNonNegativeIntegerExpr(Expr: Node->getIdx(), VD, Ctx))
2776 return std::nullopt;
2777 // no-op is a good fix-it, otherwise
2778 return FixItList{};
2779 }
2780 case FixitStrategy::Kind::Array:
2781 return FixItList{};
2782 case FixitStrategy::Kind::Wontfix:
2783 case FixitStrategy::Kind::Iterator:
2784 case FixitStrategy::Kind::Vector:
2785 llvm_unreachable("unsupported strategies for FixableGadgets");
2786 }
2787 }
2788 return std::nullopt;
2789}
2790
2791static std::optional<FixItList> // forward declaration
2792fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
2793
2794std::optional<FixItList>
2795UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2796 auto DREs = getClaimedVarUseSites();
2797 const auto *VD = cast<VarDecl>(Val: DREs.front()->getDecl());
2798
2799 switch (S.lookup(VD)) {
2800 case FixitStrategy::Kind::Span:
2801 return fixUPCAddressofArraySubscriptWithSpan(Node);
2802 case FixitStrategy::Kind::Wontfix:
2803 case FixitStrategy::Kind::Iterator:
2804 case FixitStrategy::Kind::Array:
2805 return std::nullopt;
2806 case FixitStrategy::Kind::Vector:
2807 llvm_unreachable("unsupported strategies for FixableGadgets");
2808 }
2809 return std::nullopt; // something went wrong, no fix-it
2810}
2811
2812// FIXME: this function should be customizable through format
2813static StringRef getEndOfLine() {
2814 static const char *const EOL = "\n";
2815 return EOL;
2816}
2817
2818// Returns the text indicating that the user needs to provide input there:
2819static std::string
2820getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
2821 std::string s = std::string("<# ");
2822 s += HintTextToUser;
2823 s += " #>";
2824 return s;
2825}
2826
2827// Return the source location of the last character of the AST `Node`.
2828template <typename NodeTy>
2829static std::optional<SourceLocation>
2830getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
2831 const LangOptions &LangOpts) {
2832 if (unsigned TkLen =
2833 Lexer::MeasureTokenLength(Loc: Node->getEndLoc(), SM, LangOpts)) {
2834 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
2835
2836 if (Loc.isValid())
2837 return Loc;
2838 }
2839 return std::nullopt;
2840}
2841
2842// We cannot fix a variable declaration if it has some other specifiers than the
2843// type specifier. Because the source ranges of those specifiers could overlap
2844// with the source range that is being replaced using fix-its. Especially when
2845// we often cannot obtain accurate source ranges of cv-qualified type
2846// specifiers.
2847// FIXME: also deal with type attributes
2848static bool hasUnsupportedSpecifiers(const VarDecl *VD,
2849 const SourceManager &SM) {
2850 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
2851 // source range of `VD`:
2852 bool AttrRangeOverlapping = llvm::any_of(Range: VD->attrs(), P: [&](Attr *At) -> bool {
2853 return !(SM.isBeforeInTranslationUnit(LHS: At->getRange().getEnd(),
2854 RHS: VD->getBeginLoc())) &&
2855 !(SM.isBeforeInTranslationUnit(LHS: VD->getEndLoc(),
2856 RHS: At->getRange().getBegin()));
2857 });
2858 return VD->isInlineSpecified() || VD->isConstexpr() ||
2859 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
2860 AttrRangeOverlapping;
2861}
2862
2863// Returns the `SourceRange` of `D`. The reason why this function exists is
2864// that `D->getSourceRange()` may return a range where the end location is the
2865// starting location of the last token. The end location of the source range
2866// returned by this function is the last location of the last token.
2867static SourceRange getSourceRangeToTokenEnd(const Decl *D,
2868 const SourceManager &SM,
2869 const LangOptions &LangOpts) {
2870 SourceLocation Begin = D->getBeginLoc();
2871 SourceLocation
2872 End = // `D->getEndLoc` should always return the starting location of the
2873 // last token, so we should get the end of the token
2874 Lexer::getLocForEndOfToken(Loc: D->getEndLoc(), Offset: 0, SM, LangOpts);
2875
2876 return SourceRange(Begin, End);
2877}
2878
2879// Returns the text of the name (with qualifiers) of a `FunctionDecl`.
2880static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
2881 const SourceManager &SM,
2882 const LangOptions &LangOpts) {
2883 SourceLocation BeginLoc = FD->getQualifier()
2884 ? FD->getQualifierLoc().getBeginLoc()
2885 : FD->getNameInfo().getBeginLoc();
2886 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
2887 // last token:
2888 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
2889 Loc: FD->getNameInfo().getEndLoc(), Offset: 0, SM, LangOpts);
2890 SourceRange NameRange{BeginLoc, EndLoc};
2891
2892 return getRangeText(SR: NameRange, SM, LangOpts);
2893}
2894
2895// Returns the text representing a `std::span` type where the element type is
2896// represented by `EltTyText`.
2897//
2898// Note the optional parameter `Qualifiers`: one needs to pass qualifiers
2899// explicitly if the element type needs to be qualified.
2900static std::string
2901getSpanTypeText(StringRef EltTyText,
2902 std::optional<Qualifiers> Quals = std::nullopt) {
2903 const char *const SpanOpen = "std::span<";
2904
2905 if (Quals)
2906 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
2907 return SpanOpen + EltTyText.str() + '>';
2908}
2909
2910std::optional<FixItList>
2911DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const {
2912 const VarDecl *VD = dyn_cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2913
2914 if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) {
2915 ASTContext &Ctx = VD->getASTContext();
2916 // std::span can't represent elements before its begin()
2917 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
2918 if (ConstVal->isNegative())
2919 return std::nullopt;
2920
2921 // note that the expr may (oddly) has multiple layers of parens
2922 // example:
2923 // *((..(pointer + 123)..))
2924 // goal:
2925 // pointer[123]
2926 // Fix-It:
2927 // remove '*('
2928 // replace ' + ' with '['
2929 // replace ')' with ']'
2930
2931 // example:
2932 // *((..(123 + pointer)..))
2933 // goal:
2934 // 123[pointer]
2935 // Fix-It:
2936 // remove '*('
2937 // replace ' + ' with '['
2938 // replace ')' with ']'
2939
2940 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
2941 const SourceManager &SM = Ctx.getSourceManager();
2942 const LangOptions &LangOpts = Ctx.getLangOpts();
2943 CharSourceRange StarWithTrailWhitespace =
2944 clang::CharSourceRange::getCharRange(B: DerefOp->getOperatorLoc(),
2945 E: LHS->getBeginLoc());
2946
2947 std::optional<SourceLocation> LHSLocation = getPastLoc(Node: LHS, SM, LangOpts);
2948 if (!LHSLocation)
2949 return std::nullopt;
2950
2951 CharSourceRange PlusWithSurroundingWhitespace =
2952 clang::CharSourceRange::getCharRange(B: *LHSLocation, E: RHS->getBeginLoc());
2953
2954 std::optional<SourceLocation> AddOpLocation =
2955 getPastLoc(Node: AddOp, SM, LangOpts);
2956 std::optional<SourceLocation> DerefOpLocation =
2957 getPastLoc(Node: DerefOp, SM, LangOpts);
2958
2959 if (!AddOpLocation || !DerefOpLocation)
2960 return std::nullopt;
2961
2962 CharSourceRange ClosingParenWithPrecWhitespace =
2963 clang::CharSourceRange::getCharRange(B: *AddOpLocation, E: *DerefOpLocation);
2964
2965 return FixItList{
2966 {FixItHint::CreateRemoval(RemoveRange: StarWithTrailWhitespace),
2967 FixItHint::CreateReplacement(RemoveRange: PlusWithSurroundingWhitespace, Code: "["),
2968 FixItHint::CreateReplacement(RemoveRange: ClosingParenWithPrecWhitespace, Code: "]")}};
2969 }
2970 return std::nullopt; // something wrong or unsupported, give up
2971}
2972
2973std::optional<FixItList>
2974PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
2975 const VarDecl *VD = cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2976 switch (S.lookup(VD)) {
2977 case FixitStrategy::Kind::Span: {
2978 ASTContext &Ctx = VD->getASTContext();
2979 SourceManager &SM = Ctx.getSourceManager();
2980 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
2981 // Deletes the *operand
2982 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
2983 B: Op->getBeginLoc(), E: Op->getBeginLoc().getLocWithOffset(Offset: 1));
2984 // Inserts the [0]
2985 if (auto LocPastOperand =
2986 getPastLoc(Node: BaseDeclRefExpr, SM, LangOpts: Ctx.getLangOpts())) {
2987 return FixItList{{FixItHint::CreateRemoval(RemoveRange: derefRange),
2988 FixItHint::CreateInsertion(InsertionLoc: *LocPastOperand, Code: "[0]")}};
2989 }
2990 break;
2991 }
2992 case FixitStrategy::Kind::Iterator:
2993 case FixitStrategy::Kind::Array:
2994 return std::nullopt;
2995 case FixitStrategy::Kind::Vector:
2996 llvm_unreachable("FixitStrategy not implemented yet!");
2997 case FixitStrategy::Kind::Wontfix:
2998 llvm_unreachable("Invalid strategy!");
2999 }
3000
3001 return std::nullopt;
3002}
3003
3004static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
3005 const DeclRefExpr *DRE) {
3006 const SourceManager &SM = Ctx.getSourceManager();
3007 // Inserts the .data() after the DRE
3008 std::optional<SourceLocation> EndOfOperand =
3009 getPastLoc(Node: DRE, SM, LangOpts: Ctx.getLangOpts());
3010
3011 if (EndOfOperand)
3012 return FixItList{{FixItHint::CreateInsertion(InsertionLoc: *EndOfOperand, Code: ".data()")}};
3013
3014 return std::nullopt;
3015}
3016
3017// Generates fix-its replacing an expression of the form UPC(DRE) with
3018// `DRE.data()`
3019std::optional<FixItList>
3020UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {
3021 const auto VD = cast<VarDecl>(Val: Node->getDecl());
3022 switch (S.lookup(VD)) {
3023 case FixitStrategy::Kind::Array:
3024 case FixitStrategy::Kind::Span: {
3025 return createDataFixit(Ctx: VD->getASTContext(), DRE: Node);
3026 // FIXME: Points inside a macro expansion.
3027 break;
3028 }
3029 case FixitStrategy::Kind::Wontfix:
3030 case FixitStrategy::Kind::Iterator:
3031 return std::nullopt;
3032 case FixitStrategy::Kind::Vector:
3033 llvm_unreachable("unsupported strategies for FixableGadgets");
3034 }
3035
3036 return std::nullopt;
3037}
3038
3039// Generates fix-its replacing an expression of the form `&DRE[e]` with
3040// `&DRE.data()[e]`:
3041static std::optional<FixItList>
3042fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
3043 const auto *ArraySub = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
3044 const auto *DRE = cast<DeclRefExpr>(Val: ArraySub->getBase()->IgnoreImpCasts());
3045 // FIXME: this `getASTContext` call is costly, we should pass the
3046 // ASTContext in:
3047 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
3048 const Expr *Idx = ArraySub->getIdx();
3049 const SourceManager &SM = Ctx.getSourceManager();
3050 const LangOptions &LangOpts = Ctx.getLangOpts();
3051 std::stringstream SS;
3052 bool IdxIsLitZero = false;
3053
3054 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
3055 if ((*ICE).isZero())
3056 IdxIsLitZero = true;
3057 std::optional<StringRef> DreString = getExprText(E: DRE, SM, LangOpts);
3058 if (!DreString)
3059 return std::nullopt;
3060
3061 if (IdxIsLitZero) {
3062 // If the index is literal zero, we produce the most concise fix-it:
3063 SS << (*DreString).str() << ".data()";
3064 } else {
3065 std::optional<StringRef> IndexString = getExprText(E: Idx, SM, LangOpts);
3066 if (!IndexString)
3067 return std::nullopt;
3068
3069 SS << "&" << (*DreString).str() << ".data()"
3070 << "[" << (*IndexString).str() << "]";
3071 }
3072 return FixItList{
3073 FixItHint::CreateReplacement(RemoveRange: Node->getSourceRange(), Code: SS.str())};
3074}
3075
3076std::optional<FixItList>
3077UUCAddAssignGadget::getFixits(const FixitStrategy &S) const {
3078 DeclUseList DREs = getClaimedVarUseSites();
3079
3080 if (DREs.size() != 1)
3081 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
3082 // give up
3083 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3084 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3085 FixItList Fixes;
3086
3087 const Stmt *AddAssignNode = Node;
3088 StringRef varName = VD->getName();
3089 const ASTContext &Ctx = VD->getASTContext();
3090
3091 if (!isNonNegativeIntegerExpr(Expr: Offset, VD, Ctx))
3092 return std::nullopt;
3093
3094 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
3095 bool NotParenExpr =
3096 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
3097 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
3098 if (NotParenExpr)
3099 SS += "(";
3100
3101 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
3102 Node: AddAssignNode, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3103 if (!AddAssignLocation)
3104 return std::nullopt;
3105
3106 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3107 RemoveRange: SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
3108 Code: SS));
3109 if (NotParenExpr)
3110 Fixes.push_back(Elt: FixItHint::CreateInsertion(
3111 InsertionLoc: Offset->getEndLoc().getLocWithOffset(Offset: 1), Code: ")"));
3112 return Fixes;
3113 }
3114 }
3115 return std::nullopt; // Not in the cases that we can handle for now, give up.
3116}
3117
3118std::optional<FixItList>
3119UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {
3120 DeclUseList DREs = getClaimedVarUseSites();
3121
3122 if (DREs.size() != 1)
3123 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
3124 // give up
3125 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3126 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3127 FixItList Fixes;
3128 std::stringstream SS;
3129 StringRef varName = VD->getName();
3130 const ASTContext &Ctx = VD->getASTContext();
3131
3132 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
3133 SS << "(" << varName.data() << " = " << varName.data()
3134 << ".subspan(1)).data()";
3135 std::optional<SourceLocation> PreIncLocation =
3136 getEndCharLoc(Node, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3137 if (!PreIncLocation)
3138 return std::nullopt;
3139
3140 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3141 RemoveRange: SourceRange(Node->getBeginLoc(), *PreIncLocation), Code: SS.str()));
3142 return Fixes;
3143 }
3144 }
3145 return std::nullopt; // Not in the cases that we can handle for now, give up.
3146}
3147
3148// For a non-null initializer `Init` of `T *` type, this function returns
3149// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
3150// to output stream.
3151// In many cases, this function cannot figure out the actual extent `S`. It
3152// then will use a place holder to replace `S` to ask users to fill `S` in. The
3153// initializer shall be used to initialize a variable of type `std::span<T>`.
3154// In some cases (e. g. constant size array) the initializer should remain
3155// unchanged and the function returns empty list. In case the function can't
3156// provide the right fixit it will return nullopt.
3157//
3158// FIXME: Support multi-level pointers
3159//
3160// Parameters:
3161// `Init` a pointer to the initializer expression
3162// `Ctx` a reference to the ASTContext
3163static std::optional<FixItList>
3164FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
3165 const StringRef UserFillPlaceHolder) {
3166 const SourceManager &SM = Ctx.getSourceManager();
3167 const LangOptions &LangOpts = Ctx.getLangOpts();
3168
3169 // If `Init` has a constant value that is (or equivalent to) a
3170 // NULL pointer, we use the default constructor to initialize the span
3171 // object, i.e., a `std:span` variable declaration with no initializer.
3172 // So the fix-it is just to remove the initializer.
3173 if (Init->isNullPointerConstant(
3174 Ctx,
3175 // FIXME: Why does this function not ask for `const ASTContext
3176 // &`? It should. Maybe worth an NFC patch later.
3177 NPC: Expr::NullPointerConstantValueDependence::
3178 NPC_ValueDependentIsNotNull)) {
3179 std::optional<SourceLocation> InitLocation =
3180 getEndCharLoc(Node: Init, SM, LangOpts);
3181 if (!InitLocation)
3182 return std::nullopt;
3183
3184 SourceRange SR(Init->getBeginLoc(), *InitLocation);
3185
3186 return FixItList{FixItHint::CreateRemoval(RemoveRange: SR)};
3187 }
3188
3189 FixItList FixIts{};
3190 std::string ExtentText = UserFillPlaceHolder.data();
3191 StringRef One = "1";
3192
3193 // Insert `{` before `Init`:
3194 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: Init->getBeginLoc(), Code: "{"));
3195 // Try to get the data extent. Break into different cases:
3196 if (auto CxxNew = dyn_cast<CXXNewExpr>(Val: Init->IgnoreImpCasts())) {
3197 // In cases `Init` is `new T[n]` and there is no explicit cast over
3198 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
3199 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
3200 // simpler for the case where `Init` is `new T`.
3201 if (const Expr *Ext = CxxNew->getArraySize().value_or(u: nullptr)) {
3202 if (!Ext->HasSideEffects(Ctx)) {
3203 std::optional<StringRef> ExtentString = getExprText(E: Ext, SM, LangOpts);
3204 if (!ExtentString)
3205 return std::nullopt;
3206 ExtentText = *ExtentString;
3207 }
3208 } else if (!CxxNew->isArray())
3209 // Although the initializer is not allocating a buffer, the pointer
3210 // variable could still be used in buffer access operations.
3211 ExtentText = One;
3212 } else if (Ctx.getAsConstantArrayType(T: Init->IgnoreImpCasts()->getType())) {
3213 // std::span has a single parameter constructor for initialization with
3214 // constant size array. The size is auto-deduced as the constructor is a
3215 // function template. The correct fixit is empty - no changes should happen.
3216 return FixItList{};
3217 } else {
3218 // In cases `Init` is of the form `&Var` after stripping of implicit
3219 // casts, where `&` is the built-in operator, the extent is 1.
3220 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Val: Init->IgnoreImpCasts()))
3221 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
3222 isa_and_present<DeclRefExpr>(Val: AddrOfExpr->getSubExpr()))
3223 ExtentText = One;
3224 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
3225 // and explicit casting, etc. etc.
3226 }
3227
3228 SmallString<32> StrBuffer{};
3229 std::optional<SourceLocation> LocPassInit = getPastLoc(Node: Init, SM, LangOpts);
3230
3231 if (!LocPassInit)
3232 return std::nullopt;
3233
3234 StrBuffer.append(RHS: ", ");
3235 StrBuffer.append(RHS: ExtentText);
3236 StrBuffer.append(RHS: "}");
3237 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: *LocPassInit, Code: StrBuffer.str()));
3238 return FixIts;
3239}
3240
// In builds without NDEBUG, attaches a debug note to the declaration `D`
// through the `Handler` that is in scope at the expansion site; `Msg` is
// appended to the note text. In NDEBUG builds the macro expands to nothing.
#ifdef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
                             "failed to produce fixit for declaration '" +     \
                                 (D)->getNameAsString() + "'" + (Msg))
#endif
3249
3250// For the given variable declaration with a pointer-to-T type, returns the text
3251// `std::span<T>`. If it is unable to generate the text, returns
3252// `std::nullopt`.
3253static std::optional<std::string>
3254createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {
3255 assert(VD->getType()->isPointerType());
3256
3257 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3258 std::optional<std::string> PteTyText = getPointeeTypeText(
3259 VD, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts(), QualifiersToAppend: &PteTyQualifiers);
3260
3261 if (!PteTyText)
3262 return std::nullopt;
3263
3264 std::string SpanTyText = "std::span<";
3265
3266 SpanTyText.append(str: *PteTyText);
3267 // Append qualifiers to span element type if any:
3268 if (PteTyQualifiers) {
3269 SpanTyText.append(s: " ");
3270 SpanTyText.append(str: PteTyQualifiers->getAsString());
3271 }
3272 SpanTyText.append(s: ">");
3273 return SpanTyText;
3274}
3275
3276// For a `VarDecl` of the form `T * var (= Init)?`, this
3277// function generates fix-its that
3278// 1) replace `T * var` with `std::span<T> var`; and
3279// 2) change `Init` accordingly to a span constructor, if it exists.
3280//
3281// FIXME: support Multi-level pointers
3282//
3283// Parameters:
3284// `D` a pointer the variable declaration node
3285// `Ctx` a reference to the ASTContext
3286// `UserFillPlaceHolder` the user-input placeholder text
3287// Returns:
3288// the non-empty fix-it list, if fix-its are successfuly generated; empty
3289// list otherwise.
3290static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
3291 const StringRef UserFillPlaceHolder,
3292 UnsafeBufferUsageHandler &Handler) {
3293 if (hasUnsupportedSpecifiers(VD: D, SM: Ctx.getSourceManager()))
3294 return {};
3295
3296 FixItList FixIts{};
3297 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: D, Ctx);
3298
3299 if (!SpanTyText) {
3300 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
3301 return {};
3302 }
3303
3304 // Will hold the text for `std::span<T> Ident`:
3305 std::stringstream SS;
3306
3307 SS << *SpanTyText;
3308 // Fix the initializer if it exists:
3309 if (const Expr *Init = D->getInit()) {
3310 std::optional<FixItList> InitFixIts =
3311 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
3312 if (!InitFixIts)
3313 return {};
3314 FixIts.insert(I: FixIts.end(), From: std::make_move_iterator(i: InitFixIts->begin()),
3315 To: std::make_move_iterator(i: InitFixIts->end()));
3316 }
3317 // For declaration of the form `T * ident = init;`, we want to replace
3318 // `T * ` with `std::span<T>`.
3319 // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
3320 // just `T *` with `std::span<T>`.
3321 const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();
3322 if (!EndLocForReplacement.isValid()) {
3323 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
3324 return {};
3325 }
3326 // The only exception is that for `T *ident` we'll add a single space between
3327 // "std::span<T>" and "ident".
3328 // FIXME: The condition is false for identifiers expended from macros.
3329 if (EndLocForReplacement.getLocWithOffset(Offset: 1) == getVarDeclIdentifierLoc(VD: D))
3330 SS << " ";
3331
3332 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3333 RemoveRange: SourceRange(D->getBeginLoc(), EndLocForReplacement), Code: SS.str()));
3334 return FixIts;
3335}
3336
3337static bool hasConflictingOverload(const FunctionDecl *FD) {
3338 return !FD->getDeclContext()->lookup(Name: FD->getDeclName()).isSingleResult();
3339}
3340
3341// For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
3342// types, this function produces fix-its to make the change self-contained. Let
3343// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
3344// entity defined by the `FunctionDecl` after the change to the parameters.
3345// Fix-its produced by this function are
3346// 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
3347// of 'F';
3348// 2. Create a declaration of "NewF" next to each declaration of `F`;
// 3. Create a definition of "F" (as its original definition now belongs
// to "NewF") next to its original definition. The body of the created
// definition calls "NewF".
3352//
3353// Example:
3354//
3355// void f(int *p); // original declaration
3356// void f(int *p) { // original definition
3357// p[5];
3358// }
3359//
3360// To change the parameter `p` to be of `std::span<int>` type, we
3361// also add overloads:
3362//
3363// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
3364// void f(std::span<int> p); // added overload decl
3365// void f(std::span<int> p) { // original def where param is changed
3366// p[5];
3367// }
3368// [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
3369// return f(std::span(p, <# size #>));
3370// }
3371//
3372static std::optional<FixItList>
3373createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
3374 const ASTContext &Ctx,
3375 UnsafeBufferUsageHandler &Handler) {
3376 // FIXME: need to make this conflict checking better:
3377 if (hasConflictingOverload(FD))
3378 return std::nullopt;
3379
3380 const SourceManager &SM = Ctx.getSourceManager();
3381 const LangOptions &LangOpts = Ctx.getLangOpts();
3382 const unsigned NumParms = FD->getNumParams();
3383 std::vector<std::string> NewTysTexts(NumParms);
3384 std::vector<bool> ParmsMask(NumParms, false);
3385 bool AtLeastOneParmToFix = false;
3386
3387 for (unsigned i = 0; i < NumParms; i++) {
3388 const ParmVarDecl *PVD = FD->getParamDecl(i);
3389
3390 if (S.lookup(VD: PVD) == FixitStrategy::Kind::Wontfix)
3391 continue;
3392 if (S.lookup(VD: PVD) != FixitStrategy::Kind::Span)
3393 // Not supported, not suppose to happen:
3394 return std::nullopt;
3395
3396 std::optional<Qualifiers> PteTyQuals = std::nullopt;
3397 std::optional<std::string> PteTyText =
3398 getPointeeTypeText(VD: PVD, SM, LangOpts, QualifiersToAppend: &PteTyQuals);
3399
3400 if (!PteTyText)
3401 // something wrong in obtaining the text of the pointee type, give up
3402 return std::nullopt;
3403 // FIXME: whether we should create std::span type depends on the
3404 // FixitStrategy.
3405 NewTysTexts[i] = getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQuals);
3406 ParmsMask[i] = true;
3407 AtLeastOneParmToFix = true;
3408 }
3409 if (!AtLeastOneParmToFix)
3410 // No need to create function overloads:
3411 return {};
3412 // FIXME Respect indentation of the original code.
3413
3414 // A lambda that creates the text representation of a function declaration
3415 // with the new type signatures:
3416 const auto NewOverloadSignatureCreator =
3417 [&SM, &LangOpts, &NewTysTexts,
3418 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3419 std::stringstream SS;
3420
3421 SS << ";";
3422 SS << getEndOfLine().str();
3423 // Append: ret-type func-name "("
3424 if (auto Prefix = getRangeText(
3425 SR: SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
3426 SM, LangOpts))
3427 SS << Prefix->str();
3428 else
3429 return std::nullopt; // give up
3430 // Append: parameter-type-list
3431 const unsigned NumParms = FD->getNumParams();
3432
3433 for (unsigned i = 0; i < NumParms; i++) {
3434 const ParmVarDecl *Parm = FD->getParamDecl(i);
3435
3436 if (Parm->isImplicit())
3437 continue;
3438 if (ParmsMask[i]) {
3439 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
3440 // new type:
3441 SS << NewTysTexts[i];
3442 // print parameter name if provided:
3443 if (IdentifierInfo *II = Parm->getIdentifier())
3444 SS << ' ' << II->getName().str();
3445 } else if (auto ParmTypeText =
3446 getRangeText(SR: getSourceRangeToTokenEnd(D: Parm, SM, LangOpts),
3447 SM, LangOpts)) {
3448 // print the whole `Parm` without modification:
3449 SS << ParmTypeText->str();
3450 } else
3451 return std::nullopt; // something wrong, give up
3452 if (i != NumParms - 1)
3453 SS << ", ";
3454 }
3455 SS << ")";
3456 return SS.str();
3457 };
3458
3459 // A lambda that creates the text representation of a function definition with
3460 // the original signature:
3461 const auto OldOverloadDefCreator =
3462 [&Handler, &SM, &LangOpts, &NewTysTexts,
3463 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3464 std::stringstream SS;
3465
3466 SS << getEndOfLine().str();
3467 // Append: attr-name ret-type func-name "(" param-list ")" "{"
3468 if (auto FDPrefix = getRangeText(
3469 SR: SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
3470 LangOpts))
3471 SS << Handler.getUnsafeBufferUsageAttributeTextAt(Loc: FD->getBeginLoc(), WSSuffix: " ")
3472 << FDPrefix->str() << "{";
3473 else
3474 return std::nullopt;
3475 // Append: "return" func-name "("
3476 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
3477 SS << "return " << FunQualName->str() << "(";
3478 else
3479 return std::nullopt;
3480
3481 // Append: arg-list
3482 const unsigned NumParms = FD->getNumParams();
3483 for (unsigned i = 0; i < NumParms; i++) {
3484 const ParmVarDecl *Parm = FD->getParamDecl(i);
3485
3486 if (Parm->isImplicit())
3487 continue;
3488 // FIXME: If a parameter has no name, it is unused in the
3489 // definition. So we could just leave it as it is.
3490 if (!Parm->getIdentifier())
3491 // If a parameter of a function definition has no name:
3492 return std::nullopt;
3493 if (ParmsMask[i])
3494 // This is our spanified paramter!
3495 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
3496 << ", " << getUserFillPlaceHolder(HintTextToUser: "size") << ")";
3497 else
3498 SS << Parm->getIdentifier()->getName().str();
3499 if (i != NumParms - 1)
3500 SS << ", ";
3501 }
3502 // finish call and the body
3503 SS << ");}" << getEndOfLine().str();
3504 // FIXME: 80-char line formatting?
3505 return SS.str();
3506 };
3507
3508 FixItList FixIts{};
3509 for (FunctionDecl *FReDecl : FD->redecls()) {
3510 std::optional<SourceLocation> Loc = getPastLoc(Node: FReDecl, SM, LangOpts);
3511
3512 if (!Loc)
3513 return {};
3514 if (FReDecl->isThisDeclarationADefinition()) {
3515 assert(FReDecl == FD && "inconsistent function definition");
3516 // Inserts a definition with the old signature to the end of
3517 // `FReDecl`:
3518 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
3519 FixIts.emplace_back(Args: FixItHint::CreateInsertion(InsertionLoc: *Loc, Code: *OldOverloadDef));
3520 else
3521 return {}; // give up
3522 } else {
3523 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
3524 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
3525 FixIts.emplace_back(Args: FixItHint::CreateInsertion(
3526 InsertionLoc: FReDecl->getBeginLoc(), Code: Handler.getUnsafeBufferUsageAttributeTextAt(
3527 Loc: FReDecl->getBeginLoc(), WSSuffix: " ")));
3528 }
3529 // Inserts a declaration with the new signature to the end of `FReDecl`:
3530 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
3531 FixIts.emplace_back(Args: FixItHint::CreateInsertion(InsertionLoc: *Loc, Code: *NewOverloadDecl));
3532 else
3533 return {};
3534 }
3535 }
3536 return FixIts;
3537}
3538
3539// To fix a `ParmVarDecl` to be of `std::span` type.
3540static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
3541 UnsafeBufferUsageHandler &Handler) {
3542 if (hasUnsupportedSpecifiers(VD: PVD, SM: Ctx.getSourceManager())) {
3543 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
3544 return {};
3545 }
3546 if (PVD->hasDefaultArg()) {
3547 // FIXME: generate fix-its for default values:
3548 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
3549 return {};
3550 }
3551
3552 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3553 std::optional<std::string> PteTyText = getPointeeTypeText(
3554 VD: PVD, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts(), QualifiersToAppend: &PteTyQualifiers);
3555
3556 if (!PteTyText) {
3557 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
3558 return {};
3559 }
3560
3561 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
3562
3563 if (!PVDNameText) {
3564 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
3565 return {};
3566 }
3567
3568 std::stringstream SS;
3569 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: PVD, Ctx);
3570
3571 if (PteTyQualifiers)
3572 // Append qualifiers if they exist:
3573 SS << getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQualifiers);
3574 else
3575 SS << getSpanTypeText(EltTyText: *PteTyText);
3576 // Append qualifiers to the type of the parameter:
3577 if (PVD->getType().hasQualifiers())
3578 SS << ' ' << PVD->getType().getQualifiers().getAsString();
3579 // Append parameter's name:
3580 SS << ' ' << PVDNameText->str();
3581 // Add replacement fix-it:
3582 return {FixItHint::CreateReplacement(RemoveRange: PVD->getSourceRange(), Code: SS.str())};
3583}
3584
3585static FixItList fixVariableWithSpan(const VarDecl *VD,
3586 const DeclUseTracker &Tracker,
3587 ASTContext &Ctx,
3588 UnsafeBufferUsageHandler &Handler) {
3589 const DeclStmt *DS = Tracker.lookupDecl(VD);
3590 if (!DS) {
3591 DEBUG_NOTE_DECL_FAIL(VD,
3592 " : variables declared this way not implemented yet");
3593 return {};
3594 }
3595 if (!DS->isSingleDecl()) {
3596 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3597 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
3598 return {};
3599 }
3600 // Currently DS is an unused variable but we'll need it when
3601 // non-single decls are implemented, where the pointee type name
3602 // and the '*' are spread around the place.
3603 (void)DS;
3604
3605 // FIXME: handle cases where DS has multiple declarations
3606 return fixLocalVarDeclWithSpan(D: VD, Ctx, UserFillPlaceHolder: getUserFillPlaceHolder(), Handler);
3607}
3608
3609static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
3610 UnsafeBufferUsageHandler &Handler) {
3611 FixItList FixIts{};
3612
3613 // Note: the code below expects the declaration to not use any type sugar like
3614 // typedef.
3615 if (auto CAT = Ctx.getAsConstantArrayType(T: D->getType())) {
3616 const QualType &ArrayEltT = CAT->getElementType();
3617 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
3618 // FIXME: support multi-dimensional arrays
3619 if (isa<clang::ArrayType>(Val: ArrayEltT.getCanonicalType()))
3620 return {};
3621
3622 const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(VD: D);
3623
3624 // Get the spelling of the element type as written in the source file
3625 // (including macros, etc.).
3626 auto MaybeElemTypeTxt =
3627 getRangeText(SR: {D->getBeginLoc(), IdentifierLoc}, SM: Ctx.getSourceManager(),
3628 LangOpts: Ctx.getLangOpts());
3629 if (!MaybeElemTypeTxt)
3630 return {};
3631 const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
3632
3633 // Find the '[' token.
3634 std::optional<Token> NextTok = Lexer::findNextToken(
3635 Loc: IdentifierLoc, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3636 while (NextTok && !NextTok->is(K: tok::l_square) &&
3637 NextTok->getLocation() <= D->getSourceRange().getEnd())
3638 NextTok = Lexer::findNextToken(Loc: NextTok->getLocation(),
3639 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3640 if (!NextTok)
3641 return {};
3642 const SourceLocation LSqBracketLoc = NextTok->getLocation();
3643
3644 // Get the spelling of the array size as written in the source file
3645 // (including macros, etc.).
3646 auto MaybeArraySizeTxt = getRangeText(
3647 SR: {LSqBracketLoc.getLocWithOffset(Offset: 1), D->getTypeSpecEndLoc()},
3648 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3649 if (!MaybeArraySizeTxt)
3650 return {};
3651 const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
3652 if (ArraySizeTxt.empty()) {
3653 // FIXME: Support array size getting determined from the initializer.
3654 // Examples:
3655 // int arr1[] = {0, 1, 2};
3656 // int arr2{3, 4, 5};
3657 // We might be able to preserve the non-specified size with `auto` and
3658 // `std::to_array`:
3659 // auto arr1 = std::to_array<int>({0, 1, 2});
3660 return {};
3661 }
3662
3663 std::optional<StringRef> IdentText =
3664 getVarDeclIdentifierText(VD: D, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3665
3666 if (!IdentText) {
3667 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
3668 return {};
3669 }
3670
3671 SmallString<32> Replacement;
3672 llvm::raw_svector_ostream OS(Replacement);
3673 OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
3674 << IdentText->str();
3675
3676 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3677 RemoveRange: SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, Code: OS.str()));
3678 }
3679
3680 return FixIts;
3681}
3682
3683static FixItList fixVariableWithArray(const VarDecl *VD,
3684 const DeclUseTracker &Tracker,
3685 const ASTContext &Ctx,
3686 UnsafeBufferUsageHandler &Handler) {
3687 const DeclStmt *DS = Tracker.lookupDecl(VD);
3688 assert(DS && "Fixing non-local variables not implemented yet!");
3689 if (!DS->isSingleDecl()) {
3690 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3691 return {};
3692 }
3693 // Currently DS is an unused variable but we'll need it when
3694 // non-single decls are implemented, where the pointee type name
3695 // and the '*' are spread around the place.
3696 (void)DS;
3697
3698 // FIXME: handle cases where DS has multiple declarations
3699 return fixVarDeclWithArray(D: VD, Ctx, Handler);
3700}
3701
3702// TODO: we should be consistent to use `std::nullopt` to represent no-fix due
3703// to any unexpected problem.
3704static FixItList
3705fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
3706 /* The function decl under analysis */ const Decl *D,
3707 const DeclUseTracker &Tracker, ASTContext &Ctx,
3708 UnsafeBufferUsageHandler &Handler) {
3709 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD)) {
3710 auto *FD = dyn_cast<clang::FunctionDecl>(Val: PVD->getDeclContext());
3711 if (!FD || FD != D) {
3712 // `FD != D` means that `PVD` belongs to a function that is not being
3713 // analyzed currently. Thus `FD` may not be complete.
3714 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
3715 return {};
3716 }
3717
3718 // TODO If function has a try block we can't change params unless we check
3719 // also its catch block for their use.
3720 // FIXME We might support static class methods, some select methods,
3721 // operators and possibly lamdas.
3722 if (FD->isMain() || FD->isConstexpr() ||
3723 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
3724 FD->isVariadic() ||
3725 // also covers call-operator of lamdas
3726 isa<CXXMethodDecl>(Val: FD) ||
3727 // skip when the function body is a try-block
3728 (FD->hasBody() && isa<CXXTryStmt>(Val: FD->getBody())) ||
3729 FD->isOverloadedOperator()) {
3730 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
3731 return {}; // TODO test all these cases
3732 }
3733 }
3734
3735 switch (K) {
3736 case FixitStrategy::Kind::Span: {
3737 if (VD->getType()->isPointerType()) {
3738 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD))
3739 return fixParamWithSpan(PVD, Ctx, Handler);
3740
3741 if (VD->isLocalVarDecl())
3742 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
3743 }
3744 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
3745 return {};
3746 }
3747 case FixitStrategy::Kind::Array: {
3748 if (VD->isLocalVarDecl() && Ctx.getAsConstantArrayType(T: VD->getType()))
3749 return fixVariableWithArray(VD, Tracker, Ctx, Handler);
3750
3751 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
3752 return {};
3753 }
3754 case FixitStrategy::Kind::Iterator:
3755 case FixitStrategy::Kind::Vector:
3756 llvm_unreachable("FixitStrategy not implemented yet!");
3757 case FixitStrategy::Kind::Wontfix:
3758 llvm_unreachable("Invalid strategy!");
3759 }
3760 llvm_unreachable("Unknown strategy!");
3761}
3762
3763// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
3764// `RemoveRange` of 'h' overlaps with a macro use.
3765static bool overlapWithMacro(const FixItList &FixIts) {
3766 // FIXME: For now we only check if the range (or the first token) is (part of)
3767 // a macro expansion. Ideally, we want to check for all tokens in the range.
3768 return llvm::any_of(Range: FixIts, P: [](const FixItHint &Hint) {
3769 auto Range = Hint.RemoveRange;
3770 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
3771 // If the range (or the first token) is (part of) a macro expansion:
3772 return true;
3773 return false;
3774 });
3775}
3776
3777// Returns true iff `VD` is a parameter of the declaration `D`:
3778static bool isParameterOf(const VarDecl *VD, const Decl *D) {
3779 return isa<ParmVarDecl>(Val: VD) &&
3780 VD->getDeclContext() == dyn_cast<DeclContext>(Val: D);
3781}
3782
3783// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
3784// group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
3785// contain `v`.
3786static void eraseVarsForUnfixableGroupMates(
3787 std::map<const VarDecl *, FixItList> &FixItsForVariable,
3788 const VariableGroupsManager &VarGrpMgr) {
3789 // Variables will be removed from `FixItsForVariable`:
3790 SmallVector<const VarDecl *, 8> ToErase;
3791
3792 for (const auto &[VD, Ignore] : FixItsForVariable) {
3793 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(Var: VD);
3794 if (llvm::any_of(Range&: Grp,
3795 P: [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
3796 return !FixItsForVariable.count(x: GrpMember);
3797 })) {
3798 // At least one group member cannot be fixed, so we have to erase the
3799 // whole group:
3800 for (const VarDecl *Member : Grp)
3801 ToErase.push_back(Elt: Member);
3802 }
3803 }
3804 for (auto *VarToErase : ToErase)
3805 FixItsForVariable.erase(x: VarToErase);
3806}
3807
3808// Returns the fix-its that create bounds-safe function overloads for the
3809// function `D`, if `D`'s parameters will be changed to safe-types through
3810// fix-its in `FixItsForVariable`.
3811//
3812// NOTE: In case `D`'s parameters will be changed but bounds-safe function
3813// overloads cannot created, the whole group that contains the parameters will
3814// be erased from `FixItsForVariable`.
3815static FixItList createFunctionOverloadsForParms(
3816 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
3817 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
3818 const FixitStrategy &S, ASTContext &Ctx,
3819 UnsafeBufferUsageHandler &Handler) {
3820 FixItList FixItsSharedByParms{};
3821
3822 std::optional<FixItList> OverloadFixes =
3823 createOverloadsForFixedParams(S, FD, Ctx, Handler);
3824
3825 if (OverloadFixes) {
3826 FixItsSharedByParms.append(RHS: *OverloadFixes);
3827 } else {
3828 // Something wrong in generating `OverloadFixes`, need to remove the
3829 // whole group, where parameters are in, from `FixItsForVariable` (Note
3830 // that all parameters should be in the same group):
3831 for (auto *Member : VarGrpMgr.getGroupOfParms())
3832 FixItsForVariable.erase(x: Member);
3833 }
3834 return FixItsSharedByParms;
3835}
3836
3837// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
3838static std::map<const VarDecl *, FixItList>
3839getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
3840 ASTContext &Ctx,
3841 /* The function decl under analysis */ const Decl *D,
3842 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
3843 const VariableGroupsManager &VarGrpMgr) {
3844 // `FixItsForVariable` will map each variable to a set of fix-its directly
3845 // associated to the variable itself. Fix-its of distinct variables in
3846 // `FixItsForVariable` are disjoint.
3847 std::map<const VarDecl *, FixItList> FixItsForVariable;
3848
3849 // Populate `FixItsForVariable` with fix-its directly associated with each
3850 // variable. Fix-its directly associated to a variable 'v' are the ones
3851 // produced by the `FixableGadget`s whose claimed variable is 'v'.
3852 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
3853 FixItsForVariable[VD] =
3854 fixVariable(VD, K: S.lookup(VD), D, Tracker, Ctx, Handler);
3855 // If we fail to produce Fix-It for the declaration we have to skip the
3856 // variable entirely.
3857 if (FixItsForVariable[VD].empty()) {
3858 FixItsForVariable.erase(x: VD);
3859 continue;
3860 }
3861 for (const auto &F : Fixables) {
3862 std::optional<FixItList> Fixits = F->getFixits(S);
3863
3864 if (Fixits) {
3865 FixItsForVariable[VD].insert(I: FixItsForVariable[VD].end(),
3866 From: Fixits->begin(), To: Fixits->end());
3867 continue;
3868 }
3869#ifndef NDEBUG
3870 Handler.addDebugNoteForVar(
3871 VD, F->getSourceLoc(),
3872 ("gadget '" + F->getDebugName() + "' refused to produce a fix")
3873 .str());
3874#endif
3875 FixItsForVariable.erase(x: VD);
3876 break;
3877 }
3878 }
3879
3880 // `FixItsForVariable` now contains only variables that can be
3881 // fixed. A variable can be fixed if its' declaration and all Fixables
3882 // associated to it can all be fixed.
3883
3884 // To further remove from `FixItsForVariable` variables whose group mates
3885 // cannot be fixed...
3886 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
3887 // Now `FixItsForVariable` gets further reduced: a variable is in
3888 // `FixItsForVariable` iff it can be fixed and all its group mates can be
3889 // fixed.
3890
3891 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
3892 // That is, when fixing multiple parameters in one step, these fix-its will
3893 // be applied only once (instead of being applied per parameter).
3894 FixItList FixItsSharedByParms{};
3895
3896 if (auto *FD = dyn_cast<FunctionDecl>(Val: D))
3897 FixItsSharedByParms = createFunctionOverloadsForParms(
3898 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
3899
3900 // The map that maps each variable `v` to fix-its for the whole group where
3901 // `v` is in:
3902 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
3903 FixItsForVariable};
3904
3905 for (auto &[Var, Ignore] : FixItsForVariable) {
3906 bool AnyParm = false;
3907 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, HasParm: &AnyParm);
3908
3909 for (const VarDecl *GrpMate : VarGroupForVD) {
3910 if (Var == GrpMate)
3911 continue;
3912 if (FixItsForVariable.count(x: GrpMate))
3913 FinalFixItsForVariable[Var].append(RHS: FixItsForVariable[GrpMate]);
3914 }
3915 if (AnyParm) {
3916 // This assertion should never fail. Otherwise we have a bug.
3917 assert(!FixItsSharedByParms.empty() &&
3918 "Should not try to fix a parameter that does not belong to a "
3919 "FunctionDecl");
3920 FinalFixItsForVariable[Var].append(RHS: FixItsSharedByParms);
3921 }
3922 }
3923 // Fix-its that will be applied in one step shall NOT:
3924 // 1. overlap with macros or/and templates; or
3925 // 2. conflict with each other.
3926 // Otherwise, the fix-its will be dropped.
3927 for (auto Iter = FinalFixItsForVariable.begin();
3928 Iter != FinalFixItsForVariable.end();)
3929 if (overlapWithMacro(FixIts: Iter->second) ||
3930 clang::internal::anyConflict(FixIts: Iter->second, SM: Ctx.getSourceManager())) {
3931 Iter = FinalFixItsForVariable.erase(position: Iter);
3932 } else
3933 Iter++;
3934 return FinalFixItsForVariable;
3935}
3936
3937template <typename VarDeclIterTy>
3938static FixitStrategy
3939getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
3940 FixitStrategy S;
3941 for (const VarDecl *VD : UnsafeVars) {
3942 if (isa<ConstantArrayType>(Val: VD->getType().getCanonicalType()))
3943 S.set(VD, K: FixitStrategy::Kind::Array);
3944 else
3945 S.set(VD, K: FixitStrategy::Kind::Span);
3946 }
3947 return S;
3948}
3949
3950// Manages variable groups:
3951class VariableGroupsManagerImpl : public VariableGroupsManager {
3952 const std::vector<VarGrpTy> Groups;
3953 const std::map<const VarDecl *, unsigned> &VarGrpMap;
3954 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
3955
3956public:
3957 VariableGroupsManagerImpl(
3958 const std::vector<VarGrpTy> &Groups,
3959 const std::map<const VarDecl *, unsigned> &VarGrpMap,
3960 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
3961 : Groups(Groups), VarGrpMap(VarGrpMap),
3962 GrpsUnionForParms(GrpsUnionForParms) {}
3963
3964 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
3965 if (GrpsUnionForParms.contains(key: Var)) {
3966 if (HasParm)
3967 *HasParm = true;
3968 return GrpsUnionForParms.getArrayRef();
3969 }
3970 if (HasParm)
3971 *HasParm = false;
3972
3973 auto It = VarGrpMap.find(x: Var);
3974
3975 if (It == VarGrpMap.end())
3976 return {};
3977 return Groups[It->second];
3978 }
3979
3980 VarGrpRef getGroupOfParms() const override {
3981 return GrpsUnionForParms.getArrayRef();
3982 }
3983};
3984
3985static void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets,
3986 WarningGadgetList WarningGadgets,
3987 DeclUseTracker Tracker,
3988 UnsafeBufferUsageHandler &Handler,
3989 bool EmitSuggestions) {
3990 if (!EmitSuggestions) {
3991 // Our job is very easy without suggestions. Just warn about
3992 // every problematic operation and consider it done. No need to deal
3993 // with fixable gadgets, no need to group operations by variable.
3994 for (const auto &G : WarningGadgets) {
3995 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
3996 Ctx&: D->getASTContext());
3997 }
3998
3999 // This return guarantees that most of the machine doesn't run when
4000 // suggestions aren't requested.
4001 assert(FixableGadgets.empty() &&
4002 "Fixable gadgets found but suggestions not requested!");
4003 return;
4004 }
4005
4006 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
4007 // function under the analysis. No need to fix any Fixables.
4008 if (!WarningGadgets.empty()) {
4009 // Gadgets "claim" variables they're responsible for. Once this loop
4010 // finishes, the tracker will only track DREs that weren't claimed by any
4011 // gadgets, i.e. not understood by the analysis.
4012 for (const auto &G : FixableGadgets) {
4013 for (const auto *DRE : G->getClaimedVarUseSites()) {
4014 Tracker.claimUse(DRE);
4015 }
4016 }
4017 }
4018
4019 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
4020 // function under the analysis. Thus, it early returns here as there is
4021 // nothing needs to be fixed.
4022 //
4023 // Note this claim is based on the assumption that there is no unsafe
4024 // variable whose declaration is invisible from the analyzing function.
4025 // Otherwise, we need to consider if the uses of those unsafe varuables needs
4026 // fix.
4027 // So far, we are not fixing any global variables or class members. And,
4028 // lambdas will be analyzed along with the enclosing function. So this early
4029 // return is correct for now.
4030 if (WarningGadgets.empty())
4031 return;
4032
4033 WarningGadgetSets UnsafeOps =
4034 groupWarningGadgetsByVar(AllUnsafeOperations: std::move(WarningGadgets));
4035 FixableGadgetSets FixablesForAllVars =
4036 groupFixablesByVar(AllFixableOperations: std::move(FixableGadgets));
4037
4038 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
4039
4040 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
4041 for (auto it = FixablesForAllVars.byVar.cbegin();
4042 it != FixablesForAllVars.byVar.cend();) {
4043 // FIXME: need to deal with global variables later
4044 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(Val: it->first))) {
4045#ifndef NDEBUG
4046 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4047 ("failed to produce fixit for '" +
4048 it->first->getNameAsString() +
4049 "' : neither local nor a parameter"));
4050#endif
4051 it = FixablesForAllVars.byVar.erase(position: it);
4052 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
4053#ifndef NDEBUG
4054 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4055 ("failed to produce fixit for '" +
4056 it->first->getNameAsString() +
4057 "' : has a reference type"));
4058#endif
4059 it = FixablesForAllVars.byVar.erase(position: it);
4060 } else if (Tracker.hasUnclaimedUses(VD: it->first)) {
4061 it = FixablesForAllVars.byVar.erase(position: it);
4062 } else if (it->first->isInitCapture()) {
4063#ifndef NDEBUG
4064 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4065 ("failed to produce fixit for '" +
4066 it->first->getNameAsString() +
4067 "' : init capture"));
4068#endif
4069 it = FixablesForAllVars.byVar.erase(position: it);
4070 } else {
4071 ++it;
4072 }
4073 }
4074
4075#ifndef NDEBUG
4076 for (const auto &it : UnsafeOps.byVar) {
4077 const VarDecl *const UnsafeVD = it.first;
4078 auto UnclaimedDREs = Tracker.getUnclaimedUses(UnsafeVD);
4079 if (UnclaimedDREs.empty())
4080 continue;
4081 const auto UnfixedVDName = UnsafeVD->getNameAsString();
4082 for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) {
4083 std::string UnclaimedUseTrace =
4084 getDREAncestorString(UnclaimedDRE, D->getASTContext());
4085
4086 Handler.addDebugNoteForVar(
4087 UnsafeVD, UnclaimedDRE->getBeginLoc(),
4088 ("failed to produce fixit for '" + UnfixedVDName +
4089 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
4090 UnclaimedUseTrace));
4091 }
4092 }
4093#endif
4094
4095 // Fixpoint iteration for pointer assignments
4096 using DepMapTy =
4097 llvm::DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
4098 DepMapTy DependenciesMap{};
4099 DepMapTy PtrAssignmentGraph{};
4100
4101 for (const auto &it : FixablesForAllVars.byVar) {
4102 for (const FixableGadget *fixable : it.second) {
4103 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
4104 fixable->getStrategyImplications();
4105 if (ImplPair) {
4106 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
4107 PtrAssignmentGraph[Impl.first].insert(X: Impl.second);
4108 }
4109 }
4110 }
4111
4112 /*
4113 The following code does a BFS traversal of the `PtrAssignmentGraph`
4114 considering all unsafe vars as starting nodes and constructs an undirected
4115 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
4116 elimiates all variables that are unreachable from any unsafe var. In other
4117 words, this removes all dependencies that don't include any unsafe variable
4118 and consequently don't need any fixit generation.
4119 Note: A careful reader would observe that the code traverses
4120 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
4121 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
4122 achieve the same result but the one used here dramatically cuts the
4123 amount of hoops the second part of the algorithm needs to jump, given that
4124 a lot of these connections become "direct". The reader is advised not to
4125 imagine how the graph is transformed because of using `Var` instead of
4126 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
4127 and think about why it's equivalent later.
4128 */
4129 std::set<const VarDecl *> VisitedVarsDirected{};
4130 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4131 if (VisitedVarsDirected.find(x: Var) == VisitedVarsDirected.end()) {
4132
4133 std::queue<const VarDecl *> QueueDirected{};
4134 QueueDirected.push(x: Var);
4135 while (!QueueDirected.empty()) {
4136 const VarDecl *CurrentVar = QueueDirected.front();
4137 QueueDirected.pop();
4138 VisitedVarsDirected.insert(x: CurrentVar);
4139 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
4140 for (const VarDecl *Adj : AdjacentNodes) {
4141 if (VisitedVarsDirected.find(x: Adj) == VisitedVarsDirected.end()) {
4142 QueueDirected.push(x: Adj);
4143 }
4144 DependenciesMap[Var].insert(X: Adj);
4145 DependenciesMap[Adj].insert(X: Var);
4146 }
4147 }
4148 }
4149 }
4150
4151 // `Groups` stores the set of Connected Components in the graph.
4152 std::vector<VarGrpTy> Groups;
4153 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
4154 // variables belong to. Group indexes refer to the elements in `Groups`.
4155 // `VarGrpMap` is complete in that every variable that needs fix is in it.
4156 std::map<const VarDecl *, unsigned> VarGrpMap;
4157 // The union group over the ones in "Groups" that contain parameters of `D`:
4158 llvm::SetVector<const VarDecl *>
4159 GrpsUnionForParms; // these variables need to be fixed in one step
4160
4161 // Group Connected Components for Unsafe Vars
4162 // (Dependencies based on pointer assignments)
4163 std::set<const VarDecl *> VisitedVars{};
4164 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4165 if (VisitedVars.find(x: Var) == VisitedVars.end()) {
4166 VarGrpTy &VarGroup = Groups.emplace_back();
4167 std::queue<const VarDecl *> Queue{};
4168
4169 Queue.push(x: Var);
4170 while (!Queue.empty()) {
4171 const VarDecl *CurrentVar = Queue.front();
4172 Queue.pop();
4173 VisitedVars.insert(x: CurrentVar);
4174 VarGroup.push_back(x: CurrentVar);
4175 auto AdjacentNodes = DependenciesMap[CurrentVar];
4176 for (const VarDecl *Adj : AdjacentNodes) {
4177 if (VisitedVars.find(x: Adj) == VisitedVars.end()) {
4178 Queue.push(x: Adj);
4179 }
4180 }
4181 }
4182
4183 bool HasParm = false;
4184 unsigned GrpIdx = Groups.size() - 1;
4185
4186 for (const VarDecl *V : VarGroup) {
4187 VarGrpMap[V] = GrpIdx;
4188 if (!HasParm && isParameterOf(VD: V, D))
4189 HasParm = true;
4190 }
4191 if (HasParm)
4192 GrpsUnionForParms.insert_range(R&: VarGroup);
4193 }
4194 }
4195
4196 // Remove a `FixableGadget` if the associated variable is not in the graph
4197 // computed above. We do not want to generate fix-its for such variables,
4198 // since they are neither warned nor reachable from a warned one.
4199 //
4200 // Note a variable is not warned if it is not directly used in any unsafe
4201 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
4202 // does not exist another variable `u` such that `u` is warned and fixing `u`
4203 // (transitively) implicates fixing `v`.
4204 //
4205 // For example,
4206 // ```
4207 // void f(int * p) {
4208 // int * a = p; *p = 0;
4209 // }
4210 // ```
4211 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
4212 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
4213 // the function above, `p` becomes reachable from a warned variable.
4214 for (auto I = FixablesForAllVars.byVar.begin();
4215 I != FixablesForAllVars.byVar.end();) {
4216 // Note `VisitedVars` contain all the variables in the graph:
4217 if (!VisitedVars.count(x: (*I).first)) {
4218 // no such var in graph:
4219 I = FixablesForAllVars.byVar.erase(position: I);
4220 } else
4221 ++I;
4222 }
4223
4224 // We assign strategies to variables that are 1) in the graph and 2) can be
4225 // fixed. Other variables have the default "Won't fix" strategy.
4226 FixitStrategy NaiveStrategy = getNaiveStrategy(UnsafeVars: llvm::make_filter_range(
4227 Range&: VisitedVars, Pred: [&FixablesForAllVars](const VarDecl *V) {
4228 // If a warned variable has no "Fixable", it is considered unfixable:
4229 return FixablesForAllVars.byVar.count(x: V);
4230 }));
4231 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
4232
4233 if (isa<NamedDecl>(Val: D))
4234 // The only case where `D` is not a `NamedDecl` is when `D` is a
4235 // `BlockDecl`. Let's not fix variables in blocks for now
4236 FixItsForVariableGroup =
4237 getFixIts(FixablesForAllVars, S: NaiveStrategy, Ctx&: D->getASTContext(), D,
4238 Tracker, Handler, VarGrpMgr);
4239
4240 for (const auto &G : UnsafeOps.noVar) {
4241 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
4242 Ctx&: D->getASTContext());
4243 }
4244
4245 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
4246 auto FixItsIt = FixItsForVariableGroup.find(x: VD);
4247 Handler.handleUnsafeVariableGroup(Variable: VD, VarGrpMgr,
4248 Fixes: FixItsIt != FixItsForVariableGroup.end()
4249 ? std::move(FixItsIt->second)
4250 : FixItList{},
4251 D, VarTargetTypes: NaiveStrategy);
4252 for (const auto &G : WarningGadgets) {
4253 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true,
4254 Ctx&: D->getASTContext());
4255 }
4256 }
4257}
4258
4259void clang::checkUnsafeBufferUsage(const Decl *D,
4260 UnsafeBufferUsageHandler &Handler,
4261 bool EmitSuggestions) {
4262#ifndef NDEBUG
4263 Handler.clearDebugNotes();
4264#endif
4265
4266 assert(D);
4267
4268 SmallVector<Stmt *> Stmts;
4269
4270 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D)) {
4271 // We do not want to visit a Lambda expression defined inside a method
4272 // independently. Instead, it should be visited along with the outer method.
4273 // FIXME: do we want to do the same thing for `BlockDecl`s?
4274 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: D)) {
4275 if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass())
4276 return;
4277 }
4278
4279 for (FunctionDecl *FReDecl : FD->redecls()) {
4280 if (FReDecl->isExternC()) {
4281 // Do not emit fixit suggestions for functions declared in an
4282 // extern "C" block.
4283 EmitSuggestions = false;
4284 break;
4285 }
4286 }
4287
4288 Stmts.push_back(Elt: FD->getBody());
4289
4290 if (const auto *ID = dyn_cast<CXXConstructorDecl>(Val: D)) {
4291 for (const CXXCtorInitializer *CI : ID->inits()) {
4292 Stmts.push_back(Elt: CI->getInit());
4293 }
4294 }
4295 } else if (isa<BlockDecl>(Val: D) || isa<ObjCMethodDecl>(Val: D)) {
4296 Stmts.push_back(Elt: D->getBody());
4297 }
4298
4299 assert(!Stmts.empty());
4300
4301 FixableGadgetList FixableGadgets;
4302 WarningGadgetList WarningGadgets;
4303 DeclUseTracker Tracker;
4304 for (Stmt *S : Stmts) {
4305 findGadgets(S, Ctx&: D->getASTContext(), Handler, EmitSuggestions, FixableGadgets,
4306 WarningGadgets, Tracker);
4307 }
4308 applyGadgets(D, FixableGadgets: std::move(FixableGadgets), WarningGadgets: std::move(WarningGadgets),
4309 Tracker: std::move(Tracker), Handler, EmitSuggestions);
4310}
4311