1 | //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This checker defines the attack surface for generic taint propagation. |
10 | // |
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "Yaml.h" |
18 | #include "clang/AST/Attr.h" |
19 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
20 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
21 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
22 | #include "clang/StaticAnalyzer/Core/Checker.h" |
23 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
24 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
26 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
27 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
28 | #include "llvm/ADT/StringExtras.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/Support/YAMLTraits.h" |
31 | |
32 | #include <limits> |
33 | #include <memory> |
34 | #include <optional> |
35 | #include <utility> |
36 | #include <vector> |
37 | |
38 | #define DEBUG_TYPE "taint-checker" |
39 | |
40 | using namespace clang; |
41 | using namespace ento; |
42 | using namespace taint; |
43 | |
44 | using llvm::ImmutableSet; |
45 | |
46 | namespace { |
47 | |
48 | class GenericTaintChecker; |
49 | |
50 | /// Check for CWE-134: Uncontrolled Format String. |
51 | constexpr llvm::StringLiteral MsgUncontrolledFormatString = |
52 | "Untrusted data is used as a format string " |
53 | "(CWE-134: Uncontrolled Format String)" ; |
54 | |
55 | /// Check for: |
56 | /// CERT/STR02-C. "Sanitize data passed to complex subsystems" |
57 | /// CWE-78, "Failure to Sanitize Data into an OS Command" |
58 | constexpr llvm::StringLiteral MsgSanitizeSystemArgs = |
59 | "Untrusted data is passed to a system call " |
60 | "(CERT/STR02-C. Sanitize data passed to complex subsystems)" ; |
61 | |
62 | /// Check if tainted data is used as a custom sink's parameter. |
63 | constexpr llvm::StringLiteral MsgCustomSink = |
64 | "Untrusted data is passed to a user-defined sink" ; |
65 | |
66 | using ArgIdxTy = int; |
67 | using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; |
68 | |
69 | /// Denotes the return value. |
70 | constexpr ArgIdxTy ReturnValueIndex{-1}; |
71 | |
72 | static ArgIdxTy fromArgumentCount(unsigned Count) { |
73 | assert(Count <= |
74 | static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && |
75 | "ArgIdxTy is not large enough to represent the number of arguments." ); |
76 | return Count; |
77 | } |
78 | |
79 | /// Check if the region the expression evaluates to is the standard input, |
80 | /// and thus, is tainted. |
81 | /// FIXME: Move this to Taint.cpp. |
82 | bool isStdin(SVal Val, const ASTContext &ACtx) { |
83 | // FIXME: What if Val is NonParamVarRegion? |
84 | |
85 | // The region should be symbolic, we do not know it's value. |
86 | const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val: Val.getAsRegion()); |
87 | if (!SymReg) |
88 | return false; |
89 | |
90 | // Get it's symbol and find the declaration region it's pointing to. |
91 | const auto *DeclReg = |
92 | dyn_cast_or_null<DeclRegion>(Val: SymReg->getSymbol()->getOriginRegion()); |
93 | if (!DeclReg) |
94 | return false; |
95 | |
96 | // This region corresponds to a declaration, find out if it's a global/extern |
97 | // variable named stdin with the proper type. |
98 | if (const auto *D = dyn_cast_or_null<VarDecl>(Val: DeclReg->getDecl())) { |
99 | D = D->getCanonicalDecl(); |
100 | if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) { |
101 | const QualType FILETy = ACtx.getFILEType().getCanonicalType(); |
102 | const QualType Ty = D->getType().getCanonicalType(); |
103 | |
104 | if (Ty->isPointerType()) |
105 | return Ty->getPointeeType() == FILETy; |
106 | } |
107 | } |
108 | return false; |
109 | } |
110 | |
111 | SVal getPointeeOf(ProgramStateRef State, Loc LValue) { |
112 | const QualType ArgTy = LValue.getType(State->getStateManager().getContext()); |
113 | if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) |
114 | return State->getSVal(LV: LValue); |
115 | |
116 | // Do not dereference void pointers. Treat them as byte pointers instead. |
117 | // FIXME: we might want to consider more than just the first byte. |
118 | return State->getSVal(LV: LValue, T: State->getStateManager().getContext().CharTy); |
119 | } |
120 | |
121 | /// Given a pointer/reference argument, return the value it refers to. |
122 | std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) { |
123 | if (auto LValue = Arg.getAs<Loc>()) |
124 | return getPointeeOf(State, LValue: *LValue); |
125 | return std::nullopt; |
126 | } |
127 | |
128 | /// Given a pointer, return the SVal of its pointee or if it is tainted, |
129 | /// otherwise return the pointer's SVal if tainted. |
130 | /// Also considers stdin as a taint source. |
131 | std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State, |
132 | SVal Arg) { |
133 | if (auto Pointee = getPointeeOf(State, Arg)) |
134 | if (isTainted(State, V: *Pointee)) // FIXME: isTainted(...) ? Pointee : None; |
135 | return Pointee; |
136 | |
137 | if (isTainted(State, V: Arg)) |
138 | return Arg; |
139 | return std::nullopt; |
140 | } |
141 | |
142 | bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) { |
143 | return getTaintedPointeeOrPointer(State, Arg: ExprSVal).has_value(); |
144 | } |
145 | |
146 | /// Helps in printing taint diagnostics. |
147 | /// Marks the incoming parameters of a function interesting (to be printed) |
148 | /// when the return value, or the outgoing parameters are tainted. |
149 | const NoteTag *taintOriginTrackerTag(CheckerContext &C, |
150 | std::vector<SymbolRef> TaintedSymbols, |
151 | std::vector<ArgIdxTy> TaintedArgs, |
152 | const LocationContext *CallLocation) { |
153 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
154 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
155 | PathSensitiveBugReport &BR) -> std::string { |
156 | // We give diagnostics only for taint related reports |
157 | if (!BR.isInteresting(LC: CallLocation) || |
158 | BR.getBugType().getCategory() != categories::TaintedData) { |
159 | return "" ; |
160 | } |
161 | if (TaintedSymbols.empty()) |
162 | return "Taint originated here" ; |
163 | |
164 | for (auto Sym : TaintedSymbols) { |
165 | BR.markInteresting(sym: Sym); |
166 | } |
167 | LLVM_DEBUG(for (auto Arg |
168 | : TaintedArgs) { |
169 | llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n" ; |
170 | }); |
171 | return "" ; |
172 | }); |
173 | } |
174 | |
175 | /// Helps in printing taint diagnostics. |
176 | /// Marks the function interesting (to be printed) |
177 | /// when the return value, or the outgoing parameters are tainted. |
178 | const NoteTag *taintPropagationExplainerTag( |
179 | CheckerContext &C, std::vector<SymbolRef> TaintedSymbols, |
180 | std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) { |
181 | assert(TaintedSymbols.size() == TaintedArgs.size()); |
182 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
183 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
184 | PathSensitiveBugReport &BR) -> std::string { |
185 | SmallString<256> Msg; |
186 | llvm::raw_svector_ostream Out(Msg); |
187 | // We give diagnostics only for taint related reports |
188 | if (TaintedSymbols.empty() || |
189 | BR.getBugType().getCategory() != categories::TaintedData) { |
190 | return "" ; |
191 | } |
192 | int nofTaintedArgs = 0; |
193 | for (auto [Idx, Sym] : llvm::enumerate(First: TaintedSymbols)) { |
194 | if (BR.isInteresting(sym: Sym)) { |
195 | BR.markInteresting(LC: CallLocation); |
196 | if (TaintedArgs[Idx] != ReturnValueIndex) { |
197 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " |
198 | << TaintedArgs[Idx] + 1 << "\n" ); |
199 | if (nofTaintedArgs == 0) |
200 | Out << "Taint propagated to the " ; |
201 | else |
202 | Out << ", " ; |
203 | Out << TaintedArgs[Idx] + 1 |
204 | << llvm::getOrdinalSuffix(Val: TaintedArgs[Idx] + 1) << " argument" ; |
205 | nofTaintedArgs++; |
206 | } else { |
207 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n" ); |
208 | Out << "Taint propagated to the return value" ; |
209 | } |
210 | } |
211 | } |
212 | return std::string(Out.str()); |
213 | }); |
214 | } |
215 | |
216 | /// ArgSet is used to describe arguments relevant for taint detection or |
217 | /// taint application. A discrete set of argument indexes and a variadic |
218 | /// argument list signified by a starting index are supported. |
219 | class ArgSet { |
220 | public: |
221 | ArgSet() = default; |
222 | ArgSet(ArgVecTy &&DiscreteArgs, |
223 | std::optional<ArgIdxTy> VariadicIndex = std::nullopt) |
224 | : DiscreteArgs(std::move(DiscreteArgs)), |
225 | VariadicIndex(std::move(VariadicIndex)) {} |
226 | |
227 | bool contains(ArgIdxTy ArgIdx) const { |
228 | if (llvm::is_contained(Range: DiscreteArgs, Element: ArgIdx)) |
229 | return true; |
230 | |
231 | return VariadicIndex && ArgIdx >= *VariadicIndex; |
232 | } |
233 | |
234 | bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } |
235 | |
236 | private: |
237 | ArgVecTy DiscreteArgs; |
238 | std::optional<ArgIdxTy> VariadicIndex; |
239 | }; |
240 | |
241 | /// A struct used to specify taint propagation rules for a function. |
242 | /// |
243 | /// If any of the possible taint source arguments is tainted, all of the |
244 | /// destination arguments should also be tainted. If ReturnValueIndex is added |
245 | /// to the dst list, the return value will be tainted. |
246 | class GenericTaintRule { |
247 | /// Arguments which are taints sinks and should be checked, and a report |
248 | /// should be emitted if taint reaches these. |
249 | ArgSet SinkArgs; |
250 | /// Arguments which should be sanitized on function return. |
251 | ArgSet FilterArgs; |
252 | /// Arguments which can participate in taint propagation. If any of the |
253 | /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should |
254 | /// be tainted. |
255 | ArgSet PropSrcArgs; |
256 | ArgSet PropDstArgs; |
257 | |
258 | /// A message that explains why the call is sensitive to taint. |
259 | std::optional<StringRef> SinkMsg; |
260 | |
261 | GenericTaintRule() = default; |
262 | |
263 | GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, |
264 | std::optional<StringRef> SinkMsg = std::nullopt) |
265 | : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), |
266 | PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), |
267 | SinkMsg(SinkMsg) {} |
268 | |
269 | public: |
270 | /// Make a rule that reports a warning if taint reaches any of \p FilterArgs |
271 | /// arguments. |
272 | static GenericTaintRule Sink(ArgSet &&SinkArgs, |
273 | std::optional<StringRef> Msg = std::nullopt) { |
274 | return {std::move(SinkArgs), {}, {}, {}, Msg}; |
275 | } |
276 | |
277 | /// Make a rule that sanitizes all FilterArgs arguments. |
278 | static GenericTaintRule Filter(ArgSet &&FilterArgs) { |
279 | return {{}, std::move(FilterArgs), {}, {}}; |
280 | } |
281 | |
282 | /// Make a rule that unconditionally taints all Args. |
283 | /// If Func is provided, it must also return true for taint to propagate. |
284 | static GenericTaintRule Source(ArgSet &&SourceArgs) { |
285 | return {{}, {}, {}, std::move(SourceArgs)}; |
286 | } |
287 | |
288 | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
289 | static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { |
290 | return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; |
291 | } |
292 | |
293 | /// Process a function which could either be a taint source, a taint sink, a |
294 | /// taint filter or a taint propagator. |
295 | void process(const GenericTaintChecker &Checker, const CallEvent &Call, |
296 | CheckerContext &C) const; |
297 | |
298 | /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. |
299 | static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { |
300 | return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() |
301 | : Call.getArgExpr(Index: ArgIdx); |
302 | }; |
303 | |
304 | /// Functions for custom taintedness propagation. |
305 | static bool UntrustedEnv(CheckerContext &C); |
306 | }; |
307 | |
308 | using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; |
309 | |
310 | /// Used to parse the configuration file. |
311 | struct TaintConfiguration { |
312 | using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; |
313 | enum class VariadicType { None, Src, Dst }; |
314 | |
315 | struct Common { |
316 | std::string Name; |
317 | std::string Scope; |
318 | }; |
319 | |
320 | struct Sink : Common { |
321 | ArgVecTy SinkArgs; |
322 | }; |
323 | |
324 | struct Filter : Common { |
325 | ArgVecTy FilterArgs; |
326 | }; |
327 | |
328 | struct Propagation : Common { |
329 | ArgVecTy SrcArgs; |
330 | ArgVecTy DstArgs; |
331 | VariadicType VarType; |
332 | ArgIdxTy VarIndex; |
333 | }; |
334 | |
335 | std::vector<Propagation> Propagations; |
336 | std::vector<Filter> Filters; |
337 | std::vector<Sink> Sinks; |
338 | |
339 | TaintConfiguration() = default; |
340 | TaintConfiguration(const TaintConfiguration &) = default; |
341 | TaintConfiguration(TaintConfiguration &&) = default; |
342 | TaintConfiguration &operator=(const TaintConfiguration &) = default; |
343 | TaintConfiguration &operator=(TaintConfiguration &&) = default; |
344 | }; |
345 | |
346 | struct GenericTaintRuleParser { |
347 | GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} |
348 | /// Container type used to gather call identification objects grouped into |
349 | /// pairs with their corresponding taint rules. It is temporary as it is used |
350 | /// to finally initialize RuleLookupTy, which is considered to be immutable. |
351 | using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; |
352 | RulesContTy parseConfiguration(const std::string &Option, |
353 | TaintConfiguration &&Config) const; |
354 | |
355 | private: |
356 | using NamePartsTy = llvm::SmallVector<StringRef, 2>; |
357 | |
358 | /// Validate part of the configuration, which contains a list of argument |
359 | /// indexes. |
360 | void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; |
361 | |
362 | template <typename Config> static NamePartsTy parseNameParts(const Config &C); |
363 | |
364 | // Takes the config and creates a CallDescription for it and associates a Rule |
365 | // with that. |
366 | template <typename Config> |
367 | static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, |
368 | RulesContTy &Rules); |
369 | |
370 | void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, |
371 | RulesContTy &Rules) const; |
372 | void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, |
373 | RulesContTy &Rules) const; |
374 | void parseConfig(const std::string &Option, |
375 | TaintConfiguration::Propagation &&P, |
376 | RulesContTy &Rules) const; |
377 | |
378 | CheckerManager &Mgr; |
379 | }; |
380 | |
381 | class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { |
382 | public: |
383 | void checkPreCall(const CallEvent &Call, CheckerContext &C) const; |
384 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
385 | |
386 | void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, |
387 | const char *Sep) const override; |
388 | |
389 | /// Generate a report if the expression is tainted or points to tainted data. |
390 | bool generateReportIfTainted(const Expr *E, StringRef Msg, |
391 | CheckerContext &C) const; |
392 | |
393 | bool isTaintReporterCheckerEnabled = false; |
394 | std::optional<BugType> BT; |
395 | |
396 | private: |
397 | bool checkUncontrolledFormatString(const CallEvent &Call, |
398 | CheckerContext &C) const; |
399 | |
400 | void taintUnsafeSocketProtocol(const CallEvent &Call, |
401 | CheckerContext &C) const; |
402 | |
403 | /// The taint rules are initalized with the help of a CheckerContext to |
404 | /// access user-provided configuration. |
405 | void initTaintRules(CheckerContext &C) const; |
406 | |
407 | // TODO: The two separate `CallDescriptionMap`s were introduced when |
408 | // `CallDescription` was unable to restrict matches to the global namespace |
409 | // only. This limitation no longer exists, so the following two maps should |
410 | // be unified. |
411 | mutable std::optional<RuleLookupTy> StaticTaintRules; |
412 | mutable std::optional<RuleLookupTy> DynamicTaintRules; |
413 | }; |
414 | } // end of anonymous namespace |
415 | |
416 | /// YAML serialization mapping. |
417 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) |
418 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) |
419 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) |
420 | |
421 | namespace llvm { |
422 | namespace yaml { |
423 | template <> struct MappingTraits<TaintConfiguration> { |
424 | static void mapping(IO &IO, TaintConfiguration &Config) { |
425 | IO.mapOptional(Key: "Propagations" , Val&: Config.Propagations); |
426 | IO.mapOptional(Key: "Filters" , Val&: Config.Filters); |
427 | IO.mapOptional(Key: "Sinks" , Val&: Config.Sinks); |
428 | } |
429 | }; |
430 | |
431 | template <> struct MappingTraits<TaintConfiguration::Sink> { |
432 | static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { |
433 | IO.mapRequired(Key: "Name" , Val&: Sink.Name); |
434 | IO.mapOptional(Key: "Scope" , Val&: Sink.Scope); |
435 | IO.mapRequired(Key: "Args" , Val&: Sink.SinkArgs); |
436 | } |
437 | }; |
438 | |
439 | template <> struct MappingTraits<TaintConfiguration::Filter> { |
440 | static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { |
441 | IO.mapRequired(Key: "Name" , Val&: Filter.Name); |
442 | IO.mapOptional(Key: "Scope" , Val&: Filter.Scope); |
443 | IO.mapRequired(Key: "Args" , Val&: Filter.FilterArgs); |
444 | } |
445 | }; |
446 | |
447 | template <> struct MappingTraits<TaintConfiguration::Propagation> { |
448 | static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { |
449 | IO.mapRequired(Key: "Name" , Val&: Propagation.Name); |
450 | IO.mapOptional(Key: "Scope" , Val&: Propagation.Scope); |
451 | IO.mapOptional(Key: "SrcArgs" , Val&: Propagation.SrcArgs); |
452 | IO.mapOptional(Key: "DstArgs" , Val&: Propagation.DstArgs); |
453 | IO.mapOptional(Key: "VariadicType" , Val&: Propagation.VarType); |
454 | IO.mapOptional(Key: "VariadicIndex" , Val&: Propagation.VarIndex); |
455 | } |
456 | }; |
457 | |
458 | template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { |
459 | static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { |
460 | IO.enumCase(Val&: Value, Str: "None" , ConstVal: TaintConfiguration::VariadicType::None); |
461 | IO.enumCase(Val&: Value, Str: "Src" , ConstVal: TaintConfiguration::VariadicType::Src); |
462 | IO.enumCase(Val&: Value, Str: "Dst" , ConstVal: TaintConfiguration::VariadicType::Dst); |
463 | } |
464 | }; |
465 | } // namespace yaml |
466 | } // namespace llvm |
467 | |
468 | /// A set which is used to pass information from call pre-visit instruction |
469 | /// to the call post-visit. The values are signed integers, which are either |
470 | /// ReturnValueIndex, or indexes of the pointer/reference argument, which |
471 | /// points to data, which should be tainted on return. |
472 | REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, |
473 | ImmutableSet<ArgIdxTy>) |
474 | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) |
475 | |
476 | void GenericTaintRuleParser::validateArgVector(const std::string &Option, |
477 | const ArgVecTy &Args) const { |
478 | for (ArgIdxTy Arg : Args) { |
479 | if (Arg < ReturnValueIndex) { |
480 | Mgr.reportInvalidCheckerOptionValue( |
481 | Checker: Mgr.getChecker<GenericTaintChecker>(), OptionName: Option, |
482 | ExpectedValueDesc: "an argument number for propagation rules greater or equal to -1" ); |
483 | } |
484 | } |
485 | } |
486 | |
487 | template <typename Config> |
488 | GenericTaintRuleParser::NamePartsTy |
489 | GenericTaintRuleParser::parseNameParts(const Config &C) { |
490 | NamePartsTy NameParts; |
491 | if (!C.Scope.empty()) { |
492 | // If the Scope argument contains multiple "::" parts, those are considered |
493 | // namespace identifiers. |
494 | StringRef{C.Scope}.split(A&: NameParts, Separator: "::" , /*MaxSplit*/ -1, |
495 | /*KeepEmpty*/ false); |
496 | } |
497 | NameParts.emplace_back(C.Name); |
498 | return NameParts; |
499 | } |
500 | |
501 | template <typename Config> |
502 | void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, |
503 | GenericTaintRule &&Rule, |
504 | RulesContTy &Rules) { |
505 | NamePartsTy NameParts = parseNameParts(C); |
506 | Rules.emplace_back(args: CallDescription(CDM::Unspecified, NameParts), |
507 | args: std::move(Rule)); |
508 | } |
509 | |
510 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
511 | TaintConfiguration::Sink &&S, |
512 | RulesContTy &Rules) const { |
513 | validateArgVector(Option, Args: S.SinkArgs); |
514 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Sink(SinkArgs: std::move(S.SinkArgs)), |
515 | Rules); |
516 | } |
517 | |
518 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
519 | TaintConfiguration::Filter &&S, |
520 | RulesContTy &Rules) const { |
521 | validateArgVector(Option, Args: S.FilterArgs); |
522 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Filter(FilterArgs: std::move(S.FilterArgs)), |
523 | Rules); |
524 | } |
525 | |
526 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
527 | TaintConfiguration::Propagation &&P, |
528 | RulesContTy &Rules) const { |
529 | validateArgVector(Option, Args: P.SrcArgs); |
530 | validateArgVector(Option, Args: P.DstArgs); |
531 | bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; |
532 | bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; |
533 | std::optional<ArgIdxTy> JustVarIndex = P.VarIndex; |
534 | |
535 | ArgSet SrcDesc(std::move(P.SrcArgs), |
536 | IsSrcVariadic ? JustVarIndex : std::nullopt); |
537 | ArgSet DstDesc(std::move(P.DstArgs), |
538 | IsDstVariadic ? JustVarIndex : std::nullopt); |
539 | |
540 | consumeRulesFromConfig( |
541 | C: P, Rule: GenericTaintRule::Prop(SrcArgs: std::move(SrcDesc), DstArgs: std::move(DstDesc)), Rules); |
542 | } |
543 | |
544 | GenericTaintRuleParser::RulesContTy |
545 | GenericTaintRuleParser::parseConfiguration(const std::string &Option, |
546 | TaintConfiguration &&Config) const { |
547 | |
548 | RulesContTy Rules; |
549 | |
550 | for (auto &F : Config.Filters) |
551 | parseConfig(Option, S: std::move(F), Rules); |
552 | |
553 | for (auto &S : Config.Sinks) |
554 | parseConfig(Option, S: std::move(S), Rules); |
555 | |
556 | for (auto &P : Config.Propagations) |
557 | parseConfig(Option, P: std::move(P), Rules); |
558 | |
559 | return Rules; |
560 | } |
561 | |
562 | void GenericTaintChecker::initTaintRules(CheckerContext &C) const { |
563 | // Check for exact name match for functions without builtin substitutes. |
564 | // Use qualified name, because these are C functions without namespace. |
565 | |
566 | if (StaticTaintRules || DynamicTaintRules) |
567 | return; |
568 | |
569 | using RulesConstructionTy = |
570 | std::vector<std::pair<CallDescription, GenericTaintRule>>; |
571 | using TR = GenericTaintRule; |
572 | |
573 | RulesConstructionTy GlobalCRules{ |
574 | // Sources |
575 | {{CDM::CLibrary, {"fdopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
576 | {{CDM::CLibrary, {"fopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
577 | {{CDM::CLibrary, {"freopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
578 | {{CDM::CLibrary, {"getch" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
579 | {{CDM::CLibrary, {"getchar" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
580 | {{CDM::CLibrary, {"getchar_unlocked" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
581 | {{CDM::CLibrary, {"gets" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
582 | {{CDM::CLibrary, {"gets_s" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
583 | {{CDM::CLibrary, {"scanf" }}, TR::Source(SourceArgs: {{}, 1})}, |
584 | {{CDM::CLibrary, {"scanf_s" }}, TR::Source(SourceArgs: {{}, 1})}, |
585 | {{CDM::CLibrary, {"wgetch" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
586 | // Sometimes the line between taint sources and propagators is blurry. |
587 | // _IO_getc is choosen to be a source, but could also be a propagator. |
588 | // This way it is simpler, as modeling it as a propagator would require |
589 | // to model the possible sources of _IO_FILE * values, which the _IO_getc |
590 | // function takes as parameters. |
591 | {{CDM::CLibrary, {"_IO_getc" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
592 | {{CDM::CLibrary, {"getcwd" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
593 | {{CDM::CLibrary, {"getwd" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
594 | {{CDM::CLibrary, {"readlink" }}, TR::Source(SourceArgs: {{1, ReturnValueIndex}})}, |
595 | {{CDM::CLibrary, {"readlinkat" }}, TR::Source(SourceArgs: {{2, ReturnValueIndex}})}, |
596 | {{CDM::CLibrary, {"get_current_dir_name" }}, |
597 | TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
598 | {{CDM::CLibrary, {"gethostname" }}, TR::Source(SourceArgs: {{0}})}, |
599 | {{CDM::CLibrary, {"getnameinfo" }}, TR::Source(SourceArgs: {{2, 4}})}, |
600 | {{CDM::CLibrary, {"getseuserbyname" }}, TR::Source(SourceArgs: {{1, 2}})}, |
601 | {{CDM::CLibrary, {"getgroups" }}, TR::Source(SourceArgs: {{1, ReturnValueIndex}})}, |
602 | {{CDM::CLibrary, {"getlogin" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
603 | {{CDM::CLibrary, {"getlogin_r" }}, TR::Source(SourceArgs: {{0}})}, |
604 | |
605 | // Props |
606 | {{CDM::CLibrary, {"accept" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
607 | {{CDM::CLibrary, {"atoi" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
608 | {{CDM::CLibrary, {"atol" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
609 | {{CDM::CLibrary, {"atoll" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
610 | {{CDM::CLibrary, {"fgetc" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
611 | {{CDM::CLibrary, {"fgetln" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
612 | {{CDM::CLibraryMaybeHardened, {"fgets" }}, |
613 | TR::Prop(SrcArgs: {{2}}, DstArgs: {{0, ReturnValueIndex}})}, |
614 | {{CDM::CLibraryMaybeHardened, {"fgetws" }}, |
615 | TR::Prop(SrcArgs: {{2}}, DstArgs: {{0, ReturnValueIndex}})}, |
616 | {{CDM::CLibrary, {"fscanf" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
617 | {{CDM::CLibrary, {"fscanf_s" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
618 | {{CDM::CLibrary, {"sscanf" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
619 | {{CDM::CLibrary, {"sscanf_s" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
620 | |
621 | {{CDM::CLibrary, {"getc" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
622 | {{CDM::CLibrary, {"getc_unlocked" }}, |
623 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
624 | {{CDM::CLibrary, {"getdelim" }}, TR::Prop(SrcArgs: {{3}}, DstArgs: {{0}})}, |
625 | // TODO: this intends to match the C function `getline()`, but the call |
626 | // description also matches the C++ function `std::getline()`; it should |
627 | // be ruled out by some additional logic. |
628 | {{CDM::CLibrary, {"getline" }}, TR::Prop(SrcArgs: {{2}}, DstArgs: {{0}})}, |
629 | {{CDM::CLibrary, {"getw" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
630 | {{CDM::CLibraryMaybeHardened, {"pread" }}, |
631 | TR::Prop(SrcArgs: {{0, 1, 2, 3}}, DstArgs: {{1, ReturnValueIndex}})}, |
632 | {{CDM::CLibraryMaybeHardened, {"read" }}, |
633 | TR::Prop(SrcArgs: {{0, 2}}, DstArgs: {{1, ReturnValueIndex}})}, |
634 | {{CDM::CLibraryMaybeHardened, {"fread" }}, |
635 | TR::Prop(SrcArgs: {{3}}, DstArgs: {{0, ReturnValueIndex}})}, |
636 | {{CDM::CLibraryMaybeHardened, {"recv" }}, |
637 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
638 | {{CDM::CLibraryMaybeHardened, {"recvfrom" }}, |
639 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
640 | |
641 | {{CDM::CLibrary, {"ttyname" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
642 | {{CDM::CLibrary, {"ttyname_r" }}, |
643 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
644 | |
645 | {{CDM::CLibrary, {"basename" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
646 | {{CDM::CLibrary, {"dirname" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
647 | {{CDM::CLibrary, {"fnmatch" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{ReturnValueIndex}})}, |
648 | |
649 | {{CDM::CLibrary, {"mbtowc" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
650 | {{CDM::CLibrary, {"wctomb" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
651 | {{CDM::CLibrary, {"wcwidth" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
652 | |
653 | {{CDM::CLibrary, {"memcmp" }}, |
654 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
655 | {{CDM::CLibraryMaybeHardened, {"memcpy" }}, |
656 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
657 | {{CDM::CLibraryMaybeHardened, {"memmove" }}, |
658 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
659 | {{CDM::CLibraryMaybeHardened, {"bcopy" }}, TR::Prop(SrcArgs: {{0, 2}}, DstArgs: {{1}})}, |
660 | |
661 | // Note: "memmem" and its variants search for a byte sequence ("needle") |
662 | // in a larger area ("haystack"). Currently we only propagate taint from |
663 | // the haystack to the result, but in theory tampering with the needle |
664 | // could also produce incorrect results. |
665 | {{CDM::CLibrary, {"memmem" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
666 | {{CDM::CLibrary, {"strstr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
667 | {{CDM::CLibrary, {"strcasestr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
668 | |
669 | // Analogously, the following functions search for a byte within a buffer |
670 | // and we only propagate taint from the buffer to the result. |
671 | {{CDM::CLibraryMaybeHardened, {"memchr" }}, |
672 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
673 | {{CDM::CLibraryMaybeHardened, {"memrchr" }}, |
674 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
675 | {{CDM::CLibrary, {"rawmemchr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
676 | {{CDM::CLibraryMaybeHardened, {"strchr" }}, |
677 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
678 | {{CDM::CLibraryMaybeHardened, {"strrchr" }}, |
679 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
680 | {{CDM::CLibraryMaybeHardened, {"strchrnul" }}, |
681 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
682 | {{CDM::CLibrary, {"index" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
683 | {{CDM::CLibrary, {"rindex" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
684 | |
685 | // FIXME: In case of arrays, only the first element of the array gets |
686 | // tainted. |
687 | {{CDM::CLibrary, {"qsort" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{0}})}, |
688 | {{CDM::CLibrary, {"qsort_r" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{0}})}, |
689 | |
690 | {{CDM::CLibrary, {"strcmp" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
691 | {{CDM::CLibrary, {"strcasecmp" }}, |
692 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
693 | {{CDM::CLibrary, {"strncmp" }}, |
694 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
695 | {{CDM::CLibrary, {"strncasecmp" }}, |
696 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
697 | {{CDM::CLibrary, {"strspn" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
698 | {{CDM::CLibrary, {"strcspn" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
699 | {{CDM::CLibrary, {"strpbrk" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
700 | |
701 | {{CDM::CLibrary, {"strndup" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
702 | {{CDM::CLibrary, {"strndupa" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
703 | {{CDM::CLibrary, {"strdup" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
704 | {{CDM::CLibrary, {"strdupa" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
705 | {{CDM::CLibrary, {"wcsdup" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
706 | |
707 | // strlen, wcslen, strnlen and alike intentionally don't propagate taint. |
708 | // See the details here: https://github.com/llvm/llvm-project/pull/66086 |
709 | |
710 | {{CDM::CLibrary, {"strtol" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
711 | {{CDM::CLibrary, {"strtoll" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
712 | {{CDM::CLibrary, {"strtoul" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
713 | {{CDM::CLibrary, {"strtoull" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
714 | |
715 | {{CDM::CLibrary, {"tolower" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
716 | {{CDM::CLibrary, {"toupper" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
717 | |
718 | {{CDM::CLibrary, {"isalnum" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
719 | {{CDM::CLibrary, {"isalpha" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
720 | {{CDM::CLibrary, {"isascii" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
721 | {{CDM::CLibrary, {"isblank" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
722 | {{CDM::CLibrary, {"iscntrl" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
723 | {{CDM::CLibrary, {"isdigit" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
724 | {{CDM::CLibrary, {"isgraph" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
725 | {{CDM::CLibrary, {"islower" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
726 | {{CDM::CLibrary, {"isprint" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
727 | {{CDM::CLibrary, {"ispunct" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
728 | {{CDM::CLibrary, {"isspace" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
729 | {{CDM::CLibrary, {"isupper" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
730 | {{CDM::CLibrary, {"isxdigit" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
731 | |
732 | {{CDM::CLibraryMaybeHardened, {"strcpy" }}, |
733 | TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
734 | {{CDM::CLibraryMaybeHardened, {"stpcpy" }}, |
735 | TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
736 | {{CDM::CLibraryMaybeHardened, {"strcat" }}, |
737 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{0, ReturnValueIndex}})}, |
738 | {{CDM::CLibraryMaybeHardened, {"wcsncat" }}, |
739 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{0, ReturnValueIndex}})}, |
740 | {{CDM::CLibraryMaybeHardened, {"strncpy" }}, |
741 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
742 | {{CDM::CLibraryMaybeHardened, {"strncat" }}, |
743 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
744 | {{CDM::CLibraryMaybeHardened, {"strlcpy" }}, TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0}})}, |
745 | {{CDM::CLibraryMaybeHardened, {"strlcat" }}, TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{0}})}, |
746 | |
747 | // Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient |
748 | // for unified handling of a function `FOO()` and its hardened variant |
749 | // `__FOO_chk()`, but in the "sprintf" family the extra parameters of the |
750 | // hardened variants are inserted into the middle of the parameter list, |
751 | // so that would not work in their case. |
752 | // int snprintf(char * str, size_t maxlen, const char * format, ...); |
753 | {{CDM::CLibrary, {"snprintf" }}, |
754 | TR::Prop(SrcArgs: {{1, 2}, 3}, DstArgs: {{0, ReturnValueIndex}})}, |
755 | // int sprintf(char * str, const char * format, ...); |
756 | {{CDM::CLibrary, {"sprintf" }}, |
757 | TR::Prop(SrcArgs: {{1}, 2}, DstArgs: {{0, ReturnValueIndex}})}, |
758 | // int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen, |
759 | // const char * format, ...); |
760 | {{CDM::CLibrary, {"__snprintf_chk" }}, |
761 | TR::Prop(SrcArgs: {{1, 4}, 5}, DstArgs: {{0, ReturnValueIndex}})}, |
762 | // int __sprintf_chk(char * str, int flag, size_t strlen, const char * |
763 | // format, ...); |
764 | {{CDM::CLibrary, {"__sprintf_chk" }}, |
765 | TR::Prop(SrcArgs: {{3}, 4}, DstArgs: {{0, ReturnValueIndex}})}, |
766 | |
767 | // Sinks |
768 | {{CDM::CLibrary, {"system" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
769 | {{CDM::CLibrary, {"popen" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
770 | {{CDM::CLibrary, {"execl" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
771 | {{CDM::CLibrary, {"execle" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
772 | {{CDM::CLibrary, {"execlp" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
773 | {{CDM::CLibrary, {"execv" }}, TR::Sink(SinkArgs: {{0, 1}}, Msg: MsgSanitizeSystemArgs)}, |
774 | {{CDM::CLibrary, {"execve" }}, |
775 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
776 | {{CDM::CLibrary, {"fexecve" }}, |
777 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
778 | {{CDM::CLibrary, {"execvp" }}, TR::Sink(SinkArgs: {{0, 1}}, Msg: MsgSanitizeSystemArgs)}, |
779 | {{CDM::CLibrary, {"execvpe" }}, |
780 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
781 | {{CDM::CLibrary, {"dlopen" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
782 | |
783 | // malloc, calloc, alloca, realloc, memccpy |
784 | // are intentionally not marked as taint sinks because unconditional |
785 | // reporting for these functions generates many false positives. |
786 | // These taint sinks should be implemented in other checkers with more |
787 | // sophisticated sanitation heuristics. |
788 | |
789 | {{CDM::CLibrary, {"setproctitle" }}, |
790 | TR::Sink(SinkArgs: {{0}, 1}, Msg: MsgUncontrolledFormatString)}, |
791 | {{CDM::CLibrary, {"setproctitle_fast" }}, |
792 | TR::Sink(SinkArgs: {{0}, 1}, Msg: MsgUncontrolledFormatString)}}; |
793 | |
794 | if (TR::UntrustedEnv(C)) { |
795 | // void setproctitle_init(int argc, char *argv[], char *envp[]) |
796 | // TODO: replace `MsgCustomSink` with a message that fits this situation. |
797 | GlobalCRules.push_back(x: {{CDM::CLibrary, {"setproctitle_init" }}, |
798 | TR::Sink(SinkArgs: {{1, 2}}, Msg: MsgCustomSink)}); |
799 | |
800 | // `getenv` returns taint only in untrusted environments. |
801 | GlobalCRules.push_back( |
802 | x: {{CDM::CLibrary, {"getenv" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}); |
803 | } |
804 | |
805 | StaticTaintRules.emplace(args: std::make_move_iterator(i: GlobalCRules.begin()), |
806 | args: std::make_move_iterator(i: GlobalCRules.end())); |
807 | |
808 | // User-provided taint configuration. |
809 | CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); |
810 | assert(Mgr); |
811 | GenericTaintRuleParser ConfigParser{*Mgr}; |
812 | std::string Option{"Config" }; |
813 | StringRef ConfigFile = |
814 | Mgr->getAnalyzerOptions().getCheckerStringOption(C: this, OptionName: Option); |
815 | std::optional<TaintConfiguration> Config = |
816 | getConfiguration<TaintConfiguration>(Mgr&: *Mgr, Chk: this, Option, ConfigFile); |
817 | if (!Config) { |
818 | // We don't have external taint config, no parsing required. |
819 | DynamicTaintRules = RuleLookupTy{}; |
820 | return; |
821 | } |
822 | |
823 | GenericTaintRuleParser::RulesContTy Rules{ |
824 | ConfigParser.parseConfiguration(Option, Config: std::move(*Config))}; |
825 | |
826 | DynamicTaintRules.emplace(args: std::make_move_iterator(i: Rules.begin()), |
827 | args: std::make_move_iterator(i: Rules.end())); |
828 | } |
829 | |
830 | void GenericTaintChecker::checkPreCall(const CallEvent &Call, |
831 | CheckerContext &C) const { |
832 | initTaintRules(C); |
833 | |
834 | // FIXME: this should be much simpler. |
835 | if (const auto *Rule = |
836 | Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) |
837 | Rule->process(Checker: *this, Call, C); |
838 | else if (const auto *Rule = DynamicTaintRules->lookup(Call)) |
839 | Rule->process(Checker: *this, Call, C); |
840 | |
841 | // FIXME: These edge cases are to be eliminated from here eventually. |
842 | // |
843 | // Additional check that is not supported by CallDescription. |
844 | // TODO: Make CallDescription be able to match attributes such as printf-like |
845 | // arguments. |
846 | checkUncontrolledFormatString(Call, C); |
847 | |
848 | // TODO: Modeling sockets should be done in a specific checker. |
849 | // Socket is a source, which taints the return value. |
850 | taintUnsafeSocketProtocol(Call, C); |
851 | } |
852 | |
853 | void GenericTaintChecker::checkPostCall(const CallEvent &Call, |
854 | CheckerContext &C) const { |
855 | // Set the marked values as tainted. The return value only accessible from |
856 | // checkPostStmt. |
857 | ProgramStateRef State = C.getState(); |
858 | const StackFrameContext *CurrentFrame = C.getStackFrame(); |
859 | |
860 | // Depending on what was tainted at pre-visit, we determined a set of |
861 | // arguments which should be tainted after the function returns. These are |
862 | // stored in the state as TaintArgsOnPostVisit set. |
863 | TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); |
864 | |
865 | const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(K: CurrentFrame); |
866 | if (!TaintArgs) |
867 | return; |
868 | assert(!TaintArgs->isEmpty()); |
869 | |
870 | LLVM_DEBUG(for (ArgIdxTy I |
871 | : *TaintArgs) { |
872 | llvm::dbgs() << "PostCall<" ; |
873 | Call.dump(llvm::dbgs()); |
874 | llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; |
875 | }); |
876 | |
877 | const NoteTag *InjectionTag = nullptr; |
878 | std::vector<SymbolRef> TaintedSymbols; |
879 | std::vector<ArgIdxTy> TaintedIndexes; |
880 | for (ArgIdxTy ArgNum : *TaintArgs) { |
881 | // Special handling for the tainted return value. |
882 | if (ArgNum == ReturnValueIndex) { |
883 | State = addTaint(State, V: Call.getReturnValue()); |
884 | std::vector<SymbolRef> TaintedSyms = |
885 | getTaintedSymbols(State, V: Call.getReturnValue()); |
886 | if (!TaintedSyms.empty()) { |
887 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
888 | TaintedIndexes.push_back(x: ArgNum); |
889 | } |
890 | continue; |
891 | } |
892 | // The arguments are pointer arguments. The data they are pointing at is |
893 | // tainted after the call. |
894 | if (auto V = getPointeeOf(State, Arg: Call.getArgSVal(Index: ArgNum))) { |
895 | State = addTaint(State, V: *V); |
896 | std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, V: *V); |
897 | if (!TaintedSyms.empty()) { |
898 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
899 | TaintedIndexes.push_back(x: ArgNum); |
900 | } |
901 | } |
902 | } |
903 | // Create a NoteTag callback, which prints to the user where the taintedness |
904 | // was propagated to. |
905 | InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedArgs: TaintedIndexes, |
906 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
907 | // Clear up the taint info from the state. |
908 | State = State->remove<TaintArgsOnPostVisit>(K: CurrentFrame); |
909 | C.addTransition(State, Tag: InjectionTag); |
910 | } |
911 | |
912 | void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, |
913 | const char *NL, const char *Sep) const { |
914 | printTaint(State, Out, nl: NL, sep: Sep); |
915 | } |
916 | |
917 | void GenericTaintRule::process(const GenericTaintChecker &Checker, |
918 | const CallEvent &Call, CheckerContext &C) const { |
919 | ProgramStateRef State = C.getState(); |
920 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
921 | |
922 | /// Iterate every call argument, and get their corresponding Expr and SVal. |
923 | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { |
924 | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { |
925 | const Expr *E = GetArgExpr(ArgIdx: I, Call); |
926 | Fun(I, E, C.getSVal(S: E)); |
927 | } |
928 | }; |
929 | |
930 | /// Check for taint sinks. |
931 | ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { |
932 | // Add taintedness to stdin parameters |
933 | if (isStdin(Val: C.getSVal(S: E), ACtx: C.getASTContext())) { |
934 | State = addTaint(State, V: C.getSVal(S: E)); |
935 | } |
936 | if (SinkArgs.contains(ArgIdx: I) && isTaintedOrPointsToTainted(State, ExprSVal: C.getSVal(S: E))) |
937 | Checker.generateReportIfTainted(E, Msg: SinkMsg.value_or(u: MsgCustomSink), C); |
938 | }); |
939 | |
940 | /// Check for taint filters. |
941 | ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) { |
942 | if (FilterArgs.contains(ArgIdx: I)) { |
943 | State = removeTaint(State, V: S); |
944 | if (auto P = getPointeeOf(State, Arg: S)) |
945 | State = removeTaint(State, V: *P); |
946 | } |
947 | }); |
948 | |
949 | /// Check for taint propagation sources. |
950 | /// A rule will make the destination variables tainted if PropSrcArgs |
951 | /// is empty (taints the destination |
952 | /// arguments unconditionally), or if any of its signified |
953 | /// args are tainted in context of the current CallEvent. |
954 | bool IsMatching = PropSrcArgs.isEmpty(); |
955 | std::vector<SymbolRef> TaintedSymbols; |
956 | std::vector<ArgIdxTy> TaintedIndexes; |
957 | ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, |
958 | &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { |
959 | std::optional<SVal> TaintedSVal = |
960 | getTaintedPointeeOrPointer(State, Arg: C.getSVal(S: E)); |
961 | IsMatching = |
962 | IsMatching || (PropSrcArgs.contains(ArgIdx: I) && TaintedSVal.has_value()); |
963 | |
964 | // We track back tainted arguments except for stdin |
965 | if (TaintedSVal && !isStdin(Val: *TaintedSVal, ACtx: C.getASTContext())) { |
966 | std::vector<SymbolRef> TaintedArgSyms = |
967 | getTaintedSymbols(State, V: *TaintedSVal); |
968 | if (!TaintedArgSyms.empty()) { |
969 | llvm::append_range(C&: TaintedSymbols, R&: TaintedArgSyms); |
970 | TaintedIndexes.push_back(x: I); |
971 | } |
972 | } |
973 | }); |
974 | |
975 | // Early return for propagation rules which dont match. |
976 | // Matching propagations, Sinks and Filters will pass this point. |
977 | if (!IsMatching) |
978 | return; |
979 | |
980 | const auto WouldEscape = [](SVal V, QualType Ty) -> bool { |
981 | if (!isa<Loc>(Val: V)) |
982 | return false; |
983 | |
984 | const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); |
985 | const bool IsNonConstPtr = |
986 | Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); |
987 | |
988 | return IsNonConstRef || IsNonConstPtr; |
989 | }; |
990 | |
991 | /// Propagate taint where it is necessary. |
992 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
993 | ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); |
994 | ForEachCallArg( |
995 | [&](ArgIdxTy I, const Expr *E, SVal V) { |
996 | if (PropDstArgs.contains(ArgIdx: I)) { |
997 | LLVM_DEBUG(llvm::dbgs() << "PreCall<" ; Call.dump(llvm::dbgs()); |
998 | llvm::dbgs() |
999 | << "> prepares tainting arg index: " << I << '\n';); |
1000 | Result = F.add(Old: Result, V: I); |
1001 | } |
1002 | |
1003 | // Taint property gets lost if the variable is passed as a |
1004 | // non-const pointer or reference to a function which is |
1005 | // not inlined. For matching rules we want to preserve the taintedness. |
1006 | // TODO: We should traverse all reachable memory regions via the |
1007 | // escaping parameter. Instead of doing that we simply mark only the |
1008 | // referred memory region as tainted. |
1009 | if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, Arg: V)) { |
1010 | LLVM_DEBUG(if (!Result.contains(I)) { |
1011 | llvm::dbgs() << "PreCall<" ; |
1012 | Call.dump(llvm::dbgs()); |
1013 | llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; |
1014 | }); |
1015 | Result = F.add(Old: Result, V: I); |
1016 | } |
1017 | }); |
1018 | |
1019 | if (!Result.isEmpty()) |
1020 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1021 | const NoteTag *InjectionTag = taintOriginTrackerTag( |
1022 | C, TaintedSymbols: std::move(TaintedSymbols), TaintedArgs: std::move(TaintedIndexes), |
1023 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
1024 | C.addTransition(State, Tag: InjectionTag); |
1025 | } |
1026 | |
1027 | bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { |
1028 | return !C.getAnalysisManager() |
1029 | .getAnalyzerOptions() |
1030 | .ShouldAssumeControlledEnvironment; |
1031 | } |
1032 | |
1033 | bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, |
1034 | CheckerContext &C) const { |
1035 | assert(E); |
1036 | if (!isTaintReporterCheckerEnabled) |
1037 | return false; |
1038 | std::optional<SVal> TaintedSVal = |
1039 | getTaintedPointeeOrPointer(State: C.getState(), Arg: C.getSVal(S: E)); |
1040 | |
1041 | if (!TaintedSVal) |
1042 | return false; |
1043 | |
1044 | // Generate diagnostic. |
1045 | assert(BT); |
1046 | if (ExplodedNode *N = C.generateNonFatalErrorNode(State: C.getState())) { |
1047 | auto report = std::make_unique<PathSensitiveBugReport>(args: *BT, args&: Msg, args&: N); |
1048 | report->addRange(R: E->getSourceRange()); |
1049 | for (auto TaintedSym : getTaintedSymbols(State: C.getState(), V: *TaintedSVal)) { |
1050 | report->markInteresting(sym: TaintedSym); |
1051 | } |
1052 | C.emitReport(R: std::move(report)); |
1053 | return true; |
1054 | } |
1055 | return false; |
1056 | } |
1057 | |
1058 | /// TODO: remove checking for printf format attributes and socket whitelisting |
1059 | /// from GenericTaintChecker, and that means the following functions: |
1060 | /// getPrintfFormatArgumentNum, |
1061 | /// GenericTaintChecker::checkUncontrolledFormatString, |
1062 | /// GenericTaintChecker::taintUnsafeSocketProtocol |
1063 | |
1064 | static bool getPrintfFormatArgumentNum(const CallEvent &Call, |
1065 | const CheckerContext &C, |
1066 | ArgIdxTy &ArgNum) { |
1067 | // Find if the function contains a format string argument. |
1068 | // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, |
1069 | // vsnprintf, syslog, custom annotated functions. |
1070 | const Decl *CallDecl = Call.getDecl(); |
1071 | if (!CallDecl) |
1072 | return false; |
1073 | const FunctionDecl *FDecl = CallDecl->getAsFunction(); |
1074 | if (!FDecl) |
1075 | return false; |
1076 | |
1077 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
1078 | |
1079 | for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { |
1080 | // The format attribute uses 1-based parameter indexing, for example |
1081 | // plain `printf(const char *fmt, ...)` would be annotated with |
1082 | // `__format__(__printf__, 1, 2)`, so we need to subtract 1 to get a |
1083 | // 0-based index. (This checker uses 0-based parameter indices.) |
1084 | ArgNum = Format->getFormatIdx() - 1; |
1085 | // The format attribute also counts the implicit `this` parameter of |
1086 | // methods, so e.g. in `SomeClass::method(const char *fmt, ...)` could be |
1087 | // annotated with `__format__(__printf__, 2, 3)`. This checker doesn't |
1088 | // count the implicit `this` parameter, so in this case we need to subtract |
1089 | // one again. |
1090 | // FIXME: Apparently the implementation of the format attribute doesn't |
1091 | // support methods with an explicit object parameter, so we cannot |
1092 | // implement proper support for that rare case either. |
1093 | const CXXMethodDecl *MDecl = dyn_cast<CXXMethodDecl>(Val: FDecl); |
1094 | if (MDecl && !MDecl->isStatic()) |
1095 | ArgNum--; |
1096 | |
1097 | if ((Format->getType()->getName() == "printf" ) && CallNumArgs > ArgNum) |
1098 | return true; |
1099 | } |
1100 | |
1101 | return false; |
1102 | } |
1103 | |
1104 | bool GenericTaintChecker::checkUncontrolledFormatString( |
1105 | const CallEvent &Call, CheckerContext &C) const { |
1106 | // Check if the function contains a format string argument. |
1107 | ArgIdxTy ArgNum = 0; |
1108 | if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) |
1109 | return false; |
1110 | |
1111 | // If either the format string content or the pointer itself are tainted, |
1112 | // warn. |
1113 | return generateReportIfTainted(E: Call.getArgExpr(Index: ArgNum), |
1114 | Msg: MsgUncontrolledFormatString, C); |
1115 | } |
1116 | |
1117 | void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, |
1118 | CheckerContext &C) const { |
1119 | if (Call.getNumArgs() < 1) |
1120 | return; |
1121 | const IdentifierInfo *ID = Call.getCalleeIdentifier(); |
1122 | if (!ID) |
1123 | return; |
1124 | if (ID->getName() != "socket" ) |
1125 | return; |
1126 | |
1127 | SourceLocation DomLoc = Call.getArgExpr(Index: 0)->getExprLoc(); |
1128 | StringRef DomName = C.getMacroNameOrSpelling(Loc&: DomLoc); |
1129 | // Allow internal communication protocols. |
1130 | bool SafeProtocol = DomName == "AF_SYSTEM" || DomName == "AF_LOCAL" || |
1131 | DomName == "AF_UNIX" || DomName == "AF_RESERVED_36" ; |
1132 | if (SafeProtocol) |
1133 | return; |
1134 | |
1135 | ProgramStateRef State = C.getState(); |
1136 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
1137 | ImmutableSet<ArgIdxTy> Result = F.add(Old: F.getEmptySet(), V: ReturnValueIndex); |
1138 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1139 | C.addTransition(State); |
1140 | } |
1141 | |
1142 | /// Checker registration |
1143 | void ento::registerTaintPropagationChecker(CheckerManager &Mgr) { |
1144 | Mgr.registerChecker<GenericTaintChecker>(); |
1145 | } |
1146 | |
1147 | bool ento::shouldRegisterTaintPropagationChecker(const CheckerManager &mgr) { |
1148 | return true; |
1149 | } |
1150 | |
1151 | void ento::registerGenericTaintChecker(CheckerManager &Mgr) { |
1152 | GenericTaintChecker *checker = Mgr.getChecker<GenericTaintChecker>(); |
1153 | checker->isTaintReporterCheckerEnabled = true; |
1154 | checker->BT.emplace(args: Mgr.getCurrentCheckerName(), args: "Use of Untrusted Data" , |
1155 | args: categories::TaintedData); |
1156 | } |
1157 | |
1158 | bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { |
1159 | return true; |
1160 | } |
1161 | |