1 | //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This checker defines the attack surface for generic taint propagation. |
10 | // |
11 | // The taint information produced by it might be useful to other checkers. For |
12 | // example, checkers should report errors which involve tainted data more |
13 | // aggressively, even if the involved symbols are under constrained. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "Yaml.h" |
18 | #include "clang/AST/Attr.h" |
19 | #include "clang/Basic/Builtins.h" |
20 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
21 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
22 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
23 | #include "clang/StaticAnalyzer/Core/Checker.h" |
24 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
26 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
27 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
28 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
29 | #include "llvm/ADT/StringExtras.h" |
30 | #include "llvm/ADT/StringRef.h" |
31 | #include "llvm/Support/YAMLTraits.h" |
32 | |
33 | #include <limits> |
34 | #include <memory> |
35 | #include <optional> |
36 | #include <utility> |
37 | #include <vector> |
38 | |
39 | #define DEBUG_TYPE "taint-checker" |
40 | |
41 | using namespace clang; |
42 | using namespace ento; |
43 | using namespace taint; |
44 | |
45 | using llvm::ImmutableSet; |
46 | |
47 | namespace { |
48 | |
49 | class GenericTaintChecker; |
50 | |
51 | /// Check for CWE-134: Uncontrolled Format String. |
52 | constexpr llvm::StringLiteral MsgUncontrolledFormatString = |
53 | "Untrusted data is used as a format string " |
54 | "(CWE-134: Uncontrolled Format String)" ; |
55 | |
56 | /// Check for: |
57 | /// CERT/STR02-C. "Sanitize data passed to complex subsystems" |
58 | /// CWE-78, "Failure to Sanitize Data into an OS Command" |
59 | constexpr llvm::StringLiteral MsgSanitizeSystemArgs = |
60 | "Untrusted data is passed to a system call " |
61 | "(CERT/STR02-C. Sanitize data passed to complex subsystems)" ; |
62 | |
63 | /// Check if tainted data is used as a custom sink's parameter. |
64 | constexpr llvm::StringLiteral MsgCustomSink = |
65 | "Untrusted data is passed to a user-defined sink" ; |
66 | |
67 | using ArgIdxTy = int; |
68 | using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; |
69 | |
70 | /// Denotes the return value. |
71 | constexpr ArgIdxTy ReturnValueIndex{-1}; |
72 | |
73 | static ArgIdxTy fromArgumentCount(unsigned Count) { |
74 | assert(Count <= |
75 | static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && |
76 | "ArgIdxTy is not large enough to represent the number of arguments." ); |
77 | return Count; |
78 | } |
79 | |
80 | /// Check if the region the expression evaluates to is the standard input, |
81 | /// and thus, is tainted. |
82 | /// FIXME: Move this to Taint.cpp. |
83 | bool isStdin(SVal Val, const ASTContext &ACtx) { |
84 | // FIXME: What if Val is NonParamVarRegion? |
85 | |
86 | // The region should be symbolic, we do not know it's value. |
87 | const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val: Val.getAsRegion()); |
88 | if (!SymReg) |
89 | return false; |
90 | |
91 | // Get it's symbol and find the declaration region it's pointing to. |
92 | const auto *DeclReg = |
93 | dyn_cast_or_null<DeclRegion>(Val: SymReg->getSymbol()->getOriginRegion()); |
94 | if (!DeclReg) |
95 | return false; |
96 | |
97 | // This region corresponds to a declaration, find out if it's a global/extern |
98 | // variable named stdin with the proper type. |
99 | if (const auto *D = dyn_cast_or_null<VarDecl>(Val: DeclReg->getDecl())) { |
100 | D = D->getCanonicalDecl(); |
101 | if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) { |
102 | const QualType FILETy = ACtx.getFILEType().getCanonicalType(); |
103 | const QualType Ty = D->getType().getCanonicalType(); |
104 | |
105 | if (Ty->isPointerType()) |
106 | return Ty->getPointeeType() == FILETy; |
107 | } |
108 | } |
109 | return false; |
110 | } |
111 | |
112 | SVal getPointeeOf(ProgramStateRef State, Loc LValue) { |
113 | const QualType ArgTy = LValue.getType(State->getStateManager().getContext()); |
114 | if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) |
115 | return State->getSVal(LV: LValue); |
116 | |
117 | // Do not dereference void pointers. Treat them as byte pointers instead. |
118 | // FIXME: we might want to consider more than just the first byte. |
119 | return State->getSVal(LV: LValue, T: State->getStateManager().getContext().CharTy); |
120 | } |
121 | |
122 | /// Given a pointer/reference argument, return the value it refers to. |
123 | std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) { |
124 | if (auto LValue = Arg.getAs<Loc>()) |
125 | return getPointeeOf(State, LValue: *LValue); |
126 | return std::nullopt; |
127 | } |
128 | |
129 | /// Given a pointer, return the SVal of its pointee or if it is tainted, |
130 | /// otherwise return the pointer's SVal if tainted. |
131 | /// Also considers stdin as a taint source. |
132 | std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State, |
133 | SVal Arg) { |
134 | if (auto Pointee = getPointeeOf(State, Arg)) |
135 | if (isTainted(State, V: *Pointee)) // FIXME: isTainted(...) ? Pointee : None; |
136 | return Pointee; |
137 | |
138 | if (isTainted(State, V: Arg)) |
139 | return Arg; |
140 | return std::nullopt; |
141 | } |
142 | |
143 | bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) { |
144 | return getTaintedPointeeOrPointer(State, Arg: ExprSVal).has_value(); |
145 | } |
146 | |
147 | /// Helps in printing taint diagnostics. |
148 | /// Marks the incoming parameters of a function interesting (to be printed) |
149 | /// when the return value, or the outgoing parameters are tainted. |
150 | const NoteTag *taintOriginTrackerTag(CheckerContext &C, |
151 | std::vector<SymbolRef> TaintedSymbols, |
152 | std::vector<ArgIdxTy> TaintedArgs, |
153 | const LocationContext *CallLocation) { |
154 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
155 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
156 | PathSensitiveBugReport &BR) -> std::string { |
157 | SmallString<256> Msg; |
158 | // We give diagnostics only for taint related reports |
159 | if (!BR.isInteresting(LC: CallLocation) || |
160 | BR.getBugType().getCategory() != categories::TaintedData) { |
161 | return "" ; |
162 | } |
163 | if (TaintedSymbols.empty()) |
164 | return "Taint originated here" ; |
165 | |
166 | for (auto Sym : TaintedSymbols) { |
167 | BR.markInteresting(sym: Sym); |
168 | } |
169 | LLVM_DEBUG(for (auto Arg |
170 | : TaintedArgs) { |
171 | llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n" ; |
172 | }); |
173 | return "" ; |
174 | }); |
175 | } |
176 | |
177 | /// Helps in printing taint diagnostics. |
178 | /// Marks the function interesting (to be printed) |
179 | /// when the return value, or the outgoing parameters are tainted. |
180 | const NoteTag *taintPropagationExplainerTag( |
181 | CheckerContext &C, std::vector<SymbolRef> TaintedSymbols, |
182 | std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) { |
183 | assert(TaintedSymbols.size() == TaintedArgs.size()); |
184 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
185 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
186 | PathSensitiveBugReport &BR) -> std::string { |
187 | SmallString<256> Msg; |
188 | llvm::raw_svector_ostream Out(Msg); |
189 | // We give diagnostics only for taint related reports |
190 | if (TaintedSymbols.empty() || |
191 | BR.getBugType().getCategory() != categories::TaintedData) { |
192 | return "" ; |
193 | } |
194 | int nofTaintedArgs = 0; |
195 | for (auto [Idx, Sym] : llvm::enumerate(First: TaintedSymbols)) { |
196 | if (BR.isInteresting(sym: Sym)) { |
197 | BR.markInteresting(LC: CallLocation); |
198 | if (TaintedArgs[Idx] != ReturnValueIndex) { |
199 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " |
200 | << TaintedArgs[Idx] + 1 << "\n" ); |
201 | if (nofTaintedArgs == 0) |
202 | Out << "Taint propagated to the " ; |
203 | else |
204 | Out << ", " ; |
205 | Out << TaintedArgs[Idx] + 1 |
206 | << llvm::getOrdinalSuffix(Val: TaintedArgs[Idx] + 1) << " argument" ; |
207 | nofTaintedArgs++; |
208 | } else { |
209 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n" ); |
210 | Out << "Taint propagated to the return value" ; |
211 | } |
212 | } |
213 | } |
214 | return std::string(Out.str()); |
215 | }); |
216 | } |
217 | |
218 | /// ArgSet is used to describe arguments relevant for taint detection or |
219 | /// taint application. A discrete set of argument indexes and a variadic |
220 | /// argument list signified by a starting index are supported. |
221 | class ArgSet { |
222 | public: |
223 | ArgSet() = default; |
224 | ArgSet(ArgVecTy &&DiscreteArgs, |
225 | std::optional<ArgIdxTy> VariadicIndex = std::nullopt) |
226 | : DiscreteArgs(std::move(DiscreteArgs)), |
227 | VariadicIndex(std::move(VariadicIndex)) {} |
228 | |
229 | bool contains(ArgIdxTy ArgIdx) const { |
230 | if (llvm::is_contained(Range: DiscreteArgs, Element: ArgIdx)) |
231 | return true; |
232 | |
233 | return VariadicIndex && ArgIdx >= *VariadicIndex; |
234 | } |
235 | |
236 | bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } |
237 | |
238 | private: |
239 | ArgVecTy DiscreteArgs; |
240 | std::optional<ArgIdxTy> VariadicIndex; |
241 | }; |
242 | |
243 | /// A struct used to specify taint propagation rules for a function. |
244 | /// |
245 | /// If any of the possible taint source arguments is tainted, all of the |
246 | /// destination arguments should also be tainted. If ReturnValueIndex is added |
247 | /// to the dst list, the return value will be tainted. |
248 | class GenericTaintRule { |
249 | /// Arguments which are taints sinks and should be checked, and a report |
250 | /// should be emitted if taint reaches these. |
251 | ArgSet SinkArgs; |
252 | /// Arguments which should be sanitized on function return. |
253 | ArgSet FilterArgs; |
254 | /// Arguments which can participate in taint propagation. If any of the |
255 | /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should |
256 | /// be tainted. |
257 | ArgSet PropSrcArgs; |
258 | ArgSet PropDstArgs; |
259 | |
260 | /// A message that explains why the call is sensitive to taint. |
261 | std::optional<StringRef> SinkMsg; |
262 | |
263 | GenericTaintRule() = default; |
264 | |
265 | GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, |
266 | std::optional<StringRef> SinkMsg = std::nullopt) |
267 | : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), |
268 | PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), |
269 | SinkMsg(SinkMsg) {} |
270 | |
271 | public: |
272 | /// Make a rule that reports a warning if taint reaches any of \p FilterArgs |
273 | /// arguments. |
274 | static GenericTaintRule Sink(ArgSet &&SinkArgs, |
275 | std::optional<StringRef> Msg = std::nullopt) { |
276 | return {std::move(SinkArgs), {}, {}, {}, Msg}; |
277 | } |
278 | |
279 | /// Make a rule that sanitizes all FilterArgs arguments. |
280 | static GenericTaintRule Filter(ArgSet &&FilterArgs) { |
281 | return {{}, std::move(FilterArgs), {}, {}}; |
282 | } |
283 | |
284 | /// Make a rule that unconditionally taints all Args. |
285 | /// If Func is provided, it must also return true for taint to propagate. |
286 | static GenericTaintRule Source(ArgSet &&SourceArgs) { |
287 | return {{}, {}, {}, std::move(SourceArgs)}; |
288 | } |
289 | |
290 | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
291 | static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { |
292 | return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; |
293 | } |
294 | |
295 | /// Process a function which could either be a taint source, a taint sink, a |
296 | /// taint filter or a taint propagator. |
297 | void process(const GenericTaintChecker &Checker, const CallEvent &Call, |
298 | CheckerContext &C) const; |
299 | |
300 | /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. |
301 | static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { |
302 | return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() |
303 | : Call.getArgExpr(Index: ArgIdx); |
304 | }; |
305 | |
306 | /// Functions for custom taintedness propagation. |
307 | static bool UntrustedEnv(CheckerContext &C); |
308 | }; |
309 | |
310 | using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; |
311 | |
312 | /// Used to parse the configuration file. |
313 | struct TaintConfiguration { |
314 | using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; |
315 | enum class VariadicType { None, Src, Dst }; |
316 | |
317 | struct Common { |
318 | std::string Name; |
319 | std::string Scope; |
320 | }; |
321 | |
322 | struct Sink : Common { |
323 | ArgVecTy SinkArgs; |
324 | }; |
325 | |
326 | struct Filter : Common { |
327 | ArgVecTy FilterArgs; |
328 | }; |
329 | |
330 | struct Propagation : Common { |
331 | ArgVecTy SrcArgs; |
332 | ArgVecTy DstArgs; |
333 | VariadicType VarType; |
334 | ArgIdxTy VarIndex; |
335 | }; |
336 | |
337 | std::vector<Propagation> Propagations; |
338 | std::vector<Filter> Filters; |
339 | std::vector<Sink> Sinks; |
340 | |
341 | TaintConfiguration() = default; |
342 | TaintConfiguration(const TaintConfiguration &) = default; |
343 | TaintConfiguration(TaintConfiguration &&) = default; |
344 | TaintConfiguration &operator=(const TaintConfiguration &) = default; |
345 | TaintConfiguration &operator=(TaintConfiguration &&) = default; |
346 | }; |
347 | |
348 | struct GenericTaintRuleParser { |
349 | GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} |
350 | /// Container type used to gather call identification objects grouped into |
351 | /// pairs with their corresponding taint rules. It is temporary as it is used |
352 | /// to finally initialize RuleLookupTy, which is considered to be immutable. |
353 | using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; |
354 | RulesContTy parseConfiguration(const std::string &Option, |
355 | TaintConfiguration &&Config) const; |
356 | |
357 | private: |
358 | using NamePartsTy = llvm::SmallVector<StringRef, 2>; |
359 | |
360 | /// Validate part of the configuration, which contains a list of argument |
361 | /// indexes. |
362 | void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; |
363 | |
364 | template <typename Config> static NamePartsTy parseNameParts(const Config &C); |
365 | |
366 | // Takes the config and creates a CallDescription for it and associates a Rule |
367 | // with that. |
368 | template <typename Config> |
369 | static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, |
370 | RulesContTy &Rules); |
371 | |
372 | void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, |
373 | RulesContTy &Rules) const; |
374 | void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, |
375 | RulesContTy &Rules) const; |
376 | void parseConfig(const std::string &Option, |
377 | TaintConfiguration::Propagation &&P, |
378 | RulesContTy &Rules) const; |
379 | |
380 | CheckerManager &Mgr; |
381 | }; |
382 | |
383 | class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { |
384 | public: |
385 | void checkPreCall(const CallEvent &Call, CheckerContext &C) const; |
386 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
387 | |
388 | void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, |
389 | const char *Sep) const override; |
390 | |
391 | /// Generate a report if the expression is tainted or points to tainted data. |
392 | bool generateReportIfTainted(const Expr *E, StringRef Msg, |
393 | CheckerContext &C) const; |
394 | |
395 | bool isTaintReporterCheckerEnabled = false; |
396 | std::optional<BugType> BT; |
397 | |
398 | private: |
399 | bool checkUncontrolledFormatString(const CallEvent &Call, |
400 | CheckerContext &C) const; |
401 | |
402 | void taintUnsafeSocketProtocol(const CallEvent &Call, |
403 | CheckerContext &C) const; |
404 | |
405 | /// The taint rules are initalized with the help of a CheckerContext to |
406 | /// access user-provided configuration. |
407 | void initTaintRules(CheckerContext &C) const; |
408 | |
409 | // TODO: The two separate `CallDescriptionMap`s were introduced when |
410 | // `CallDescription` was unable to restrict matches to the global namespace |
411 | // only. This limitation no longer exists, so the following two maps should |
412 | // be unified. |
413 | mutable std::optional<RuleLookupTy> StaticTaintRules; |
414 | mutable std::optional<RuleLookupTy> DynamicTaintRules; |
415 | }; |
416 | } // end of anonymous namespace |
417 | |
418 | /// YAML serialization mapping. |
419 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) |
420 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) |
421 | LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) |
422 | |
423 | namespace llvm { |
424 | namespace yaml { |
425 | template <> struct MappingTraits<TaintConfiguration> { |
426 | static void mapping(IO &IO, TaintConfiguration &Config) { |
427 | IO.mapOptional(Key: "Propagations" , Val&: Config.Propagations); |
428 | IO.mapOptional(Key: "Filters" , Val&: Config.Filters); |
429 | IO.mapOptional(Key: "Sinks" , Val&: Config.Sinks); |
430 | } |
431 | }; |
432 | |
433 | template <> struct MappingTraits<TaintConfiguration::Sink> { |
434 | static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { |
435 | IO.mapRequired(Key: "Name" , Val&: Sink.Name); |
436 | IO.mapOptional(Key: "Scope" , Val&: Sink.Scope); |
437 | IO.mapRequired(Key: "Args" , Val&: Sink.SinkArgs); |
438 | } |
439 | }; |
440 | |
441 | template <> struct MappingTraits<TaintConfiguration::Filter> { |
442 | static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { |
443 | IO.mapRequired(Key: "Name" , Val&: Filter.Name); |
444 | IO.mapOptional(Key: "Scope" , Val&: Filter.Scope); |
445 | IO.mapRequired(Key: "Args" , Val&: Filter.FilterArgs); |
446 | } |
447 | }; |
448 | |
449 | template <> struct MappingTraits<TaintConfiguration::Propagation> { |
450 | static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { |
451 | IO.mapRequired(Key: "Name" , Val&: Propagation.Name); |
452 | IO.mapOptional(Key: "Scope" , Val&: Propagation.Scope); |
453 | IO.mapOptional(Key: "SrcArgs" , Val&: Propagation.SrcArgs); |
454 | IO.mapOptional(Key: "DstArgs" , Val&: Propagation.DstArgs); |
455 | IO.mapOptional(Key: "VariadicType" , Val&: Propagation.VarType); |
456 | IO.mapOptional(Key: "VariadicIndex" , Val&: Propagation.VarIndex); |
457 | } |
458 | }; |
459 | |
460 | template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { |
461 | static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { |
462 | IO.enumCase(Val&: Value, Str: "None" , ConstVal: TaintConfiguration::VariadicType::None); |
463 | IO.enumCase(Val&: Value, Str: "Src" , ConstVal: TaintConfiguration::VariadicType::Src); |
464 | IO.enumCase(Val&: Value, Str: "Dst" , ConstVal: TaintConfiguration::VariadicType::Dst); |
465 | } |
466 | }; |
467 | } // namespace yaml |
468 | } // namespace llvm |
469 | |
470 | /// A set which is used to pass information from call pre-visit instruction |
471 | /// to the call post-visit. The values are signed integers, which are either |
472 | /// ReturnValueIndex, or indexes of the pointer/reference argument, which |
473 | /// points to data, which should be tainted on return. |
474 | REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, |
475 | ImmutableSet<ArgIdxTy>) |
476 | REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) |
477 | |
478 | void GenericTaintRuleParser::validateArgVector(const std::string &Option, |
479 | const ArgVecTy &Args) const { |
480 | for (ArgIdxTy Arg : Args) { |
481 | if (Arg < ReturnValueIndex) { |
482 | Mgr.reportInvalidCheckerOptionValue( |
483 | C: Mgr.getChecker<GenericTaintChecker>(), OptionName: Option, |
484 | ExpectedValueDesc: "an argument number for propagation rules greater or equal to -1" ); |
485 | } |
486 | } |
487 | } |
488 | |
489 | template <typename Config> |
490 | GenericTaintRuleParser::NamePartsTy |
491 | GenericTaintRuleParser::parseNameParts(const Config &C) { |
492 | NamePartsTy NameParts; |
493 | if (!C.Scope.empty()) { |
494 | // If the Scope argument contains multiple "::" parts, those are considered |
495 | // namespace identifiers. |
496 | StringRef{C.Scope}.split(A&: NameParts, Separator: "::" , /*MaxSplit*/ -1, |
497 | /*KeepEmpty*/ false); |
498 | } |
499 | NameParts.emplace_back(C.Name); |
500 | return NameParts; |
501 | } |
502 | |
503 | template <typename Config> |
504 | void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, |
505 | GenericTaintRule &&Rule, |
506 | RulesContTy &Rules) { |
507 | NamePartsTy NameParts = parseNameParts(C); |
508 | Rules.emplace_back(args: CallDescription(CDM::Unspecified, NameParts), |
509 | args: std::move(Rule)); |
510 | } |
511 | |
512 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
513 | TaintConfiguration::Sink &&S, |
514 | RulesContTy &Rules) const { |
515 | validateArgVector(Option, Args: S.SinkArgs); |
516 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Sink(SinkArgs: std::move(S.SinkArgs)), |
517 | Rules); |
518 | } |
519 | |
520 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
521 | TaintConfiguration::Filter &&S, |
522 | RulesContTy &Rules) const { |
523 | validateArgVector(Option, Args: S.FilterArgs); |
524 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Filter(FilterArgs: std::move(S.FilterArgs)), |
525 | Rules); |
526 | } |
527 | |
528 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
529 | TaintConfiguration::Propagation &&P, |
530 | RulesContTy &Rules) const { |
531 | validateArgVector(Option, Args: P.SrcArgs); |
532 | validateArgVector(Option, Args: P.DstArgs); |
533 | bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; |
534 | bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; |
535 | std::optional<ArgIdxTy> JustVarIndex = P.VarIndex; |
536 | |
537 | ArgSet SrcDesc(std::move(P.SrcArgs), |
538 | IsSrcVariadic ? JustVarIndex : std::nullopt); |
539 | ArgSet DstDesc(std::move(P.DstArgs), |
540 | IsDstVariadic ? JustVarIndex : std::nullopt); |
541 | |
542 | consumeRulesFromConfig( |
543 | C: P, Rule: GenericTaintRule::Prop(SrcArgs: std::move(SrcDesc), DstArgs: std::move(DstDesc)), Rules); |
544 | } |
545 | |
546 | GenericTaintRuleParser::RulesContTy |
547 | GenericTaintRuleParser::parseConfiguration(const std::string &Option, |
548 | TaintConfiguration &&Config) const { |
549 | |
550 | RulesContTy Rules; |
551 | |
552 | for (auto &F : Config.Filters) |
553 | parseConfig(Option, S: std::move(F), Rules); |
554 | |
555 | for (auto &S : Config.Sinks) |
556 | parseConfig(Option, S: std::move(S), Rules); |
557 | |
558 | for (auto &P : Config.Propagations) |
559 | parseConfig(Option, P: std::move(P), Rules); |
560 | |
561 | return Rules; |
562 | } |
563 | |
564 | void GenericTaintChecker::initTaintRules(CheckerContext &C) const { |
565 | // Check for exact name match for functions without builtin substitutes. |
566 | // Use qualified name, because these are C functions without namespace. |
567 | |
568 | if (StaticTaintRules || DynamicTaintRules) |
569 | return; |
570 | |
571 | using RulesConstructionTy = |
572 | std::vector<std::pair<CallDescription, GenericTaintRule>>; |
573 | using TR = GenericTaintRule; |
574 | |
575 | RulesConstructionTy GlobalCRules{ |
576 | // Sources |
577 | {{CDM::CLibrary, {"fdopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
578 | {{CDM::CLibrary, {"fopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
579 | {{CDM::CLibrary, {"freopen" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
580 | {{CDM::CLibrary, {"getch" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
581 | {{CDM::CLibrary, {"getchar" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
582 | {{CDM::CLibrary, {"getchar_unlocked" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
583 | {{CDM::CLibrary, {"gets" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
584 | {{CDM::CLibrary, {"gets_s" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
585 | {{CDM::CLibrary, {"scanf" }}, TR::Source(SourceArgs: {{}, 1})}, |
586 | {{CDM::CLibrary, {"scanf_s" }}, TR::Source(SourceArgs: {{}, 1})}, |
587 | {{CDM::CLibrary, {"wgetch" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
588 | // Sometimes the line between taint sources and propagators is blurry. |
589 | // _IO_getc is choosen to be a source, but could also be a propagator. |
590 | // This way it is simpler, as modeling it as a propagator would require |
591 | // to model the possible sources of _IO_FILE * values, which the _IO_getc |
592 | // function takes as parameters. |
593 | {{CDM::CLibrary, {"_IO_getc" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
594 | {{CDM::CLibrary, {"getcwd" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
595 | {{CDM::CLibrary, {"getwd" }}, TR::Source(SourceArgs: {{0, ReturnValueIndex}})}, |
596 | {{CDM::CLibrary, {"readlink" }}, TR::Source(SourceArgs: {{1, ReturnValueIndex}})}, |
597 | {{CDM::CLibrary, {"readlinkat" }}, TR::Source(SourceArgs: {{2, ReturnValueIndex}})}, |
598 | {{CDM::CLibrary, {"get_current_dir_name" }}, |
599 | TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
600 | {{CDM::CLibrary, {"gethostname" }}, TR::Source(SourceArgs: {{0}})}, |
601 | {{CDM::CLibrary, {"getnameinfo" }}, TR::Source(SourceArgs: {{2, 4}})}, |
602 | {{CDM::CLibrary, {"getseuserbyname" }}, TR::Source(SourceArgs: {{1, 2}})}, |
603 | {{CDM::CLibrary, {"getgroups" }}, TR::Source(SourceArgs: {{1, ReturnValueIndex}})}, |
604 | {{CDM::CLibrary, {"getlogin" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}, |
605 | {{CDM::CLibrary, {"getlogin_r" }}, TR::Source(SourceArgs: {{0}})}, |
606 | |
607 | // Props |
608 | {{CDM::CLibrary, {"accept" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
609 | {{CDM::CLibrary, {"atoi" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
610 | {{CDM::CLibrary, {"atol" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
611 | {{CDM::CLibrary, {"atoll" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
612 | {{CDM::CLibrary, {"fgetc" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
613 | {{CDM::CLibrary, {"fgetln" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
614 | {{CDM::CLibraryMaybeHardened, {"fgets" }}, |
615 | TR::Prop(SrcArgs: {{2}}, DstArgs: {{0, ReturnValueIndex}})}, |
616 | {{CDM::CLibraryMaybeHardened, {"fgetws" }}, |
617 | TR::Prop(SrcArgs: {{2}}, DstArgs: {{0, ReturnValueIndex}})}, |
618 | {{CDM::CLibrary, {"fscanf" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
619 | {{CDM::CLibrary, {"fscanf_s" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
620 | {{CDM::CLibrary, {"sscanf" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
621 | {{CDM::CLibrary, {"sscanf_s" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{}, 2})}, |
622 | |
623 | {{CDM::CLibrary, {"getc" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
624 | {{CDM::CLibrary, {"getc_unlocked" }}, |
625 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
626 | {{CDM::CLibrary, {"getdelim" }}, TR::Prop(SrcArgs: {{3}}, DstArgs: {{0}})}, |
627 | // TODO: this intends to match the C function `getline()`, but the call |
628 | // description also matches the C++ function `std::getline()`; it should |
629 | // be ruled out by some additional logic. |
630 | {{CDM::CLibrary, {"getline" }}, TR::Prop(SrcArgs: {{2}}, DstArgs: {{0}})}, |
631 | {{CDM::CLibrary, {"getw" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
632 | {{CDM::CLibraryMaybeHardened, {"pread" }}, |
633 | TR::Prop(SrcArgs: {{0, 1, 2, 3}}, DstArgs: {{1, ReturnValueIndex}})}, |
634 | {{CDM::CLibraryMaybeHardened, {"read" }}, |
635 | TR::Prop(SrcArgs: {{0, 2}}, DstArgs: {{1, ReturnValueIndex}})}, |
636 | {{CDM::CLibraryMaybeHardened, {"fread" }}, |
637 | TR::Prop(SrcArgs: {{3}}, DstArgs: {{0, ReturnValueIndex}})}, |
638 | {{CDM::CLibraryMaybeHardened, {"recv" }}, |
639 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
640 | {{CDM::CLibraryMaybeHardened, {"recvfrom" }}, |
641 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
642 | |
643 | {{CDM::CLibrary, {"ttyname" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
644 | {{CDM::CLibrary, {"ttyname_r" }}, |
645 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
646 | |
647 | {{CDM::CLibrary, {"basename" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
648 | {{CDM::CLibrary, {"dirname" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
649 | {{CDM::CLibrary, {"fnmatch" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{ReturnValueIndex}})}, |
650 | |
651 | {{CDM::CLibrary, {"mbtowc" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
652 | {{CDM::CLibrary, {"wctomb" }}, TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
653 | {{CDM::CLibrary, {"wcwidth" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
654 | |
655 | {{CDM::CLibrary, {"memcmp" }}, |
656 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
657 | {{CDM::CLibraryMaybeHardened, {"memcpy" }}, |
658 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
659 | {{CDM::CLibraryMaybeHardened, {"memmove" }}, |
660 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
661 | {{CDM::CLibraryMaybeHardened, {"bcopy" }}, TR::Prop(SrcArgs: {{0, 2}}, DstArgs: {{1}})}, |
662 | |
663 | // Note: "memmem" and its variants search for a byte sequence ("needle") |
664 | // in a larger area ("haystack"). Currently we only propagate taint from |
665 | // the haystack to the result, but in theory tampering with the needle |
666 | // could also produce incorrect results. |
667 | {{CDM::CLibrary, {"memmem" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
668 | {{CDM::CLibrary, {"strstr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
669 | {{CDM::CLibrary, {"strcasestr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
670 | |
671 | // Analogously, the following functions search for a byte within a buffer |
672 | // and we only propagate taint from the buffer to the result. |
673 | {{CDM::CLibraryMaybeHardened, {"memchr" }}, |
674 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
675 | {{CDM::CLibraryMaybeHardened, {"memrchr" }}, |
676 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
677 | {{CDM::CLibrary, {"rawmemchr" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
678 | {{CDM::CLibraryMaybeHardened, {"strchr" }}, |
679 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
680 | {{CDM::CLibraryMaybeHardened, {"strrchr" }}, |
681 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
682 | {{CDM::CLibraryMaybeHardened, {"strchrnul" }}, |
683 | TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
684 | {{CDM::CLibrary, {"index" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
685 | {{CDM::CLibrary, {"rindex" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
686 | |
687 | // FIXME: In case of arrays, only the first element of the array gets |
688 | // tainted. |
689 | {{CDM::CLibrary, {"qsort" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{0}})}, |
690 | {{CDM::CLibrary, {"qsort_r" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{0}})}, |
691 | |
692 | {{CDM::CLibrary, {"strcmp" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
693 | {{CDM::CLibrary, {"strcasecmp" }}, |
694 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
695 | {{CDM::CLibrary, {"strncmp" }}, |
696 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
697 | {{CDM::CLibrary, {"strncasecmp" }}, |
698 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{ReturnValueIndex}})}, |
699 | {{CDM::CLibrary, {"strspn" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
700 | {{CDM::CLibrary, {"strcspn" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
701 | {{CDM::CLibrary, {"strpbrk" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
702 | |
703 | {{CDM::CLibrary, {"strndup" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
704 | {{CDM::CLibrary, {"strndupa" }}, TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{ReturnValueIndex}})}, |
705 | {{CDM::CLibrary, {"strdup" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
706 | {{CDM::CLibrary, {"strdupa" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
707 | {{CDM::CLibrary, {"wcsdup" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
708 | |
// strlen, wcslen, strnlen and the like intentionally don't propagate taint.
// For details, see https://github.com/llvm/llvm-project/pull/66086
711 | |
712 | {{CDM::CLibrary, {"strtol" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
713 | {{CDM::CLibrary, {"strtoll" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
714 | {{CDM::CLibrary, {"strtoul" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
715 | {{CDM::CLibrary, {"strtoull" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{1, ReturnValueIndex}})}, |
716 | |
717 | {{CDM::CLibrary, {"tolower" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
718 | {{CDM::CLibrary, {"toupper" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
719 | |
720 | {{CDM::CLibrary, {"isalnum" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
721 | {{CDM::CLibrary, {"isalpha" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
722 | {{CDM::CLibrary, {"isascii" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
723 | {{CDM::CLibrary, {"isblank" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
724 | {{CDM::CLibrary, {"iscntrl" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
725 | {{CDM::CLibrary, {"isdigit" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
726 | {{CDM::CLibrary, {"isgraph" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
727 | {{CDM::CLibrary, {"islower" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
728 | {{CDM::CLibrary, {"isprint" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
729 | {{CDM::CLibrary, {"ispunct" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
730 | {{CDM::CLibrary, {"isspace" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
731 | {{CDM::CLibrary, {"isupper" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
732 | {{CDM::CLibrary, {"isxdigit" }}, TR::Prop(SrcArgs: {{0}}, DstArgs: {{ReturnValueIndex}})}, |
733 | |
734 | {{CDM::CLibraryMaybeHardened, {"strcpy" }}, |
735 | TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
736 | {{CDM::CLibraryMaybeHardened, {"stpcpy" }}, |
737 | TR::Prop(SrcArgs: {{1}}, DstArgs: {{0, ReturnValueIndex}})}, |
738 | {{CDM::CLibraryMaybeHardened, {"strcat" }}, |
739 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{0, ReturnValueIndex}})}, |
740 | {{CDM::CLibraryMaybeHardened, {"wcsncat" }}, |
741 | TR::Prop(SrcArgs: {{0, 1}}, DstArgs: {{0, ReturnValueIndex}})}, |
742 | {{CDM::CLibraryMaybeHardened, {"strncpy" }}, |
743 | TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
744 | {{CDM::CLibraryMaybeHardened, {"strncat" }}, |
745 | TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{0, ReturnValueIndex}})}, |
746 | {{CDM::CLibraryMaybeHardened, {"strlcpy" }}, TR::Prop(SrcArgs: {{1, 2}}, DstArgs: {{0}})}, |
747 | {{CDM::CLibraryMaybeHardened, {"strlcat" }}, TR::Prop(SrcArgs: {{0, 1, 2}}, DstArgs: {{0}})}, |
748 | |
749 | // Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient |
750 | // for unified handling of a function `FOO()` and its hardened variant |
751 | // `__FOO_chk()`, but in the "sprintf" family the extra parameters of the |
752 | // hardened variants are inserted into the middle of the parameter list, |
753 | // so that would not work in their case. |
754 | // int snprintf(char * str, size_t maxlen, const char * format, ...); |
755 | {{CDM::CLibrary, {"snprintf" }}, |
756 | TR::Prop(SrcArgs: {{1, 2}, 3}, DstArgs: {{0, ReturnValueIndex}})}, |
757 | // int sprintf(char * str, const char * format, ...); |
758 | {{CDM::CLibrary, {"sprintf" }}, |
759 | TR::Prop(SrcArgs: {{1}, 2}, DstArgs: {{0, ReturnValueIndex}})}, |
760 | // int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen, |
761 | // const char * format, ...); |
762 | {{CDM::CLibrary, {"__snprintf_chk" }}, |
763 | TR::Prop(SrcArgs: {{1, 4}, 5}, DstArgs: {{0, ReturnValueIndex}})}, |
764 | // int __sprintf_chk(char * str, int flag, size_t strlen, const char * |
765 | // format, ...); |
766 | {{CDM::CLibrary, {"__sprintf_chk" }}, |
767 | TR::Prop(SrcArgs: {{3}, 4}, DstArgs: {{0, ReturnValueIndex}})}, |
768 | |
769 | // Sinks |
770 | {{CDM::CLibrary, {"system" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
771 | {{CDM::CLibrary, {"popen" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
772 | {{CDM::CLibrary, {"execl" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
773 | {{CDM::CLibrary, {"execle" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
774 | {{CDM::CLibrary, {"execlp" }}, TR::Sink(SinkArgs: {{}, {0}}, Msg: MsgSanitizeSystemArgs)}, |
775 | {{CDM::CLibrary, {"execv" }}, TR::Sink(SinkArgs: {{0, 1}}, Msg: MsgSanitizeSystemArgs)}, |
776 | {{CDM::CLibrary, {"execve" }}, |
777 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
778 | {{CDM::CLibrary, {"fexecve" }}, |
779 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
780 | {{CDM::CLibrary, {"execvp" }}, TR::Sink(SinkArgs: {{0, 1}}, Msg: MsgSanitizeSystemArgs)}, |
781 | {{CDM::CLibrary, {"execvpe" }}, |
782 | TR::Sink(SinkArgs: {{0, 1, 2}}, Msg: MsgSanitizeSystemArgs)}, |
783 | {{CDM::CLibrary, {"dlopen" }}, TR::Sink(SinkArgs: {{0}}, Msg: MsgSanitizeSystemArgs)}, |
784 | |
// malloc, calloc, alloca, realloc and memccpy are intentionally not marked
// as taint sinks, because unconditional reporting for these functions
// generates many false positives.
// These taint sinks should be implemented in other checkers with more
// sophisticated sanitization heuristics.
790 | |
791 | {{CDM::CLibrary, {"setproctitle" }}, |
792 | TR::Sink(SinkArgs: {{0}, 1}, Msg: MsgUncontrolledFormatString)}, |
793 | {{CDM::CLibrary, {"setproctitle_fast" }}, |
794 | TR::Sink(SinkArgs: {{0}, 1}, Msg: MsgUncontrolledFormatString)}}; |
795 | |
796 | if (TR::UntrustedEnv(C)) { |
797 | // void setproctitle_init(int argc, char *argv[], char *envp[]) |
798 | // TODO: replace `MsgCustomSink` with a message that fits this situation. |
799 | GlobalCRules.push_back(x: {{CDM::CLibrary, {"setproctitle_init" }}, |
800 | TR::Sink(SinkArgs: {{1, 2}}, Msg: MsgCustomSink)}); |
801 | |
802 | // `getenv` returns taint only in untrusted environments. |
803 | GlobalCRules.push_back( |
804 | x: {{CDM::CLibrary, {"getenv" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}); |
805 | } |
806 | |
807 | StaticTaintRules.emplace(args: std::make_move_iterator(i: GlobalCRules.begin()), |
808 | args: std::make_move_iterator(i: GlobalCRules.end())); |
809 | |
810 | // User-provided taint configuration. |
811 | CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); |
812 | assert(Mgr); |
813 | GenericTaintRuleParser ConfigParser{*Mgr}; |
814 | std::string Option{"Config" }; |
815 | StringRef ConfigFile = |
816 | Mgr->getAnalyzerOptions().getCheckerStringOption(C: this, OptionName: Option); |
817 | std::optional<TaintConfiguration> Config = |
818 | getConfiguration<TaintConfiguration>(Mgr&: *Mgr, Chk: this, Option, ConfigFile); |
819 | if (!Config) { |
820 | // We don't have external taint config, no parsing required. |
821 | DynamicTaintRules = RuleLookupTy{}; |
822 | return; |
823 | } |
824 | |
825 | GenericTaintRuleParser::RulesContTy Rules{ |
826 | ConfigParser.parseConfiguration(Option, Config: std::move(*Config))}; |
827 | |
828 | DynamicTaintRules.emplace(args: std::make_move_iterator(i: Rules.begin()), |
829 | args: std::make_move_iterator(i: Rules.end())); |
830 | } |
831 | |
832 | void GenericTaintChecker::checkPreCall(const CallEvent &Call, |
833 | CheckerContext &C) const { |
834 | initTaintRules(C); |
835 | |
836 | // FIXME: this should be much simpler. |
837 | if (const auto *Rule = |
838 | Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) |
839 | Rule->process(Checker: *this, Call, C); |
840 | else if (const auto *Rule = DynamicTaintRules->lookup(Call)) |
841 | Rule->process(Checker: *this, Call, C); |
842 | |
843 | // FIXME: These edge cases are to be eliminated from here eventually. |
844 | // |
845 | // Additional check that is not supported by CallDescription. |
846 | // TODO: Make CallDescription be able to match attributes such as printf-like |
847 | // arguments. |
848 | checkUncontrolledFormatString(Call, C); |
849 | |
850 | // TODO: Modeling sockets should be done in a specific checker. |
851 | // Socket is a source, which taints the return value. |
852 | taintUnsafeSocketProtocol(Call, C); |
853 | } |
854 | |
855 | void GenericTaintChecker::checkPostCall(const CallEvent &Call, |
856 | CheckerContext &C) const { |
857 | // Set the marked values as tainted. The return value only accessible from |
858 | // checkPostStmt. |
859 | ProgramStateRef State = C.getState(); |
860 | const StackFrameContext *CurrentFrame = C.getStackFrame(); |
861 | |
862 | // Depending on what was tainted at pre-visit, we determined a set of |
863 | // arguments which should be tainted after the function returns. These are |
864 | // stored in the state as TaintArgsOnPostVisit set. |
865 | TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); |
866 | |
867 | const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(K: CurrentFrame); |
868 | if (!TaintArgs) |
869 | return; |
870 | assert(!TaintArgs->isEmpty()); |
871 | |
872 | LLVM_DEBUG(for (ArgIdxTy I |
873 | : *TaintArgs) { |
874 | llvm::dbgs() << "PostCall<" ; |
875 | Call.dump(llvm::dbgs()); |
876 | llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; |
877 | }); |
878 | |
879 | const NoteTag *InjectionTag = nullptr; |
880 | std::vector<SymbolRef> TaintedSymbols; |
881 | std::vector<ArgIdxTy> TaintedIndexes; |
882 | for (ArgIdxTy ArgNum : *TaintArgs) { |
883 | // Special handling for the tainted return value. |
884 | if (ArgNum == ReturnValueIndex) { |
885 | State = addTaint(State, V: Call.getReturnValue()); |
886 | std::vector<SymbolRef> TaintedSyms = |
887 | getTaintedSymbols(State, V: Call.getReturnValue()); |
888 | if (!TaintedSyms.empty()) { |
889 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
890 | TaintedIndexes.push_back(x: ArgNum); |
891 | } |
892 | continue; |
893 | } |
894 | // The arguments are pointer arguments. The data they are pointing at is |
895 | // tainted after the call. |
896 | if (auto V = getPointeeOf(State, Arg: Call.getArgSVal(Index: ArgNum))) { |
897 | State = addTaint(State, V: *V); |
898 | std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, V: *V); |
899 | if (!TaintedSyms.empty()) { |
900 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
901 | TaintedIndexes.push_back(x: ArgNum); |
902 | } |
903 | } |
904 | } |
905 | // Create a NoteTag callback, which prints to the user where the taintedness |
906 | // was propagated to. |
907 | InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedArgs: TaintedIndexes, |
908 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
909 | // Clear up the taint info from the state. |
910 | State = State->remove<TaintArgsOnPostVisit>(K: CurrentFrame); |
911 | C.addTransition(State, Tag: InjectionTag); |
912 | } |
913 | |
914 | void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, |
915 | const char *NL, const char *Sep) const { |
916 | printTaint(State, Out, nl: NL, sep: Sep); |
917 | } |
918 | |
919 | void GenericTaintRule::process(const GenericTaintChecker &Checker, |
920 | const CallEvent &Call, CheckerContext &C) const { |
921 | ProgramStateRef State = C.getState(); |
922 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
923 | |
924 | /// Iterate every call argument, and get their corresponding Expr and SVal. |
925 | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { |
926 | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { |
927 | const Expr *E = GetArgExpr(ArgIdx: I, Call); |
928 | Fun(I, E, C.getSVal(S: E)); |
929 | } |
930 | }; |
931 | |
932 | /// Check for taint sinks. |
933 | ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { |
934 | // Add taintedness to stdin parameters |
935 | if (isStdin(Val: C.getSVal(S: E), ACtx: C.getASTContext())) { |
936 | State = addTaint(State, V: C.getSVal(S: E)); |
937 | } |
938 | if (SinkArgs.contains(ArgIdx: I) && isTaintedOrPointsToTainted(State, ExprSVal: C.getSVal(S: E))) |
939 | Checker.generateReportIfTainted(E, Msg: SinkMsg.value_or(u: MsgCustomSink), C); |
940 | }); |
941 | |
942 | /// Check for taint filters. |
943 | ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) { |
944 | if (FilterArgs.contains(ArgIdx: I)) { |
945 | State = removeTaint(State, V: S); |
946 | if (auto P = getPointeeOf(State, Arg: S)) |
947 | State = removeTaint(State, V: *P); |
948 | } |
949 | }); |
950 | |
951 | /// Check for taint propagation sources. |
952 | /// A rule will make the destination variables tainted if PropSrcArgs |
953 | /// is empty (taints the destination |
954 | /// arguments unconditionally), or if any of its signified |
955 | /// args are tainted in context of the current CallEvent. |
956 | bool IsMatching = PropSrcArgs.isEmpty(); |
957 | std::vector<SymbolRef> TaintedSymbols; |
958 | std::vector<ArgIdxTy> TaintedIndexes; |
959 | ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, |
960 | &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { |
961 | std::optional<SVal> TaintedSVal = |
962 | getTaintedPointeeOrPointer(State, Arg: C.getSVal(S: E)); |
963 | IsMatching = |
964 | IsMatching || (PropSrcArgs.contains(ArgIdx: I) && TaintedSVal.has_value()); |
965 | |
966 | // We track back tainted arguments except for stdin |
967 | if (TaintedSVal && !isStdin(Val: *TaintedSVal, ACtx: C.getASTContext())) { |
968 | std::vector<SymbolRef> TaintedArgSyms = |
969 | getTaintedSymbols(State, V: *TaintedSVal); |
970 | if (!TaintedArgSyms.empty()) { |
971 | llvm::append_range(C&: TaintedSymbols, R&: TaintedArgSyms); |
972 | TaintedIndexes.push_back(x: I); |
973 | } |
974 | } |
975 | }); |
976 | |
977 | // Early return for propagation rules which dont match. |
978 | // Matching propagations, Sinks and Filters will pass this point. |
979 | if (!IsMatching) |
980 | return; |
981 | |
982 | const auto WouldEscape = [](SVal V, QualType Ty) -> bool { |
983 | if (!isa<Loc>(Val: V)) |
984 | return false; |
985 | |
986 | const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); |
987 | const bool IsNonConstPtr = |
988 | Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); |
989 | |
990 | return IsNonConstRef || IsNonConstPtr; |
991 | }; |
992 | |
993 | /// Propagate taint where it is necessary. |
994 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
995 | ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); |
996 | ForEachCallArg( |
997 | [&](ArgIdxTy I, const Expr *E, SVal V) { |
998 | if (PropDstArgs.contains(ArgIdx: I)) { |
999 | LLVM_DEBUG(llvm::dbgs() << "PreCall<" ; Call.dump(llvm::dbgs()); |
1000 | llvm::dbgs() |
1001 | << "> prepares tainting arg index: " << I << '\n';); |
1002 | Result = F.add(Old: Result, V: I); |
1003 | } |
1004 | |
1005 | // Taint property gets lost if the variable is passed as a |
1006 | // non-const pointer or reference to a function which is |
1007 | // not inlined. For matching rules we want to preserve the taintedness. |
1008 | // TODO: We should traverse all reachable memory regions via the |
1009 | // escaping parameter. Instead of doing that we simply mark only the |
1010 | // referred memory region as tainted. |
1011 | if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, Arg: V)) { |
1012 | LLVM_DEBUG(if (!Result.contains(I)) { |
1013 | llvm::dbgs() << "PreCall<" ; |
1014 | Call.dump(llvm::dbgs()); |
1015 | llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; |
1016 | }); |
1017 | Result = F.add(Old: Result, V: I); |
1018 | } |
1019 | }); |
1020 | |
1021 | if (!Result.isEmpty()) |
1022 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1023 | const NoteTag *InjectionTag = taintOriginTrackerTag( |
1024 | C, TaintedSymbols: std::move(TaintedSymbols), TaintedArgs: std::move(TaintedIndexes), |
1025 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
1026 | C.addTransition(State, Tag: InjectionTag); |
1027 | } |
1028 | |
1029 | bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { |
1030 | return !C.getAnalysisManager() |
1031 | .getAnalyzerOptions() |
1032 | .ShouldAssumeControlledEnvironment; |
1033 | } |
1034 | |
1035 | bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, |
1036 | CheckerContext &C) const { |
1037 | assert(E); |
1038 | if (!isTaintReporterCheckerEnabled) |
1039 | return false; |
1040 | std::optional<SVal> TaintedSVal = |
1041 | getTaintedPointeeOrPointer(State: C.getState(), Arg: C.getSVal(S: E)); |
1042 | |
1043 | if (!TaintedSVal) |
1044 | return false; |
1045 | |
1046 | // Generate diagnostic. |
1047 | assert(BT); |
1048 | static CheckerProgramPointTag Tag(BT->getCheckerName(), Msg); |
1049 | if (ExplodedNode *N = C.generateNonFatalErrorNode(State: C.getState(), Tag: &Tag)) { |
1050 | auto report = std::make_unique<PathSensitiveBugReport>(args: *BT, args&: Msg, args&: N); |
1051 | report->addRange(R: E->getSourceRange()); |
1052 | for (auto TaintedSym : getTaintedSymbols(State: C.getState(), V: *TaintedSVal)) { |
1053 | report->markInteresting(sym: TaintedSym); |
1054 | } |
1055 | C.emitReport(R: std::move(report)); |
1056 | return true; |
1057 | } |
1058 | return false; |
1059 | } |
1060 | |
/// TODO: Remove the checking of printf format attributes and the socket
/// whitelisting from GenericTaintChecker. This affects the following
/// functions:
///   getPrintfFormatArgumentNum,
///   GenericTaintChecker::checkUncontrolledFormatString,
///   GenericTaintChecker::taintUnsafeSocketProtocol
1066 | |
1067 | static bool getPrintfFormatArgumentNum(const CallEvent &Call, |
1068 | const CheckerContext &C, |
1069 | ArgIdxTy &ArgNum) { |
1070 | // Find if the function contains a format string argument. |
1071 | // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, |
1072 | // vsnprintf, syslog, custom annotated functions. |
1073 | const Decl *CallDecl = Call.getDecl(); |
1074 | if (!CallDecl) |
1075 | return false; |
1076 | const FunctionDecl *FDecl = CallDecl->getAsFunction(); |
1077 | if (!FDecl) |
1078 | return false; |
1079 | |
1080 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
1081 | |
1082 | for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { |
1083 | ArgNum = Format->getFormatIdx() - 1; |
1084 | if ((Format->getType()->getName() == "printf" ) && CallNumArgs > ArgNum) |
1085 | return true; |
1086 | } |
1087 | |
1088 | return false; |
1089 | } |
1090 | |
1091 | bool GenericTaintChecker::checkUncontrolledFormatString( |
1092 | const CallEvent &Call, CheckerContext &C) const { |
1093 | // Check if the function contains a format string argument. |
1094 | ArgIdxTy ArgNum = 0; |
1095 | if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) |
1096 | return false; |
1097 | |
1098 | // If either the format string content or the pointer itself are tainted, |
1099 | // warn. |
1100 | return generateReportIfTainted(E: Call.getArgExpr(Index: ArgNum), |
1101 | Msg: MsgUncontrolledFormatString, C); |
1102 | } |
1103 | |
1104 | void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, |
1105 | CheckerContext &C) const { |
1106 | if (Call.getNumArgs() < 1) |
1107 | return; |
1108 | const IdentifierInfo *ID = Call.getCalleeIdentifier(); |
1109 | if (!ID) |
1110 | return; |
1111 | if (ID->getName() != "socket" ) |
1112 | return; |
1113 | |
1114 | SourceLocation DomLoc = Call.getArgExpr(Index: 0)->getExprLoc(); |
1115 | StringRef DomName = C.getMacroNameOrSpelling(Loc&: DomLoc); |
1116 | // Allow internal communication protocols. |
1117 | bool SafeProtocol = DomName == "AF_SYSTEM" || DomName == "AF_LOCAL" || |
1118 | DomName == "AF_UNIX" || DomName == "AF_RESERVED_36" ; |
1119 | if (SafeProtocol) |
1120 | return; |
1121 | |
1122 | ProgramStateRef State = C.getState(); |
1123 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
1124 | ImmutableSet<ArgIdxTy> Result = F.add(Old: F.getEmptySet(), V: ReturnValueIndex); |
1125 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1126 | C.addTransition(State); |
1127 | } |
1128 | |
1129 | /// Checker registration |
1130 | void ento::registerTaintPropagationChecker(CheckerManager &Mgr) { |
1131 | Mgr.registerChecker<GenericTaintChecker>(); |
1132 | } |
1133 | |
1134 | bool ento::shouldRegisterTaintPropagationChecker(const CheckerManager &mgr) { |
1135 | return true; |
1136 | } |
1137 | |
1138 | void ento::registerGenericTaintChecker(CheckerManager &Mgr) { |
1139 | GenericTaintChecker *checker = Mgr.getChecker<GenericTaintChecker>(); |
1140 | checker->isTaintReporterCheckerEnabled = true; |
1141 | checker->BT.emplace(args: Mgr.getCurrentCheckerName(), args: "Use of Untrusted Data" , |
1142 | args: categories::TaintedData); |
1143 | } |
1144 | |
1145 | bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { |
1146 | return true; |
1147 | } |
1148 | |