1 | //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Defines basic, non-domain-specific mechanisms for tracking tainted values. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
14 | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
15 | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
17 | #include <optional> |
18 | |
19 | using namespace clang; |
20 | using namespace ento; |
21 | using namespace taint; |
22 | |
23 | // Fully tainted symbols: maps each tainted symbol to its taint tag. |
24 | REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) |
25 | |
26 | // Partially tainted symbols: maps a parent symbol to the set of its tainted sub-regions, each stored with its own taint tag. |
27 | REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, |
28 | TaintTagType) |
29 | REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) |
30 | |
31 | void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, |
32 | const char *Sep) { |
33 | TaintMapTy TM = State->get<TaintMap>(); |
34 | |
35 | if (!TM.isEmpty()) |
36 | Out << "Tainted symbols:" << NL; |
37 | |
38 | for (const auto &I : TM) |
39 | Out << I.first << " : " << I.second << NL; |
40 | } |
41 | |
42 | void taint::dumpTaint(ProgramStateRef State) { |
43 | printTaint(State, Out&: llvm::errs()); |
44 | } |
45 | |
46 | ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, |
47 | const LocationContext *LCtx, |
48 | TaintTagType Kind) { |
49 | return addTaint(State, V: State->getSVal(Ex: S, LCtx), Kind); |
50 | } |
51 | |
52 | ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, |
53 | TaintTagType Kind) { |
54 | SymbolRef Sym = V.getAsSymbol(); |
55 | if (Sym) |
56 | return addTaint(State, Sym, Kind); |
57 | |
58 | // If the SVal represents a structure, try to mass-taint all values within the |
59 | // structure. For now it only works efficiently on lazy compound values that |
60 | // were conjured during a conservative evaluation of a function - either as |
61 | // return values of functions that return structures or arrays by value, or as |
62 | // values of structures or arrays passed into the function by reference, |
63 | // directly or through pointer aliasing. Such lazy compound values are |
64 | // characterized by having exactly one binding in their captured store within |
65 | // their parent region, which is a conjured symbol default-bound to the base |
66 | // region of the parent region. |
67 | if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { |
68 | if (std::optional<SVal> binding = |
69 | State->getStateManager().getStoreManager().getDefaultBinding( |
70 | lcv: *LCV)) { |
71 | if (SymbolRef Sym = binding->getAsSymbol()) |
72 | return addPartialTaint(State, ParentSym: Sym, SubRegion: LCV->getRegion(), Kind); |
73 | } |
74 | } |
75 | |
76 | const MemRegion *R = V.getAsRegion(); |
77 | return addTaint(State, R, Kind); |
78 | } |
79 | |
80 | ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, |
81 | TaintTagType Kind) { |
82 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
83 | return addTaint(State, Sym: SR->getSymbol(), Kind); |
84 | return State; |
85 | } |
86 | |
87 | ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, |
88 | TaintTagType Kind) { |
89 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
90 | // is cast agnostic. |
91 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
92 | Sym = SC->getOperand(); |
93 | |
94 | ProgramStateRef NewState = State->set<TaintMap>(K: Sym, E: Kind); |
95 | assert(NewState); |
96 | return NewState; |
97 | } |
98 | |
99 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { |
100 | SymbolRef Sym = V.getAsSymbol(); |
101 | if (Sym) |
102 | return removeTaint(State, Sym); |
103 | |
104 | const MemRegion *R = V.getAsRegion(); |
105 | return removeTaint(State, R); |
106 | } |
107 | |
108 | ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { |
109 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
110 | return removeTaint(State, Sym: SR->getSymbol()); |
111 | return State; |
112 | } |
113 | |
114 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { |
115 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
116 | // is cast agnostic. |
117 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
118 | Sym = SC->getOperand(); |
119 | |
120 | ProgramStateRef NewState = State->remove<TaintMap>(K: Sym); |
121 | assert(NewState); |
122 | return NewState; |
123 | } |
124 | |
125 | ProgramStateRef taint::addPartialTaint(ProgramStateRef State, |
126 | SymbolRef ParentSym, |
127 | const SubRegion *SubRegion, |
128 | TaintTagType Kind) { |
129 | // Ignore partial taint if the entire parent symbol is already tainted. |
130 | if (const TaintTagType *T = State->get<TaintMap>(key: ParentSym)) |
131 | if (*T == Kind) |
132 | return State; |
133 | |
134 | // Partial taint applies if only a portion of the symbol is tainted. |
135 | if (SubRegion == SubRegion->getBaseRegion()) |
136 | return addTaint(State, Sym: ParentSym, Kind); |
137 | |
138 | const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(key: ParentSym); |
139 | TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); |
140 | TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); |
141 | |
142 | Regs = F.add(Old: Regs, K: SubRegion, D: Kind); |
143 | ProgramStateRef NewState = State->set<DerivedSymTaint>(K: ParentSym, E: Regs); |
144 | assert(NewState); |
145 | return NewState; |
146 | } |
147 | |
148 | bool taint::isTainted(ProgramStateRef State, const Stmt *S, |
149 | const LocationContext *LCtx, TaintTagType Kind) { |
150 | return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
151 | .empty(); |
152 | } |
153 | |
154 | bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { |
155 | return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
156 | .empty(); |
157 | } |
158 | |
159 | bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, |
160 | TaintTagType K) { |
161 | return !getTaintedSymbolsImpl(State, Reg, Kind: K, /*ReturnFirstOnly=*/returnFirstOnly: true) |
162 | .empty(); |
163 | } |
164 | |
165 | bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { |
166 | return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
167 | .empty(); |
168 | } |
169 | |
170 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
171 | const Stmt *S, |
172 | const LocationContext *LCtx, |
173 | TaintTagType Kind) { |
174 | return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
175 | } |
176 | |
177 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V, |
178 | TaintTagType Kind) { |
179 | return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
180 | } |
181 | |
182 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
183 | SymbolRef Sym, |
184 | TaintTagType Kind) { |
185 | return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
186 | } |
187 | |
188 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
189 | const MemRegion *Reg, |
190 | TaintTagType Kind) { |
191 | return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
192 | } |
193 | |
194 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
195 | const Stmt *S, |
196 | const LocationContext *LCtx, |
197 | TaintTagType Kind, |
198 | bool returnFirstOnly) { |
199 | SVal val = State->getSVal(Ex: S, LCtx); |
200 | return getTaintedSymbolsImpl(State, V: val, Kind, returnFirstOnly); |
201 | } |
202 | |
203 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
204 | SVal V, TaintTagType Kind, |
205 | bool returnFirstOnly) { |
206 | if (SymbolRef Sym = V.getAsSymbol()) |
207 | return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); |
208 | if (const MemRegion *Reg = V.getAsRegion()) |
209 | return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); |
210 | return {}; |
211 | } |
212 | |
213 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
214 | const MemRegion *Reg, |
215 | TaintTagType K, |
216 | bool returnFirstOnly) { |
217 | std::vector<SymbolRef> TaintedSymbols; |
218 | if (!Reg) |
219 | return TaintedSymbols; |
220 | |
221 | // Element region (array element) is tainted if the offset is tainted. |
222 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: Reg)) { |
223 | std::vector<SymbolRef> TaintedIndex = |
224 | getTaintedSymbolsImpl(State, V: ER->getIndex(), Kind: K, returnFirstOnly); |
225 | llvm::append_range(C&: TaintedSymbols, R&: TaintedIndex); |
226 | if (returnFirstOnly && !TaintedSymbols.empty()) |
227 | return TaintedSymbols; // return early if needed |
228 | } |
229 | |
230 | // Symbolic region is tainted if the corresponding symbol is tainted. |
231 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: Reg)) { |
232 | std::vector<SymbolRef> TaintedRegions = |
233 | getTaintedSymbolsImpl(State, Sym: SR->getSymbol(), Kind: K, returnFirstOnly); |
234 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
235 | if (returnFirstOnly && !TaintedSymbols.empty()) |
236 | return TaintedSymbols; // return early if needed |
237 | } |
238 | |
239 | // Any subregion (including Element and Symbolic regions) is tainted if its |
240 | // super-region is tainted. |
241 | if (const SubRegion *ER = dyn_cast<SubRegion>(Val: Reg)) { |
242 | std::vector<SymbolRef> TaintedSubRegions = |
243 | getTaintedSymbolsImpl(State, Reg: ER->getSuperRegion(), K, returnFirstOnly); |
244 | llvm::append_range(C&: TaintedSymbols, R&: TaintedSubRegions); |
245 | if (returnFirstOnly && !TaintedSymbols.empty()) |
246 | return TaintedSymbols; // return early if needed |
247 | } |
248 | |
249 | return TaintedSymbols; |
250 | } |
251 | |
252 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
253 | SymbolRef Sym, |
254 | TaintTagType Kind, |
255 | bool returnFirstOnly) { |
256 | std::vector<SymbolRef> TaintedSymbols; |
257 | if (!Sym) |
258 | return TaintedSymbols; |
259 | |
260 | // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570 |
261 | if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions(); |
262 | Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) { |
263 | return {}; |
264 | } |
265 | |
266 | // Traverse all the symbols this symbol depends on to see if any are tainted. |
267 | for (SymbolRef SubSym : Sym->symbols()) { |
268 | if (!isa<SymbolData>(Val: SubSym)) |
269 | continue; |
270 | |
271 | if (const TaintTagType *Tag = State->get<TaintMap>(key: SubSym)) { |
272 | if (*Tag == Kind) { |
273 | TaintedSymbols.push_back(x: SubSym); |
274 | if (returnFirstOnly) |
275 | return TaintedSymbols; // return early if needed |
276 | } |
277 | } |
278 | |
279 | if (const auto *SD = dyn_cast<SymbolDerived>(Val: SubSym)) { |
280 | // If this is a SymbolDerived with a tainted parent, it's also tainted. |
281 | std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl( |
282 | State, Sym: SD->getParentSymbol(), Kind, returnFirstOnly); |
283 | llvm::append_range(C&: TaintedSymbols, R&: TaintedParents); |
284 | if (returnFirstOnly && !TaintedSymbols.empty()) |
285 | return TaintedSymbols; // return early if needed |
286 | |
287 | // If this is a SymbolDerived with the same parent symbol as another |
288 | // tainted SymbolDerived and a region that's a sub-region of that |
289 | // tainted symbol, it's also tainted. |
290 | if (const TaintedSubRegions *Regs = |
291 | State->get<DerivedSymTaint>(key: SD->getParentSymbol())) { |
292 | const TypedValueRegion *R = SD->getRegion(); |
293 | for (auto I : *Regs) { |
294 | // FIXME: The logic to identify tainted regions could be more |
295 | // complete. For example, this would not currently identify |
296 | // overlapping fields in a union as tainted. To identify this we can |
297 | // check for overlapping/nested byte offsets. |
298 | if (Kind == I.second && R->isSubRegionOf(R: I.first)) { |
299 | TaintedSymbols.push_back(x: SD->getParentSymbol()); |
300 | if (returnFirstOnly && !TaintedSymbols.empty()) |
301 | return TaintedSymbols; // return early if needed |
302 | } |
303 | } |
304 | } |
305 | } |
306 | |
307 | // If memory region is tainted, data is also tainted. |
308 | if (const auto *SRV = dyn_cast<SymbolRegionValue>(Val: SubSym)) { |
309 | std::vector<SymbolRef> TaintedRegions = |
310 | getTaintedSymbolsImpl(State, Reg: SRV->getRegion(), K: Kind, returnFirstOnly); |
311 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
312 | if (returnFirstOnly && !TaintedSymbols.empty()) |
313 | return TaintedSymbols; // return early if needed |
314 | } |
315 | |
316 | // If this is a SymbolCast from a tainted value, it's also tainted. |
317 | if (const auto *SC = dyn_cast<SymbolCast>(Val: SubSym)) { |
318 | std::vector<SymbolRef> TaintedCasts = |
319 | getTaintedSymbolsImpl(State, Sym: SC->getOperand(), Kind, returnFirstOnly); |
320 | llvm::append_range(C&: TaintedSymbols, R&: TaintedCasts); |
321 | if (returnFirstOnly && !TaintedSymbols.empty()) |
322 | return TaintedSymbols; // return early if needed |
323 | } |
324 | } |
325 | return TaintedSymbols; |
326 | } |
327 | |