| 1 | //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Defines basic, non-domain-specific mechanisms for tracking tainted values. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
| 14 | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
| 15 | #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" |
| 16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
| 17 | #include <optional> |
| 18 | |
| 19 | using namespace clang; |
| 20 | using namespace ento; |
| 21 | using namespace taint; |
| 22 | |
| 23 | // Fully tainted symbols: maps each tainted symbol to its taint tag. |
| 24 | REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) |
| 25 | |
| 26 | // Partially tainted symbols: maps a parent symbol to the sub-regions tainted within it, each with its taint tag. |
| 27 | REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, |
| 28 | TaintTagType) |
| 29 | REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) |
| 30 | |
| 31 | void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, |
| 32 | const char *Sep) { |
| 33 | TaintMapTy TM = State->get<TaintMap>(); |
| 34 | |
| 35 | if (!TM.isEmpty()) |
| 36 | Out << "Tainted symbols:" << NL; |
| 37 | |
| 38 | for (const auto &I : TM) |
| 39 | Out << I.first << " : " << I.second << NL; |
| 40 | } |
| 41 | |
| 42 | void taint::dumpTaint(ProgramStateRef State) { |
| 43 | printTaint(State, Out&: llvm::errs()); |
| 44 | } |
| 45 | |
| 46 | ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, |
| 47 | const LocationContext *LCtx, |
| 48 | TaintTagType Kind) { |
| 49 | return addTaint(State, V: State->getSVal(Ex: S, LCtx), Kind); |
| 50 | } |
| 51 | |
| 52 | ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, |
| 53 | TaintTagType Kind) { |
| 54 | SymbolRef Sym = V.getAsSymbol(); |
| 55 | if (Sym) |
| 56 | return addTaint(State, Sym, Kind); |
| 57 | |
| 58 | // If the SVal represents a structure, try to mass-taint all values within the |
| 59 | // structure. For now it only works efficiently on lazy compound values that |
| 60 | // were conjured during a conservative evaluation of a function - either as |
| 61 | // return values of functions that return structures or arrays by value, or as |
| 62 | // values of structures or arrays passed into the function by reference, |
| 63 | // directly or through pointer aliasing. Such lazy compound values are |
| 64 | // characterized by having exactly one binding in their captured store within |
| 65 | // their parent region, which is a conjured symbol default-bound to the base |
| 66 | // region of the parent region. |
| 67 | if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { |
| 68 | if (std::optional<SVal> binding = |
| 69 | State->getStateManager().getStoreManager().getDefaultBinding( |
| 70 | lcv: *LCV)) { |
| 71 | if (SymbolRef Sym = binding->getAsSymbol()) |
| 72 | return addPartialTaint(State, ParentSym: Sym, SubRegion: LCV->getRegion(), Kind); |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | const MemRegion *R = V.getAsRegion(); |
| 77 | return addTaint(State, R, Kind); |
| 78 | } |
| 79 | |
| 80 | ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, |
| 81 | TaintTagType Kind) { |
| 82 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
| 83 | return addTaint(State, Sym: SR->getSymbol(), Kind); |
| 84 | return State; |
| 85 | } |
| 86 | |
| 87 | ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, |
| 88 | TaintTagType Kind) { |
| 89 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
| 90 | // is cast agnostic. |
| 91 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
| 92 | Sym = SC->getOperand(); |
| 93 | |
| 94 | ProgramStateRef NewState = State->set<TaintMap>(K: Sym, E: Kind); |
| 95 | assert(NewState); |
| 96 | return NewState; |
| 97 | } |
| 98 | |
| 99 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { |
| 100 | SymbolRef Sym = V.getAsSymbol(); |
| 101 | if (Sym) |
| 102 | return removeTaint(State, Sym); |
| 103 | |
| 104 | const MemRegion *R = V.getAsRegion(); |
| 105 | return removeTaint(State, R); |
| 106 | } |
| 107 | |
| 108 | ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { |
| 109 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
| 110 | return removeTaint(State, Sym: SR->getSymbol()); |
| 111 | return State; |
| 112 | } |
| 113 | |
| 114 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { |
| 115 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
| 116 | // is cast agnostic. |
| 117 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
| 118 | Sym = SC->getOperand(); |
| 119 | |
| 120 | ProgramStateRef NewState = State->remove<TaintMap>(K: Sym); |
| 121 | assert(NewState); |
| 122 | return NewState; |
| 123 | } |
| 124 | |
| 125 | ProgramStateRef taint::addPartialTaint(ProgramStateRef State, |
| 126 | SymbolRef ParentSym, |
| 127 | const SubRegion *SubRegion, |
| 128 | TaintTagType Kind) { |
| 129 | // Ignore partial taint if the entire parent symbol is already tainted. |
| 130 | if (const TaintTagType *T = State->get<TaintMap>(key: ParentSym)) |
| 131 | if (*T == Kind) |
| 132 | return State; |
| 133 | |
| 134 | // Partial taint applies if only a portion of the symbol is tainted. |
| 135 | if (SubRegion == SubRegion->getBaseRegion()) |
| 136 | return addTaint(State, Sym: ParentSym, Kind); |
| 137 | |
| 138 | const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(key: ParentSym); |
| 139 | TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); |
| 140 | TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); |
| 141 | |
| 142 | Regs = F.add(Old: Regs, K: SubRegion, D: Kind); |
| 143 | ProgramStateRef NewState = State->set<DerivedSymTaint>(K: ParentSym, E: Regs); |
| 144 | assert(NewState); |
| 145 | return NewState; |
| 146 | } |
| 147 | |
| 148 | bool taint::isTainted(ProgramStateRef State, const Stmt *S, |
| 149 | const LocationContext *LCtx, TaintTagType Kind) { |
| 150 | return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
| 151 | .empty(); |
| 152 | } |
| 153 | |
| 154 | bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { |
| 155 | return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
| 156 | .empty(); |
| 157 | } |
| 158 | |
| 159 | bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, |
| 160 | TaintTagType K) { |
| 161 | return !getTaintedSymbolsImpl(State, Reg, Kind: K, /*ReturnFirstOnly=*/returnFirstOnly: true) |
| 162 | .empty(); |
| 163 | } |
| 164 | |
| 165 | bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { |
| 166 | return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
| 167 | .empty(); |
| 168 | } |
| 169 | |
| 170 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
| 171 | const Stmt *S, |
| 172 | const LocationContext *LCtx, |
| 173 | TaintTagType Kind) { |
| 174 | return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
| 175 | } |
| 176 | |
| 177 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V, |
| 178 | TaintTagType Kind) { |
| 179 | return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
| 180 | } |
| 181 | |
| 182 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
| 183 | SymbolRef Sym, |
| 184 | TaintTagType Kind) { |
| 185 | return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
| 186 | } |
| 187 | |
| 188 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
| 189 | const MemRegion *Reg, |
| 190 | TaintTagType Kind) { |
| 191 | return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
| 192 | } |
| 193 | |
| 194 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
| 195 | const Stmt *S, |
| 196 | const LocationContext *LCtx, |
| 197 | TaintTagType Kind, |
| 198 | bool returnFirstOnly) { |
| 199 | SVal val = State->getSVal(Ex: S, LCtx); |
| 200 | return getTaintedSymbolsImpl(State, V: val, Kind, returnFirstOnly); |
| 201 | } |
| 202 | |
| 203 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
| 204 | SVal V, TaintTagType Kind, |
| 205 | bool returnFirstOnly) { |
| 206 | if (SymbolRef Sym = V.getAsSymbol()) |
| 207 | return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); |
| 208 | if (const MemRegion *Reg = V.getAsRegion()) |
| 209 | return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); |
| 210 | |
| 211 | if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { |
| 212 | StoreManager &StoreMgr = State->getStateManager().getStoreManager(); |
| 213 | if (auto DefaultVal = StoreMgr.getDefaultBinding(lcv: *LCV)) { |
| 214 | return getTaintedSymbolsImpl(State, V: *DefaultVal, Kind, returnFirstOnly); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | return {}; |
| 219 | } |
| 220 | |
| 221 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
| 222 | const MemRegion *Reg, |
| 223 | TaintTagType K, |
| 224 | bool returnFirstOnly) { |
| 225 | std::vector<SymbolRef> TaintedSymbols; |
| 226 | if (!Reg) |
| 227 | return TaintedSymbols; |
| 228 | |
| 229 | // Element region (array element) is tainted if the offset is tainted. |
| 230 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: Reg)) { |
| 231 | std::vector<SymbolRef> TaintedIndex = |
| 232 | getTaintedSymbolsImpl(State, V: ER->getIndex(), Kind: K, returnFirstOnly); |
| 233 | llvm::append_range(C&: TaintedSymbols, R&: TaintedIndex); |
| 234 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 235 | return TaintedSymbols; // return early if needed |
| 236 | } |
| 237 | |
| 238 | // Symbolic region is tainted if the corresponding symbol is tainted. |
| 239 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: Reg)) { |
| 240 | std::vector<SymbolRef> TaintedRegions = |
| 241 | getTaintedSymbolsImpl(State, Sym: SR->getSymbol(), Kind: K, returnFirstOnly); |
| 242 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
| 243 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 244 | return TaintedSymbols; // return early if needed |
| 245 | } |
| 246 | |
| 247 | // Any subregion (including Element and Symbolic regions) is tainted if its |
| 248 | // super-region is tainted. |
| 249 | if (const SubRegion *ER = dyn_cast<SubRegion>(Val: Reg)) { |
| 250 | std::vector<SymbolRef> TaintedSubRegions = |
| 251 | getTaintedSymbolsImpl(State, Reg: ER->getSuperRegion(), K, returnFirstOnly); |
| 252 | llvm::append_range(C&: TaintedSymbols, R&: TaintedSubRegions); |
| 253 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 254 | return TaintedSymbols; // return early if needed |
| 255 | } |
| 256 | |
| 257 | return TaintedSymbols; |
| 258 | } |
| 259 | |
| 260 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
| 261 | SymbolRef Sym, |
| 262 | TaintTagType Kind, |
| 263 | bool returnFirstOnly) { |
| 264 | std::vector<SymbolRef> TaintedSymbols; |
| 265 | if (!Sym) |
| 266 | return TaintedSymbols; |
| 267 | |
| 268 | // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570 |
| 269 | if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions(); |
| 270 | Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) { |
| 271 | return {}; |
| 272 | } |
| 273 | |
| 274 | // Traverse all the symbols this symbol depends on to see if any are tainted. |
| 275 | for (SymbolRef SubSym : Sym->symbols()) { |
| 276 | if (!isa<SymbolData>(Val: SubSym)) |
| 277 | continue; |
| 278 | |
| 279 | if (const TaintTagType *Tag = State->get<TaintMap>(key: SubSym)) { |
| 280 | if (*Tag == Kind) { |
| 281 | TaintedSymbols.push_back(x: SubSym); |
| 282 | if (returnFirstOnly) |
| 283 | return TaintedSymbols; // return early if needed |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | if (const auto *SD = dyn_cast<SymbolDerived>(Val: SubSym)) { |
| 288 | // If this is a SymbolDerived with a tainted parent, it's also tainted. |
| 289 | std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl( |
| 290 | State, Sym: SD->getParentSymbol(), Kind, returnFirstOnly); |
| 291 | llvm::append_range(C&: TaintedSymbols, R&: TaintedParents); |
| 292 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 293 | return TaintedSymbols; // return early if needed |
| 294 | |
| 295 | // If this is a SymbolDerived with the same parent symbol as another |
| 296 | // tainted SymbolDerived and a region that's a sub-region of that |
| 297 | // tainted symbol, it's also tainted. |
| 298 | if (const TaintedSubRegions *Regs = |
| 299 | State->get<DerivedSymTaint>(key: SD->getParentSymbol())) { |
| 300 | const TypedValueRegion *R = SD->getRegion(); |
| 301 | for (auto I : *Regs) { |
| 302 | // FIXME: The logic to identify tainted regions could be more |
| 303 | // complete. For example, this would not currently identify |
| 304 | // overlapping fields in a union as tainted. To identify this we can |
| 305 | // check for overlapping/nested byte offsets. |
| 306 | if (Kind == I.second && R->isSubRegionOf(R: I.first)) { |
| 307 | TaintedSymbols.push_back(x: SD->getParentSymbol()); |
| 308 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 309 | return TaintedSymbols; // return early if needed |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | // If memory region is tainted, data is also tainted. |
| 316 | if (const auto *SRV = dyn_cast<SymbolRegionValue>(Val: SubSym)) { |
| 317 | std::vector<SymbolRef> TaintedRegions = |
| 318 | getTaintedSymbolsImpl(State, Reg: SRV->getRegion(), K: Kind, returnFirstOnly); |
| 319 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
| 320 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 321 | return TaintedSymbols; // return early if needed |
| 322 | } |
| 323 | |
| 324 | // If this is a SymbolCast from a tainted value, it's also tainted. |
| 325 | if (const auto *SC = dyn_cast<SymbolCast>(Val: SubSym)) { |
| 326 | std::vector<SymbolRef> TaintedCasts = |
| 327 | getTaintedSymbolsImpl(State, Sym: SC->getOperand(), Kind, returnFirstOnly); |
| 328 | llvm::append_range(C&: TaintedSymbols, R&: TaintedCasts); |
| 329 | if (returnFirstOnly && !TaintedSymbols.empty()) |
| 330 | return TaintedSymbols; // return early if needed |
| 331 | } |
| 332 | } |
| 333 | return TaintedSymbols; |
| 334 | } |
| 335 | |