| 1 | //===- UnsafeBufferUsageAnalysis.cpp - WPA for UnsafeBufferUsage ----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // UnsafeBufferUsageAnalysis is a noop analysis. |
| 9 | // |
| 10 | // UnsafeBufferUsageAnalysisResult is a map from EntityIds to |
| 11 | // EntityPointerLevelSets |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" |
| 15 | #include "SSAFAnalysesCommon.h" |
| 16 | #include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" |
| 17 | #include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" |
| 18 | #include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h" |
| 19 | #include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" |
| 20 | #include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" |
| 21 | #include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" |
| 22 | #include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" |
| 23 | #include "llvm/Support/Error.h" |
| 24 | #include "llvm/Support/JSON.h" |
| 25 | #include <memory> |
| 26 | |
| 27 | using namespace clang::ssaf; |
| 28 | using namespace llvm; |
| 29 | |
| 30 | namespace { |
| 31 | |
| 32 | json::Object serializeUnsafeBufferUsageAnalysisResult( |
| 33 | const UnsafeBufferUsageAnalysisResult &R, |
| 34 | JSONFormat::EntityIdToJSONFn IdToJSON) { |
| 35 | json::Object Result; |
| 36 | |
| 37 | Result[UnsafeBufferUsageAnalysisResultName] = |
| 38 | entityPointerLevelMapToJSON(Map: R.UnsafeBuffers, IdToJSON); |
| 39 | return Result; |
| 40 | } |
| 41 | |
| 42 | Expected<std::unique_ptr<AnalysisResult>> |
| 43 | deserializeUnsafeBufferUsageAnalysisResult( |
| 44 | const json::Object &Obj, JSONFormat::EntityIdFromJSONFn IdFromJSON) { |
| 45 | const json::Array *Content = |
| 46 | Obj.getArray(K: UnsafeBufferUsageAnalysisResultName); |
| 47 | |
| 48 | if (!Content) |
| 49 | return makeSawButExpectedError(Saw: Obj, Expected: "an object with a key %s" , |
| 50 | ExpectedArgs: UnsafeBufferUsageAnalysisResultName.data()); |
| 51 | |
| 52 | auto UnsafeBuffers = entityPointerLevelMapFromJSON(Content: *Content, IdFromJSON); |
| 53 | |
| 54 | if (!UnsafeBuffers) |
| 55 | return UnsafeBuffers.takeError(); |
| 56 | |
| 57 | auto Ret = std::make_unique<UnsafeBufferUsageAnalysisResult>(); |
| 58 | |
| 59 | Ret->UnsafeBuffers = std::move(*UnsafeBuffers); |
| 60 | return Ret; |
| 61 | } |
| 62 | |
| 63 | JSONFormat::AnalysisResultRegistry::Add<UnsafeBufferUsageAnalysisResult> |
| 64 | RegisterUnsafeBufferUsageResultForJSON( |
| 65 | serializeUnsafeBufferUsageAnalysisResult, |
| 66 | deserializeUnsafeBufferUsageAnalysisResult); |
| 67 | |
| 68 | class UnsafeBufferUsageAnalysis final |
| 69 | : public SummaryAnalysis<UnsafeBufferUsageAnalysisResult, |
| 70 | UnsafeBufferUsageEntitySummary> { |
| 71 | public: |
| 72 | llvm::Error add(EntityId Id, |
| 73 | const UnsafeBufferUsageEntitySummary &Summary) override { |
| 74 | auto UnsafeBuffersOfEntity = getUnsafeBuffers(Summary); |
| 75 | |
| 76 | getResult().UnsafeBuffers[Id] = EntityPointerLevelSet( |
| 77 | UnsafeBuffersOfEntity.begin(), UnsafeBuffersOfEntity.end()); |
| 78 | return llvm::Error::success(); |
| 79 | } |
| 80 | }; |
| 81 | |
| 82 | AnalysisRegistry::Add<UnsafeBufferUsageAnalysis> |
| 83 | RegisterUnsafeBufferUsageAnalysis( |
| 84 | "Whole-program unsafe buffer usage analysis" ); |
| 85 | |
| 86 | //===----------------------------------------------------------------------===// |
| 87 | // UnsafeBufferReachableAnalysis---computes reachable unsafe buffer nodes |
| 88 | //===----------------------------------------------------------------------===// |
| 89 | |
| 90 | json::Object serializeUnsafeBufferReachableAnalysisResult( |
| 91 | const UnsafeBufferReachableAnalysisResult &R, |
| 92 | JSONFormat::EntityIdToJSONFn IdToJSON) { |
| 93 | json::Object Result; |
| 94 | |
| 95 | Result[UnsafeBufferReachableAnalysisResultName] = |
| 96 | entityPointerLevelMapToJSON(Map: R.Reachables, IdToJSON); |
| 97 | return Result; |
| 98 | } |
| 99 | |
| 100 | Expected<std::unique_ptr<AnalysisResult>> |
| 101 | deserializeUnsafeBufferReachableAnalysisResult( |
| 102 | const json::Object &Obj, JSONFormat::EntityIdFromJSONFn IdFromJSON) { |
| 103 | const json::Array *Content = |
| 104 | Obj.getArray(K: UnsafeBufferReachableAnalysisResultName); |
| 105 | |
| 106 | if (!Content) |
| 107 | return makeSawButExpectedError( |
| 108 | Saw: Obj, Expected: "an object with a key %s" , |
| 109 | ExpectedArgs: UnsafeBufferReachableAnalysisResultName.data()); |
| 110 | |
| 111 | auto Reachables = entityPointerLevelMapFromJSON(Content: *Content, IdFromJSON); |
| 112 | |
| 113 | if (!Reachables) |
| 114 | return Reachables.takeError(); |
| 115 | |
| 116 | auto Ret = std::make_unique<UnsafeBufferReachableAnalysisResult>(); |
| 117 | |
| 118 | Ret->Reachables = std::move(*Reachables); |
| 119 | return Ret; |
| 120 | } |
| 121 | |
| 122 | JSONFormat::AnalysisResultRegistry::Add<UnsafeBufferReachableAnalysisResult> |
| 123 | RegisterUnsafeBufferReachableResultForJSON( |
| 124 | serializeUnsafeBufferReachableAnalysisResult, |
| 125 | deserializeUnsafeBufferReachableAnalysisResult); |
| 126 | |
| 127 | /// Computes all the reachable "nodes" (pointers) in a pointer flow graph from a |
| 128 | /// provided starter node set. Specifically, the starter set is the unsafe |
| 129 | /// pointers found by `UnsafeBufferUsageAnalysis`. |
| 130 | class UnsafeBufferReachableAnalysis |
| 131 | : public DerivedAnalysis<UnsafeBufferReachableAnalysisResult, |
| 132 | PointerFlowAnalysisResult, |
| 133 | UnsafeBufferUsageAnalysisResult> { |
| 134 | |
| 135 | /// BoundsPropagationGraph adds bounds propagation semantics to the |
| 136 | /// pointer-flow graph, which represents the set of static pointer assignment |
| 137 | /// sites collected from the source code. Consider the following example: |
| 138 | /// |
| 139 | /// void f(int ***p, int **q) { |
| 140 | /// *p = q; |
| 141 | /// (**p)[5] = 0; |
| 142 | /// } |
| 143 | /// |
| 144 | /// There is one static pointer assignment thus one pointer-flow edge: (p, 2) |
| 145 | /// -> (q, 1). In terms of bounds propagation, this assignment implies that if |
| 146 | /// 'p' at pointer level 2 requires bounds, 'q' at pointer level 1 must also |
| 147 | /// have them. Furthermore, this relationship propagates to deeper indirection |
| 148 | /// levels: if 'p' at level 3 requires bounds, so does 'q' at level 2. |
| 149 | /// |
| 150 | /// In the example above, `(**p)` requires bounds (due to the array index), |
| 151 | /// and therefore `*q` must require bounds as well. |
| 152 | /// |
| 153 | /// To generalize the idea, the BoundsPropagationGraph is defined as a super |
| 154 | /// graph of the input pointer-flow graph by: |
| 155 | /// |
| 156 | /// For each edge (src, i) -> (dest, j) in the pointer-flow graph, the |
| 157 | /// BoundsPropagationGraph has a finite set of edges |
| 158 | /// {(src, i + d) -> (dest, j + d) | 0 <= d < UB}, where UB is an upper |
| 159 | /// bound based on the maximum pointer level the pointer type can have. |
| 160 | struct BoundsPropagationGraph { |
| 161 | private: |
| 162 | const std::map<EntityPointerLevel, EntityPointerLevelSet> &PointerFlows; |
| 163 | |
| 164 | public: |
| 165 | BoundsPropagationGraph(const EdgeSet &PointerFlows) |
| 166 | : PointerFlows(PointerFlows) {} |
| 167 | |
| 168 | /// Returns the EntityPointerLevelSet that are reachable from \p Src by |
| 169 | /// one edge in the BoundsPropagationGraph. |
| 170 | EntityPointerLevelSet getDestNodes(const EntityPointerLevel &Src) const { |
| 171 | unsigned SrcPtrLv = Src.getPointerLevel(); |
| 172 | EntityPointerLevelSet Result; |
| 173 | |
| 174 | for (unsigned P = 1; P <= SrcPtrLv; ++P) { |
| 175 | auto I = PointerFlows.find(x: buildEntityPointerLevel(Src.getEntity(), P)); |
| 176 | |
| 177 | if (I != PointerFlows.end()) { |
| 178 | unsigned Delta = SrcPtrLv - P; |
| 179 | for (const auto &EPL : I->second) |
| 180 | Result.insert(x: buildEntityPointerLevel( |
| 181 | EPL.getEntity(), EPL.getPointerLevel() + Delta)); |
| 182 | } |
| 183 | } |
| 184 | return Result; |
| 185 | } |
| 186 | }; |
| 187 | |
| 188 | std::map<EntityId, BoundsPropagationGraph> BPG; |
| 189 | |
| 190 | // Use pointers for efficiency. EPLs are in tree-based containers that only |
| 191 | // grow. So pointers to them are stable. |
| 192 | using EPLPtr = const EntityPointerLevel *; |
| 193 | |
| 194 | // Find all outgoing edges from `EPL` in the `Graph`, insert their |
| 195 | // destination nodes into `Reachables`, and add newly discovered nodes to |
| 196 | // `Worklist`: |
| 197 | void updateReachablesWithOutgoings(EPLPtr EPL, |
| 198 | std::vector<EPLPtr> &WorkList) { |
| 199 | for (auto &[Id, SubGraph] : BPG) { |
| 200 | auto R = SubGraph.getDestNodes(Src: *EPL); |
| 201 | |
| 202 | for (const auto &Dst : R) { |
| 203 | auto [It, Inserted] = getResult().Reachables[Id].insert(x: Dst); |
| 204 | if (Inserted) |
| 205 | WorkList.push_back(x: &*It); |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | public: |
| 211 | llvm::Error |
| 212 | initialize(const PointerFlowAnalysisResult &PtrFlowGraph, |
| 213 | const UnsafeBufferUsageAnalysisResult &Starter) override { |
| 214 | for (auto &[Id, SubGraph] : PtrFlowGraph.Edges) |
| 215 | BPG.try_emplace(k: Id, args: BoundsPropagationGraph(SubGraph)); |
| 216 | assert(getResult().Reachables.empty()); |
| 217 | getResult().Reachables.insert(first: Starter.begin(), last: Starter.end()); |
| 218 | return llvm::Error::success(); |
| 219 | } |
| 220 | |
| 221 | llvm::Expected<bool> step() override { |
| 222 | auto &Reachables = getResult().Reachables; |
| 223 | // Simple DFS: |
| 224 | std::vector<EPLPtr> Worklist; |
| 225 | |
| 226 | for (auto &[Id, EPLs] : Reachables) |
| 227 | for (auto &EPL : EPLs) |
| 228 | Worklist.push_back(x: &EPL); |
| 229 | |
| 230 | while (!Worklist.empty()) { |
| 231 | EPLPtr Node = Worklist.back(); |
| 232 | Worklist.pop_back(); |
| 233 | |
| 234 | updateReachablesWithOutgoings(EPL: Node, WorkList&: Worklist); |
| 235 | } |
| 236 | // This is not an iterative algorithm so stop iteration by retruning false: |
| 237 | return false; |
| 238 | } |
| 239 | }; |
| 240 | |
| 241 | AnalysisRegistry::Add<UnsafeBufferReachableAnalysis> |
| 242 | RegisterUnsafeBufferReachableAnalysis( |
| 243 | "Reachable pointers from unsafe buffer usage in pointer flow graph" ); |
| 244 | |
| 245 | } // namespace |
| 246 | |
| 247 | namespace clang::ssaf { |
| 248 | // NOLINTNEXTLINE(misc-use-internal-linkage) |
| 249 | volatile int UnsafeBufferUsageAnalysisAnchorSource = 0; |
| 250 | } // namespace clang::ssaf |
| 251 | |