1 | //== RangedConstraintManager.h ----------------------------------*- C++ -*--==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Ranged constraint manager, built on SimpleConstraintManager. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H |
14 | #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H |
15 | |
16 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
17 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
18 | #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h" |
19 | #include "llvm/ADT/APSInt.h" |
20 | #include "llvm/Support/Allocator.h" |
21 | |
22 | namespace clang { |
23 | |
24 | namespace ento { |
25 | |
26 | /// A Range represents the closed range [from, to]. The caller must |
27 | /// guarantee that from <= to. Note that Range is immutable, so as not |
28 | /// to subvert RangeSet's immutability. |
29 | class Range { |
30 | public: |
31 | Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) { |
32 | assert(From <= To); |
33 | } |
34 | |
35 | Range(const llvm::APSInt &Point) : Range(Point, Point) {} |
36 | |
37 | bool Includes(const llvm::APSInt &Point) const { |
38 | return From() <= Point && Point <= To(); |
39 | } |
40 | const llvm::APSInt &From() const { return *Impl.first; } |
41 | const llvm::APSInt &To() const { return *Impl.second; } |
42 | const llvm::APSInt *getConcreteValue() const { |
43 | return &From() == &To() ? &From() : nullptr; |
44 | } |
45 | |
46 | void Profile(llvm::FoldingSetNodeID &ID) const { |
47 | ID.AddPointer(Ptr: &From()); |
48 | ID.AddPointer(Ptr: &To()); |
49 | } |
50 | void dump(raw_ostream &OS) const; |
51 | void dump() const; |
52 | |
53 | // In order to keep non-overlapping ranges sorted, we can compare only From |
54 | // points. |
55 | bool operator<(const Range &RHS) const { return From() < RHS.From(); } |
56 | |
57 | bool operator==(const Range &RHS) const { return Impl == RHS.Impl; } |
58 | bool operator!=(const Range &RHS) const { return !operator==(RHS); } |
59 | |
60 | private: |
61 | std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl; |
62 | }; |
63 | |
64 | /// @class RangeSet is a persistent set of non-overlapping ranges. |
65 | /// |
66 | /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which |
67 | /// also supports the most common operations performed on range sets. |
68 | /// |
69 | /// Empty set corresponds to an overly constrained symbol meaning that there |
70 | /// are no possible values for that symbol. |
71 | class RangeSet { |
72 | public: |
73 | class Factory; |
74 | |
75 | private: |
76 | // We use llvm::SmallVector as the underlying container for the following |
77 | // reasons: |
78 | // |
79 | // * Range sets are usually very simple, 1 or 2 ranges. |
80 | // That's why llvm::ImmutableSet is not perfect. |
81 | // |
82 | // * Ranges in sets are NOT overlapping, so it is natural to keep them |
83 | // sorted for efficient operations and queries. For this reason, |
84 | // llvm::SmallSet doesn't fit the requirements, it is not sorted when it |
85 | // is a vector. |
86 | // |
87 | // * Range set operations usually a bit harder than add/remove a range. |
88 | // Complex operations might do many of those for just one range set. |
89 | // Formerly it used to be llvm::ImmutableSet, which is inefficient for our |
90 | // purposes as we want to make these operations BOTH immutable AND |
91 | // efficient. |
92 | // |
93 | // * Iteration over ranges is widespread and a more cache-friendly |
94 | // structure is preferred. |
95 | using ImplType = llvm::SmallVector<Range, 4>; |
96 | |
97 | struct ContainerType : public ImplType, public llvm::FoldingSetNode { |
98 | void Profile(llvm::FoldingSetNodeID &ID) const { |
99 | for (const Range &It : *this) { |
100 | It.Profile(ID); |
101 | } |
102 | } |
103 | }; |
104 | // This is a non-owning pointer to an actual container. |
105 | // The memory is fully managed by the factory and is alive as long as the |
106 | // factory itself is alive. |
107 | // It is a pointer as opposed to a reference, so we can easily reassign |
108 | // RangeSet objects. |
109 | using UnderlyingType = const ContainerType *; |
110 | UnderlyingType Impl; |
111 | |
112 | public: |
113 | using const_iterator = ImplType::const_iterator; |
114 | |
115 | const_iterator begin() const { return Impl->begin(); } |
116 | const_iterator end() const { return Impl->end(); } |
117 | size_t size() const { return Impl->size(); } |
118 | |
119 | bool isEmpty() const { return Impl->empty(); } |
120 | |
121 | class Factory { |
122 | public: |
123 | Factory(BasicValueFactory &BV) : ValueFactory(BV) {} |
124 | |
125 | /// Create a new set with all ranges from both LHS and RHS. |
126 | /// Possible intersections are not checked here. |
127 | /// |
128 | /// Complexity: O(N + M) |
129 | /// where N = size(LHS), M = size(RHS) |
130 | RangeSet add(RangeSet LHS, RangeSet RHS); |
131 | /// Create a new set with all ranges from the original set plus the new one. |
132 | /// Possible intersections are not checked here. |
133 | /// |
134 | /// Complexity: O(N) |
135 | /// where N = size(Original) |
136 | RangeSet add(RangeSet Original, Range Element); |
137 | /// Create a new set with all ranges from the original set plus the point. |
138 | /// Possible intersections are not checked here. |
139 | /// |
140 | /// Complexity: O(N) |
141 | /// where N = size(Original) |
142 | RangeSet add(RangeSet Original, const llvm::APSInt &Point); |
143 | /// Create a new set which is a union of two given ranges. |
144 | /// Possible intersections are not checked here. |
145 | /// |
146 | /// Complexity: O(N + M) |
147 | /// where N = size(LHS), M = size(RHS) |
148 | RangeSet unite(RangeSet LHS, RangeSet RHS); |
149 | /// Create a new set by uniting given range set with the given range. |
150 | /// All intersections and adjacent ranges are handled here. |
151 | /// |
152 | /// Complexity: O(N) |
153 | /// where N = size(Original) |
154 | RangeSet unite(RangeSet Original, Range Element); |
155 | /// Create a new set by uniting given range set with the given point. |
156 | /// All intersections and adjacent ranges are handled here. |
157 | /// |
158 | /// Complexity: O(N) |
159 | /// where N = size(Original) |
160 | RangeSet unite(RangeSet Original, llvm::APSInt Point); |
161 | /// Create a new set by uniting given range set with the given range |
162 | /// between points. All intersections and adjacent ranges are handled here. |
163 | /// |
164 | /// Complexity: O(N) |
165 | /// where N = size(Original) |
166 | RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To); |
167 | |
168 | RangeSet getEmptySet() { return &EmptySet; } |
169 | |
170 | /// Create a new set with just one range. |
171 | /// @{ |
172 | RangeSet getRangeSet(Range Origin); |
173 | RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) { |
174 | return getRangeSet(Origin: Range(From, To)); |
175 | } |
176 | RangeSet getRangeSet(const llvm::APSInt &Origin) { |
177 | return getRangeSet(From: Origin, To: Origin); |
178 | } |
179 | /// @} |
180 | |
181 | /// Intersect the given range sets. |
182 | /// |
183 | /// Complexity: O(N + M) |
184 | /// where N = size(LHS), M = size(RHS) |
185 | RangeSet intersect(RangeSet LHS, RangeSet RHS); |
186 | /// Intersect the given set with the closed range [Lower, Upper]. |
187 | /// |
188 | /// Unlike the Range type, this range uses modular arithmetic, corresponding |
189 | /// to the common treatment of C integer overflow. Thus, if the Lower bound |
190 | /// is greater than the Upper bound, the range is taken to wrap around. This |
191 | /// is equivalent to taking the intersection with the two ranges [Min, |
192 | /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers |
193 | /// between Upper and Lower. |
194 | /// |
195 | /// Complexity: O(N) |
196 | /// where N = size(What) |
197 | RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper); |
198 | /// Intersect the given range with the given point. |
199 | /// |
200 | /// The result can be either an empty set or a set containing the given |
201 | /// point depending on whether the point is in the range set. |
202 | /// |
203 | /// Complexity: O(logN) |
204 | /// where N = size(What) |
205 | RangeSet intersect(RangeSet What, llvm::APSInt Point); |
206 | |
207 | /// Delete the given point from the range set. |
208 | /// |
209 | /// Complexity: O(N) |
210 | /// where N = size(From) |
211 | RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point); |
212 | /// Negate the given range set. |
213 | /// |
214 | /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus |
215 | /// operation under the values of the type. |
216 | /// |
217 | /// We also handle MIN because applying unary minus to MIN does not change |
218 | /// it. |
219 | /// Example 1: |
220 | /// char x = -128; // -128 is a MIN value in a range of 'char' |
221 | /// char y = -x; // y: -128 |
222 | /// |
223 | /// Example 2: |
224 | /// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' |
225 | /// unsigned char y = -x; // y: 0 |
226 | /// |
227 | /// And it makes us to separate the range |
228 | /// like [MIN, N] to [MIN, MIN] U [-N, MAX]. |
229 | /// For instance, whole range is {-128..127} and subrange is [-128,-126], |
230 | /// thus [-128,-127,-126,...] negates to [-128,...,126,127]. |
231 | /// |
232 | /// Negate restores disrupted ranges on bounds, |
233 | /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. |
234 | /// |
235 | /// Negate is a self-inverse function, i.e. negate(negate(R)) == R. |
236 | /// |
237 | /// Complexity: O(N) |
238 | /// where N = size(What) |
239 | RangeSet negate(RangeSet What); |
240 | /// Performs promotions, truncations and conversions of the given set. |
241 | /// |
242 | /// This function is optimized for each of the six cast cases: |
243 | /// - noop |
244 | /// - conversion |
245 | /// - truncation |
246 | /// - truncation-conversion |
247 | /// - promotion |
248 | /// - promotion-conversion |
249 | /// |
250 | /// NOTE: This function is NOT self-inverse for truncations, because of |
251 | /// the higher bits loss: |
252 | /// - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt. |
253 | /// - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar. |
254 | /// But it is self-inverse for all the rest casts. |
255 | /// |
256 | /// Complexity: |
257 | /// - Noop O(1); |
258 | /// - Truncation O(N^2); |
259 | /// - Another case O(N); |
260 | /// where N = size(What) |
261 | RangeSet castTo(RangeSet What, APSIntType Ty); |
262 | RangeSet castTo(RangeSet What, QualType T); |
263 | |
264 | /// Return associated value factory. |
265 | BasicValueFactory &getValueFactory() const { return ValueFactory; } |
266 | |
267 | private: |
268 | /// Return a persistent version of the given container. |
269 | RangeSet makePersistent(ContainerType &&From); |
270 | /// Construct a new persistent version of the given container. |
271 | ContainerType *construct(ContainerType &&From); |
272 | |
273 | RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS); |
274 | /// NOTE: This function relies on the fact that all values in the |
275 | /// containers are persistent (created via BasicValueFactory::getValue). |
276 | ContainerType unite(const ContainerType &LHS, const ContainerType &RHS); |
277 | |
278 | /// This is a helper function for `castTo` method. Implies not to be used |
279 | /// separately. |
280 | /// Performs a truncation case of a cast operation. |
281 | ContainerType truncateTo(RangeSet What, APSIntType Ty); |
282 | |
283 | /// This is a helper function for `castTo` method. Implies not to be used |
284 | /// separately. |
285 | /// Performs a conversion case and a promotion-conversion case for signeds |
286 | /// of a cast operation. |
287 | ContainerType convertTo(RangeSet What, APSIntType Ty); |
288 | |
289 | /// This is a helper function for `castTo` method. Implies not to be used |
290 | /// separately. |
291 | /// Performs a promotion for unsigneds only. |
292 | ContainerType promoteTo(RangeSet What, APSIntType Ty); |
293 | |
294 | // Many operations include producing new APSInt values and that's why |
295 | // we need this factory. |
296 | BasicValueFactory &ValueFactory; |
297 | // Allocator for all the created containers. |
298 | // Containers might own their own memory and that's why it is specific |
299 | // for the type, so it calls container destructors upon deletion. |
300 | llvm::SpecificBumpPtrAllocator<ContainerType> Arena; |
301 | // Usually we deal with the same ranges and range sets over and over. |
302 | // Here we track all created containers and try not to repeat ourselves. |
303 | llvm::FoldingSet<ContainerType> Cache; |
304 | static ContainerType EmptySet; |
305 | }; |
306 | |
307 | RangeSet(const RangeSet &) = default; |
308 | RangeSet &operator=(const RangeSet &) = default; |
309 | RangeSet(RangeSet &&) = default; |
310 | RangeSet &operator=(RangeSet &&) = default; |
311 | ~RangeSet() = default; |
312 | |
313 | /// Construct a new RangeSet representing '{ [From, To] }'. |
314 | RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To) |
315 | : RangeSet(F.getRangeSet(From, To)) {} |
316 | |
317 | /// Construct a new RangeSet representing the given point as a range. |
318 | RangeSet(Factory &F, const llvm::APSInt &Point) |
319 | : RangeSet(F.getRangeSet(Origin: Point)) {} |
320 | |
321 | static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) { |
322 | ID.AddPointer(Ptr: RS.Impl); |
323 | } |
324 | |
325 | /// Profile - Generates a hash profile of this RangeSet for use |
326 | /// by FoldingSet. |
327 | void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, RS: *this); } |
328 | |
329 | /// getConcreteValue - If a symbol is constrained to equal a specific integer |
330 | /// constant then this method returns that value. Otherwise, it returns |
331 | /// NULL. |
332 | const llvm::APSInt *getConcreteValue() const { |
333 | return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr; |
334 | } |
335 | |
336 | /// Get the minimal value covered by the ranges in the set. |
337 | /// |
338 | /// Complexity: O(1) |
339 | const llvm::APSInt &getMinValue() const; |
340 | /// Get the maximal value covered by the ranges in the set. |
341 | /// |
342 | /// Complexity: O(1) |
343 | const llvm::APSInt &getMaxValue() const; |
344 | |
345 | bool isUnsigned() const; |
346 | uint32_t getBitWidth() const; |
347 | APSIntType getAPSIntType() const; |
348 | |
349 | /// Test whether the given point is contained by any of the ranges. |
350 | /// |
351 | /// Complexity: O(logN) |
352 | /// where N = size(this) |
353 | bool contains(llvm::APSInt Point) const { return containsImpl(Point); } |
354 | |
355 | bool containsZero() const { |
356 | APSIntType T{getMinValue()}; |
357 | return contains(Point: T.getZeroValue()); |
358 | } |
359 | |
360 | /// Test if the range is the [0,0] range. |
361 | /// |
362 | /// Complexity: O(1) |
363 | bool encodesFalseRange() const { |
364 | const llvm::APSInt *Constant = getConcreteValue(); |
365 | return Constant && Constant->isZero(); |
366 | } |
367 | |
368 | /// Test if the range doesn't contain zero. |
369 | /// |
370 | /// Complexity: O(logN) |
371 | /// where N = size(this) |
372 | bool encodesTrueRange() const { return !containsZero(); } |
373 | |
374 | void dump(raw_ostream &OS) const; |
375 | void dump() const; |
376 | |
377 | bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; } |
378 | bool operator!=(const RangeSet &Other) const { return !(*this == Other); } |
379 | |
380 | private: |
381 | /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {} |
382 | /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {} |
383 | |
384 | /// Pin given points to the type represented by the current range set. |
385 | /// |
386 | /// This makes parameter points to be in-out parameters. |
387 | /// In order to maintain consistent types across all of the ranges in the set |
388 | /// and to keep all the operations to compare ONLY points of the same type, we |
389 | /// need to pin every point before any operation. |
390 | /// |
391 | /// @Returns true if the given points can be converted to the target type |
392 | /// without changing the values (i.e. trivially) and false otherwise. |
393 | /// @{ |
394 | bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const; |
395 | bool pin(llvm::APSInt &Point) const; |
396 | /// @} |
397 | |
398 | // This version of this function modifies its arguments (pins it). |
399 | bool containsImpl(llvm::APSInt &Point) const; |
400 | |
401 | friend class Factory; |
402 | }; |
403 | |
404 | using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>; |
405 | ConstraintMap getConstraintMap(ProgramStateRef State); |
406 | |
407 | class RangedConstraintManager : public SimpleConstraintManager { |
408 | public: |
409 | RangedConstraintManager(ExprEngine *EE, SValBuilder &SB) |
410 | : SimpleConstraintManager(EE, SB) {} |
411 | |
412 | ~RangedConstraintManager() override; |
413 | |
414 | //===------------------------------------------------------------------===// |
415 | // Implementation for interface from SimpleConstraintManager. |
416 | //===------------------------------------------------------------------===// |
417 | |
418 | ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym, |
419 | bool Assumption) override; |
420 | |
421 | ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym, |
422 | const llvm::APSInt &From, |
423 | const llvm::APSInt &To, |
424 | bool InRange) override; |
425 | |
426 | ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym, |
427 | bool Assumption) override; |
428 | |
429 | protected: |
430 | /// Assume a constraint between a symbolic expression and a concrete integer. |
431 | virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym, |
432 | BinaryOperator::Opcode op, |
433 | const llvm::APSInt &Int); |
434 | |
435 | //===------------------------------------------------------------------===// |
436 | // Interface that subclasses must implement. |
437 | //===------------------------------------------------------------------===// |
438 | |
439 | // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison |
440 | // operation for the method being invoked. |
441 | |
442 | virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, |
443 | const llvm::APSInt &V, |
444 | const llvm::APSInt &Adjustment) = 0; |
445 | |
446 | virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, |
447 | const llvm::APSInt &V, |
448 | const llvm::APSInt &Adjustment) = 0; |
449 | |
450 | virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, |
451 | const llvm::APSInt &V, |
452 | const llvm::APSInt &Adjustment) = 0; |
453 | |
454 | virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, |
455 | const llvm::APSInt &V, |
456 | const llvm::APSInt &Adjustment) = 0; |
457 | |
458 | virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, |
459 | const llvm::APSInt &V, |
460 | const llvm::APSInt &Adjustment) = 0; |
461 | |
462 | virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, |
463 | const llvm::APSInt &V, |
464 | const llvm::APSInt &Adjustment) = 0; |
465 | |
466 | virtual ProgramStateRef assumeSymWithinInclusiveRange( |
467 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
468 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; |
469 | |
470 | virtual ProgramStateRef assumeSymOutsideInclusiveRange( |
471 | ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, |
472 | const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; |
473 | |
474 | //===------------------------------------------------------------------===// |
475 | // Internal implementation. |
476 | //===------------------------------------------------------------------===// |
477 | private: |
478 | static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment); |
479 | }; |
480 | |
481 | /// Try to simplify a given symbolic expression based on the constraints in |
482 | /// State. This is needed because the Environment bindings are not getting |
483 | /// updated when a new constraint is added to the State. If the symbol is |
484 | /// simplified to a non-symbol (e.g. to a constant) then the original symbol |
485 | /// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE |
486 | /// functions where we can work only with symbols. Use the other function |
487 | /// (simplifyToSVal) if you are interested in a simplification that may yield |
488 | /// a concrete constant value. |
489 | SymbolRef simplify(ProgramStateRef State, SymbolRef Sym); |
490 | |
491 | /// Try to simplify a given symbolic expression's associated `SVal` based on the |
492 | /// constraints in State. This is very similar to `simplify`, but this function |
493 | /// always returns the simplified SVal. The simplified SVal might be a single |
494 | /// constant (i.e. `ConcreteInt`). |
495 | SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym); |
496 | |
497 | } // namespace ento |
498 | } // namespace clang |
499 | |
500 | REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap) |
501 | |
502 | #endif |
503 | |