1//===-- dfsan_origin.h ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of DataFlowSanitizer.
10//
11// Origin id utils.
12//===----------------------------------------------------------------------===//
13
14#ifndef DFSAN_ORIGIN_H
15#define DFSAN_ORIGIN_H
16
17#include "dfsan_chained_origin_depot.h"
18#include "dfsan_flags.h"
19#include "sanitizer_common/sanitizer_stackdepot.h"
20
21namespace __dfsan {
22
23// Origin handling.
24//
25// Origin is a 32-bit identifier that is attached to any taint value in the
26// program and describes how this memory came to be tainted.
27//
28// Chained origin id is like:
29// zzzz xxxx xxxx xxxx
30//
31// Chained origin id describes an event of storing a taint value to
32// memory. The xxx part is a value of ChainedOriginDepot, which is a mapping of
33// (stack_id, prev_id) -> id, where
34// * stack_id describes the event.
35// StackDepot keeps a mapping between those and corresponding stack traces.
36// * prev_id is another origin id that describes the earlier part of the
37// taint value history. 0 prev_id indicates the start of a chain.
38// Following a chain of prev_id provides the full recorded history of a taint
39// value.
40//
41// This, effectively, defines a forest where nodes are points in value history
42// marked with origin ids, and edges are events that are marked with stack_id.
43//
44// The "zzzz" bits of chained origin id are used to store the length of the
45// origin chain.
46
47class Origin {
48 public:
49 static bool isValidId(u32 id) { return id != 0; }
50
51 u32 raw_id() const { return raw_id_; }
52
53 bool isChainedOrigin() const { return Origin::isValidId(id: raw_id_); }
54
55 u32 getChainedId() const {
56 CHECK(Origin::isValidId(raw_id_));
57 return raw_id_ & kChainedIdMask;
58 }
59
60 // Returns the next origin in the chain and the current stack trace.
61 //
62 // It scans a partition of StackDepot linearly, and is used only by origin
63 // tracking report.
64 Origin getNextChainedOrigin(StackTrace *stack) const {
65 CHECK(Origin::isValidId(raw_id_));
66 u32 prev_id;
67 u32 stack_id = GetChainedOriginDepot()->Get(id: getChainedId(), other: &prev_id);
68 if (stack)
69 *stack = StackDepotGet(id: stack_id);
70 return Origin(prev_id);
71 }
72
73 static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) {
74 int depth = prev.isChainedOrigin() ? prev.depth() : -1;
75 // depth is the length of the chain minus 1.
76 // origin_history_size of 0 means unlimited depth.
77 if (flags().origin_history_size > 0) {
78 ++depth;
79 if (depth >= flags().origin_history_size || depth > kMaxDepth)
80 return prev;
81 }
82
83 StackDepotHandle h = StackDepotPut_WithHandle(stack: *stack);
84 if (!h.valid())
85 return prev;
86
87 if (flags().origin_history_per_stack_limit > 0) {
88 int use_count = h.use_count();
89 if (use_count > flags().origin_history_per_stack_limit)
90 return prev;
91 }
92
93 u32 chained_id;
94 bool inserted =
95 GetChainedOriginDepot()->Put(here_id: h.id(), prev_id: prev.raw_id(), new_id: &chained_id);
96 CHECK((chained_id & kChainedIdMask) == chained_id);
97
98 if (inserted && flags().origin_history_per_stack_limit > 0)
99 h.inc_use_count_unsafe();
100
101 return Origin((depth << kDepthShift) | chained_id);
102 }
103
104 static Origin FromRawId(u32 id) { return Origin(id); }
105
106 private:
107 static const int kDepthBits = 4;
108 static const int kDepthShift = 32 - kDepthBits;
109
110 static const u32 kChainedIdMask = ((u32)-1) >> kDepthBits;
111
112 u32 raw_id_;
113
114 explicit Origin(u32 raw_id) : raw_id_(raw_id) {}
115
116 int depth() const {
117 CHECK(isChainedOrigin());
118 return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1);
119 }
120
121 public:
122 static const int kMaxDepth = (1 << kDepthBits) - 1;
123};
124
125} // namespace __dfsan
126
127#endif // DFSAN_ORIGIN_H
128