1 | //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file |
11 | /// provides two different array implementations. |
12 | /// |
13 | /// VarStreamArray - Arrays of variable length records. The user specifies |
14 | /// an Extractor type that can extract a record from a given offset and |
15 | /// return the number of bytes consumed by the record. |
16 | /// |
17 | /// FixedStreamArray - Arrays of fixed length records. This is similar in |
18 | /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the |
19 | /// elements of the array need not be laid out in contiguous memory. |
20 | /// |
21 | |
22 | #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H |
23 | #define LLVM_SUPPORT_BINARYSTREAMARRAY_H |
24 | |
25 | #include "llvm/ADT/ArrayRef.h" |
26 | #include "llvm/ADT/iterator.h" |
27 | #include "llvm/Support/Alignment.h" |
28 | #include "llvm/Support/BinaryStreamRef.h" |
29 | #include "llvm/Support/Error.h" |
30 | #include <cassert> |
31 | #include <cstdint> |
32 | |
33 | namespace llvm { |
34 | |
35 | /// VarStreamArrayExtractor is intended to be specialized to provide customized |
36 | /// extraction logic. On input it receives a BinaryStreamRef pointing to the |
37 | /// beginning of the next record, but where the length of the record is not yet |
38 | /// known. Upon completion, it should return an appropriate Error instance if |
39 | /// a record could not be extracted, or if one could be extracted it should |
40 | /// return success and set Len to the number of bytes this record occupied in |
41 | /// the underlying stream, and it should fill out the fields of the value type |
42 | /// Item appropriately to represent the current record. |
43 | /// |
44 | /// You can specialize this template for your own custom value types to avoid |
45 | /// having to specify a second template argument to VarStreamArray (documented |
46 | /// below). |
47 | template <typename T> struct { |
48 | // Method intentionally deleted. You must provide an explicit specialization |
49 | // with the following method implemented. |
50 | Error (BinaryStreamRef Stream, uint32_t &Len, |
51 | T &Item) const = delete; |
52 | }; |
53 | |
54 | /// VarStreamArray represents an array of variable length records backed by a |
55 | /// stream. This could be a contiguous sequence of bytes in memory, it could |
56 | /// be a file on disk, or it could be a PDB stream where bytes are stored as |
57 | /// discontiguous blocks in a file. Usually it is desirable to treat arrays |
58 | /// as contiguous blocks of memory, but doing so with large PDB files, for |
59 | /// example, could mean allocating huge amounts of memory just to allow |
60 | /// re-ordering of stream data to be contiguous before iterating over it. By |
61 | /// abstracting this out, we need not duplicate this memory, and we can |
62 | /// iterate over arrays in arbitrarily formatted streams. Elements are parsed |
63 | /// lazily on iteration, so there is no upfront cost associated with building |
64 | /// or copying a VarStreamArray, no matter how large it may be. |
65 | /// |
66 | /// You create a VarStreamArray by specifying a ValueType and an Extractor type. |
67 | /// If you do not specify an Extractor type, you are expected to specialize |
68 | /// VarStreamArrayExtractor<T> for your ValueType. |
69 | /// |
70 | /// By default an Extractor is default constructed in the class, but in some |
71 | /// cases you might find it useful for an Extractor to maintain state across |
72 | /// extractions. In this case you can provide your own Extractor through a |
73 | /// secondary constructor. The following examples show various ways of |
74 | /// creating a VarStreamArray. |
75 | /// |
76 | /// // Will use VarStreamArrayExtractor<MyType> as the extractor. |
77 | /// VarStreamArray<MyType> MyTypeArray; |
78 | /// |
79 | /// // Will use a default-constructed MyExtractor as the extractor. |
80 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; |
81 | /// |
82 | /// // Will use the specific instance of MyExtractor provided. |
83 | /// // MyExtractor need not be default-constructible in this case. |
84 | /// MyExtractor E(SomeContext); |
85 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); |
86 | /// |
87 | |
88 | template <typename ValueType, typename Extractor> class VarStreamArrayIterator; |
89 | |
90 | template <typename ValueType, |
91 | typename Extractor = VarStreamArrayExtractor<ValueType>> |
92 | class VarStreamArray { |
93 | friend class VarStreamArrayIterator<ValueType, Extractor>; |
94 | |
95 | public: |
96 | typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; |
97 | |
98 | VarStreamArray() = default; |
99 | |
100 | explicit VarStreamArray(const Extractor &E) : E(E) {} |
101 | |
102 | explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) |
103 | : Stream(Stream), Skew(Skew) {} |
104 | |
105 | VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) |
106 | : Stream(Stream), E(E), Skew(Skew) {} |
107 | |
108 | Iterator begin(bool *HadError = nullptr) const { |
109 | return Iterator(*this, E, Skew, nullptr); |
110 | } |
111 | |
112 | bool valid() const { return Stream.valid(); } |
113 | |
114 | bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); } |
115 | |
116 | uint32_t skew() const { return Skew; } |
117 | Iterator end() const { return Iterator(E); } |
118 | |
119 | bool empty() const { return Stream.getLength() == 0; } |
120 | |
121 | VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, |
122 | uint32_t End) const { |
123 | assert(Begin >= Skew); |
124 | // We should never cut off the beginning of the stream since it might be |
125 | // skewed, meaning the initial bytes are important. |
126 | BinaryStreamRef NewStream = Stream.slice(Offset: 0, Len: End); |
127 | return {NewStream, E, Begin}; |
128 | } |
129 | |
130 | /// given an offset into the array's underlying stream, return an |
131 | /// iterator to the record at that offset. This is considered unsafe |
132 | /// since the behavior is undefined if \p Offset does not refer to the |
133 | /// beginning of a valid record. |
134 | Iterator at(uint32_t Offset) const { |
135 | return Iterator(*this, E, Offset, nullptr); |
136 | } |
137 | |
138 | const Extractor &() const { return E; } |
139 | Extractor &() { return E; } |
140 | |
141 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
142 | void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { |
143 | Stream = NewStream; |
144 | Skew = NewSkew; |
145 | } |
146 | |
147 | void drop_front() { Skew += begin()->length(); } |
148 | |
149 | private: |
150 | BinaryStreamRef Stream; |
151 | Extractor E; |
152 | uint32_t Skew = 0; |
153 | }; |
154 | |
155 | template <typename ValueType, typename Extractor> |
156 | class VarStreamArrayIterator |
157 | : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, |
158 | std::forward_iterator_tag, const ValueType> { |
159 | typedef VarStreamArrayIterator<ValueType, Extractor> IterType; |
160 | typedef VarStreamArray<ValueType, Extractor> ArrayType; |
161 | |
162 | public: |
163 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
164 | uint32_t Offset, bool *HadError) |
165 | : IterRef(Array.Stream.drop_front(Offset)), Extract(E), |
166 | Array(&Array), AbsOffset(Offset), HadError(HadError) { |
167 | if (IterRef.getLength() == 0) |
168 | moveToEnd(); |
169 | else { |
170 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
171 | if (EC) { |
172 | consumeError(std::move(EC)); |
173 | markError(); |
174 | } |
175 | } |
176 | } |
177 | |
178 | VarStreamArrayIterator() = default; |
179 | explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} |
180 | ~VarStreamArrayIterator() = default; |
181 | |
182 | bool operator==(const IterType &R) const { |
183 | if (Array && R.Array) { |
184 | // Both have a valid array, make sure they're same. |
185 | assert(Array == R.Array); |
186 | return IterRef == R.IterRef; |
187 | } |
188 | |
189 | // Both iterators are at the end. |
190 | if (!Array && !R.Array) |
191 | return true; |
192 | |
193 | // One is not at the end and one is. |
194 | return false; |
195 | } |
196 | |
197 | const ValueType &operator*() const { |
198 | assert(Array && !HasError); |
199 | return ThisValue; |
200 | } |
201 | |
202 | IterType &operator+=(unsigned N) { |
203 | for (unsigned I = 0; I < N; ++I) { |
204 | // We are done with the current record, discard it so that we are |
205 | // positioned at the next record. |
206 | AbsOffset += ThisLen; |
207 | IterRef = IterRef.drop_front(N: ThisLen); |
208 | if (IterRef.getLength() == 0) { |
209 | // There is nothing after the current record, we must make this an end |
210 | // iterator. |
211 | moveToEnd(); |
212 | } else { |
213 | // There is some data after the current record. |
214 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
215 | if (EC) { |
216 | consumeError(std::move(EC)); |
217 | markError(); |
218 | } else if (ThisLen == 0) { |
219 | // An empty record? Make this an end iterator. |
220 | moveToEnd(); |
221 | } |
222 | } |
223 | } |
224 | return *this; |
225 | } |
226 | |
227 | uint32_t offset() const { return AbsOffset; } |
228 | uint32_t getRecordLength() const { return ThisLen; } |
229 | |
230 | private: |
231 | void moveToEnd() { |
232 | Array = nullptr; |
233 | ThisLen = 0; |
234 | } |
235 | void markError() { |
236 | moveToEnd(); |
237 | HasError = true; |
238 | if (HadError != nullptr) |
239 | *HadError = true; |
240 | } |
241 | |
242 | ValueType ThisValue; |
243 | BinaryStreamRef IterRef; |
244 | Extractor ; |
245 | const ArrayType *Array{nullptr}; |
246 | uint32_t ThisLen{0}; |
247 | uint32_t AbsOffset{0}; |
248 | bool HasError{false}; |
249 | bool *HadError{nullptr}; |
250 | }; |
251 | |
252 | template <typename T> class FixedStreamArrayIterator; |
253 | |
254 | /// FixedStreamArray is similar to VarStreamArray, except with each record |
255 | /// having a fixed-length. As with VarStreamArray, there is no upfront |
256 | /// cost associated with building or copying a FixedStreamArray, as the |
257 | /// memory for each element is not read from the backing stream until that |
258 | /// element is iterated. |
259 | template <typename T> class FixedStreamArray { |
260 | friend class FixedStreamArrayIterator<T>; |
261 | |
262 | public: |
263 | typedef FixedStreamArrayIterator<T> Iterator; |
264 | |
265 | FixedStreamArray() = default; |
266 | explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { |
267 | assert(Stream.getLength() % sizeof(T) == 0); |
268 | } |
269 | |
270 | bool operator==(const FixedStreamArray<T> &Other) const { |
271 | return Stream == Other.Stream; |
272 | } |
273 | |
274 | bool operator!=(const FixedStreamArray<T> &Other) const { |
275 | return !(*this == Other); |
276 | } |
277 | |
278 | FixedStreamArray(const FixedStreamArray &) = default; |
279 | FixedStreamArray &operator=(const FixedStreamArray &) = default; |
280 | |
281 | const T &operator[](uint32_t Index) const { |
282 | assert(Index < size()); |
283 | uint32_t Off = Index * sizeof(T); |
284 | ArrayRef<uint8_t> Data; |
285 | if (auto EC = Stream.readBytes(Offset: Off, Size: sizeof(T), Buffer&: Data)) { |
286 | assert(false && "Unexpected failure reading from stream" ); |
287 | // This should never happen since we asserted that the stream length was |
288 | // an exact multiple of the element size. |
289 | consumeError(Err: std::move(EC)); |
290 | } |
291 | assert(isAddrAligned(Align::Of<T>(), Data.data())); |
292 | return *reinterpret_cast<const T *>(Data.data()); |
293 | } |
294 | |
295 | uint32_t size() const { return Stream.getLength() / sizeof(T); } |
296 | |
297 | bool empty() const { return size() == 0; } |
298 | |
299 | FixedStreamArrayIterator<T> begin() const { |
300 | return FixedStreamArrayIterator<T>(*this, 0); |
301 | } |
302 | |
303 | FixedStreamArrayIterator<T> end() const { |
304 | return FixedStreamArrayIterator<T>(*this, size()); |
305 | } |
306 | |
307 | const T &front() const { return *begin(); } |
308 | const T &back() const { |
309 | FixedStreamArrayIterator<T> I = end(); |
310 | return *(--I); |
311 | } |
312 | |
313 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
314 | |
315 | private: |
316 | BinaryStreamRef Stream; |
317 | }; |
318 | |
319 | template <typename T> |
320 | class FixedStreamArrayIterator |
321 | : public iterator_facade_base<FixedStreamArrayIterator<T>, |
322 | std::random_access_iterator_tag, const T> { |
323 | |
324 | public: |
325 | FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) |
326 | : Array(Array), Index(Index) {} |
327 | |
328 | FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other) |
329 | : Array(Other.Array), Index(Other.Index) {} |
330 | FixedStreamArrayIterator<T> & |
331 | operator=(const FixedStreamArrayIterator<T> &Other) { |
332 | Array = Other.Array; |
333 | Index = Other.Index; |
334 | return *this; |
335 | } |
336 | |
337 | const T &operator*() const { return Array[Index]; } |
338 | const T &operator*() { return Array[Index]; } |
339 | |
340 | bool operator==(const FixedStreamArrayIterator<T> &R) const { |
341 | assert(Array == R.Array); |
342 | return (Index == R.Index) && (Array == R.Array); |
343 | } |
344 | |
345 | FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { |
346 | Index += N; |
347 | return *this; |
348 | } |
349 | |
350 | FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { |
351 | assert(std::ptrdiff_t(Index) >= N); |
352 | Index -= N; |
353 | return *this; |
354 | } |
355 | |
356 | std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { |
357 | assert(Array == R.Array); |
358 | assert(Index >= R.Index); |
359 | return Index - R.Index; |
360 | } |
361 | |
362 | bool operator<(const FixedStreamArrayIterator<T> &RHS) const { |
363 | assert(Array == RHS.Array); |
364 | return Index < RHS.Index; |
365 | } |
366 | |
367 | private: |
368 | FixedStreamArray<T> Array; |
369 | uint32_t Index; |
370 | }; |
371 | |
372 | } // namespace llvm |
373 | |
374 | #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H |
375 | |