1//===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Lightweight arrays that are backed by an arbitrary BinaryStream. This file
11/// provides two different array implementations.
12///
13/// VarStreamArray - Arrays of variable length records. The user specifies
14/// an Extractor type that can extract a record from a given offset and
15/// return the number of bytes consumed by the record.
16///
17/// FixedStreamArray - Arrays of fixed length records. This is similar in
18/// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
19/// elements of the array need not be laid out in contiguous memory.
20///
21
22#ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
23#define LLVM_SUPPORT_BINARYSTREAMARRAY_H
24
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/iterator.h"
27#include "llvm/Support/Alignment.h"
28#include "llvm/Support/BinaryStreamRef.h"
29#include "llvm/Support/Error.h"
30#include <cassert>
31#include <cstdint>
32
33namespace llvm {
34
35/// VarStreamArrayExtractor is intended to be specialized to provide customized
36/// extraction logic. On input it receives a BinaryStreamRef pointing to the
37/// beginning of the next record, but where the length of the record is not yet
38/// known. Upon completion, it should return an appropriate Error instance if
39/// a record could not be extracted, or if one could be extracted it should
40/// return success and set Len to the number of bytes this record occupied in
41/// the underlying stream, and it should fill out the fields of the value type
42/// Item appropriately to represent the current record.
43///
44/// You can specialize this template for your own custom value types to avoid
45/// having to specify a second template argument to VarStreamArray (documented
46/// below).
47template <typename T> struct VarStreamArrayExtractor {
48 // Method intentionally deleted. You must provide an explicit specialization
49 // with the following method implemented.
50 Error operator()(BinaryStreamRef Stream, uint32_t &Len,
51 T &Item) const = delete;
52};
53
54/// VarStreamArray represents an array of variable length records backed by a
55/// stream. This could be a contiguous sequence of bytes in memory, it could
56/// be a file on disk, or it could be a PDB stream where bytes are stored as
57/// discontiguous blocks in a file. Usually it is desirable to treat arrays
58/// as contiguous blocks of memory, but doing so with large PDB files, for
59/// example, could mean allocating huge amounts of memory just to allow
60/// re-ordering of stream data to be contiguous before iterating over it. By
61/// abstracting this out, we need not duplicate this memory, and we can
62/// iterate over arrays in arbitrarily formatted streams. Elements are parsed
63/// lazily on iteration, so there is no upfront cost associated with building
64/// or copying a VarStreamArray, no matter how large it may be.
65///
66/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
67/// If you do not specify an Extractor type, you are expected to specialize
68/// VarStreamArrayExtractor<T> for your ValueType.
69///
70/// By default an Extractor is default constructed in the class, but in some
71/// cases you might find it useful for an Extractor to maintain state across
72/// extractions. In this case you can provide your own Extractor through a
73/// secondary constructor. The following examples show various ways of
74/// creating a VarStreamArray.
75///
76/// // Will use VarStreamArrayExtractor<MyType> as the extractor.
77/// VarStreamArray<MyType> MyTypeArray;
78///
79/// // Will use a default-constructed MyExtractor as the extractor.
80/// VarStreamArray<MyType, MyExtractor> MyTypeArray2;
81///
82/// // Will use the specific instance of MyExtractor provided.
83/// // MyExtractor need not be default-constructible in this case.
84/// MyExtractor E(SomeContext);
85/// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
86///
87
88template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
89
90template <typename ValueType,
91 typename Extractor = VarStreamArrayExtractor<ValueType>>
92class VarStreamArray {
93 friend class VarStreamArrayIterator<ValueType, Extractor>;
94
95public:
96 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
97
98 VarStreamArray() = default;
99
100 explicit VarStreamArray(const Extractor &E) : E(E) {}
101
102 explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
103 : Stream(Stream), Skew(Skew) {}
104
105 VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
106 : Stream(Stream), E(E), Skew(Skew) {}
107
108 Iterator begin(bool *HadError = nullptr) const {
109 return Iterator(*this, E, Skew, nullptr);
110 }
111
112 bool valid() const { return Stream.valid(); }
113
114 bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); }
115
116 uint32_t skew() const { return Skew; }
117 Iterator end() const { return Iterator(E); }
118
119 bool empty() const { return Stream.getLength() == 0; }
120
121 VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
122 uint32_t End) const {
123 assert(Begin >= Skew);
124 // We should never cut off the beginning of the stream since it might be
125 // skewed, meaning the initial bytes are important.
126 BinaryStreamRef NewStream = Stream.slice(Offset: 0, Len: End);
127 return {NewStream, E, Begin};
128 }
129
130 /// given an offset into the array's underlying stream, return an
131 /// iterator to the record at that offset. This is considered unsafe
132 /// since the behavior is undefined if \p Offset does not refer to the
133 /// beginning of a valid record.
134 Iterator at(uint32_t Offset) const {
135 return Iterator(*this, E, Offset, nullptr);
136 }
137
138 const Extractor &getExtractor() const { return E; }
139 Extractor &getExtractor() { return E; }
140
141 BinaryStreamRef getUnderlyingStream() const { return Stream; }
142 void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
143 Stream = NewStream;
144 Skew = NewSkew;
145 }
146
147 void drop_front() { Skew += begin()->length(); }
148
149private:
150 BinaryStreamRef Stream;
151 Extractor E;
152 uint32_t Skew = 0;
153};
154
155template <typename ValueType, typename Extractor>
156class VarStreamArrayIterator
157 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
158 std::forward_iterator_tag, const ValueType> {
159 typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
160 typedef VarStreamArray<ValueType, Extractor> ArrayType;
161
162public:
163 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
164 uint32_t Offset, bool *HadError)
165 : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
166 Array(&Array), AbsOffset(Offset), HadError(HadError) {
167 if (IterRef.getLength() == 0)
168 moveToEnd();
169 else {
170 auto EC = Extract(IterRef, ThisLen, ThisValue);
171 if (EC) {
172 consumeError(std::move(EC));
173 markError();
174 }
175 }
176 }
177
178 VarStreamArrayIterator() = default;
179 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
180 ~VarStreamArrayIterator() = default;
181
182 bool operator==(const IterType &R) const {
183 if (Array && R.Array) {
184 // Both have a valid array, make sure they're same.
185 assert(Array == R.Array);
186 return IterRef == R.IterRef;
187 }
188
189 // Both iterators are at the end.
190 if (!Array && !R.Array)
191 return true;
192
193 // One is not at the end and one is.
194 return false;
195 }
196
197 const ValueType &operator*() const {
198 assert(Array && !HasError);
199 return ThisValue;
200 }
201
202 IterType &operator+=(unsigned N) {
203 for (unsigned I = 0; I < N; ++I) {
204 // We are done with the current record, discard it so that we are
205 // positioned at the next record.
206 AbsOffset += ThisLen;
207 IterRef = IterRef.drop_front(N: ThisLen);
208 if (IterRef.getLength() == 0) {
209 // There is nothing after the current record, we must make this an end
210 // iterator.
211 moveToEnd();
212 } else {
213 // There is some data after the current record.
214 auto EC = Extract(IterRef, ThisLen, ThisValue);
215 if (EC) {
216 consumeError(std::move(EC));
217 markError();
218 } else if (ThisLen == 0) {
219 // An empty record? Make this an end iterator.
220 moveToEnd();
221 }
222 }
223 }
224 return *this;
225 }
226
227 uint32_t offset() const { return AbsOffset; }
228 uint32_t getRecordLength() const { return ThisLen; }
229
230private:
231 void moveToEnd() {
232 Array = nullptr;
233 ThisLen = 0;
234 }
235 void markError() {
236 moveToEnd();
237 HasError = true;
238 if (HadError != nullptr)
239 *HadError = true;
240 }
241
242 ValueType ThisValue;
243 BinaryStreamRef IterRef;
244 Extractor Extract;
245 const ArrayType *Array{nullptr};
246 uint32_t ThisLen{0};
247 uint32_t AbsOffset{0};
248 bool HasError{false};
249 bool *HadError{nullptr};
250};
251
252template <typename T> class FixedStreamArrayIterator;
253
254/// FixedStreamArray is similar to VarStreamArray, except with each record
255/// having a fixed-length. As with VarStreamArray, there is no upfront
256/// cost associated with building or copying a FixedStreamArray, as the
257/// memory for each element is not read from the backing stream until that
258/// element is iterated.
259template <typename T> class FixedStreamArray {
260 friend class FixedStreamArrayIterator<T>;
261
262public:
263 typedef FixedStreamArrayIterator<T> Iterator;
264
265 FixedStreamArray() = default;
266 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
267 assert(Stream.getLength() % sizeof(T) == 0);
268 }
269
270 bool operator==(const FixedStreamArray<T> &Other) const {
271 return Stream == Other.Stream;
272 }
273
274 bool operator!=(const FixedStreamArray<T> &Other) const {
275 return !(*this == Other);
276 }
277
278 FixedStreamArray(const FixedStreamArray &) = default;
279 FixedStreamArray &operator=(const FixedStreamArray &) = default;
280
281 const T &operator[](uint32_t Index) const {
282 assert(Index < size());
283 uint32_t Off = Index * sizeof(T);
284 ArrayRef<uint8_t> Data;
285 if (auto EC = Stream.readBytes(Offset: Off, Size: sizeof(T), Buffer&: Data)) {
286 assert(false && "Unexpected failure reading from stream");
287 // This should never happen since we asserted that the stream length was
288 // an exact multiple of the element size.
289 consumeError(Err: std::move(EC));
290 }
291 assert(isAddrAligned(Align::Of<T>(), Data.data()));
292 return *reinterpret_cast<const T *>(Data.data());
293 }
294
295 uint32_t size() const { return Stream.getLength() / sizeof(T); }
296
297 bool empty() const { return size() == 0; }
298
299 FixedStreamArrayIterator<T> begin() const {
300 return FixedStreamArrayIterator<T>(*this, 0);
301 }
302
303 FixedStreamArrayIterator<T> end() const {
304 return FixedStreamArrayIterator<T>(*this, size());
305 }
306
307 const T &front() const { return *begin(); }
308 const T &back() const {
309 FixedStreamArrayIterator<T> I = end();
310 return *(--I);
311 }
312
313 BinaryStreamRef getUnderlyingStream() const { return Stream; }
314
315private:
316 BinaryStreamRef Stream;
317};
318
319template <typename T>
320class FixedStreamArrayIterator
321 : public iterator_facade_base<FixedStreamArrayIterator<T>,
322 std::random_access_iterator_tag, const T> {
323
324public:
325 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
326 : Array(Array), Index(Index) {}
327
328 FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other)
329 : Array(Other.Array), Index(Other.Index) {}
330 FixedStreamArrayIterator<T> &
331 operator=(const FixedStreamArrayIterator<T> &Other) {
332 Array = Other.Array;
333 Index = Other.Index;
334 return *this;
335 }
336
337 const T &operator*() const { return Array[Index]; }
338 const T &operator*() { return Array[Index]; }
339
340 bool operator==(const FixedStreamArrayIterator<T> &R) const {
341 assert(Array == R.Array);
342 return (Index == R.Index) && (Array == R.Array);
343 }
344
345 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
346 Index += N;
347 return *this;
348 }
349
350 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
351 assert(std::ptrdiff_t(Index) >= N);
352 Index -= N;
353 return *this;
354 }
355
356 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
357 assert(Array == R.Array);
358 assert(Index >= R.Index);
359 return Index - R.Index;
360 }
361
362 bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
363 assert(Array == RHS.Array);
364 return Index < RHS.Index;
365 }
366
367private:
368 FixedStreamArray<T> Array;
369 uint32_t Index;
370};
371
372} // namespace llvm
373
374#endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
375