1//===-- DataExtractor.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Support/DataExtractor.h"
10#include "llvm/ADT/StringExtras.h"
11#include "llvm/Support/Errc.h"
12#include "llvm/Support/ErrorHandling.h"
13#include "llvm/Support/LEB128.h"
14#include "llvm/Support/SwapByteOrder.h"
15
16using namespace llvm;
17
18bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
19 Error *E) const {
20 if (isValidOffsetForDataOfSize(offset: Offset, length: Size))
21 return true;
22 if (E) {
23 if (Offset <= Data.size())
24 *E = createStringError(
25 EC: errc::illegal_byte_sequence,
26 Fmt: "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
27 ", 0x%" PRIx64 ")",
28 Vals: Data.size(), Vals: Offset, Vals: Offset + Size);
29 else
30 *E = createStringError(EC: errc::invalid_argument,
31 Fmt: "offset 0x%" PRIx64
32 " is beyond the end of data at 0x%zx",
33 Vals: Offset, Vals: Data.size());
34 }
35 return false;
36}
37
38static bool isError(Error *E) { return E && *E; }
39
40template <typename T>
41T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
42 ErrorAsOutParameter ErrAsOut(Err);
43 T val = 0;
44 if (isError(E: Err))
45 return val;
46
47 uint64_t offset = *offset_ptr;
48 if (!prepareRead(Offset: offset, Size: sizeof(T), E: Err))
49 return val;
50 std::memcpy(dest: &val, src: &Data.data()[offset], n: sizeof(val));
51 if (sys::IsLittleEndianHost != IsLittleEndian)
52 sys::swapByteOrder(val);
53
54 // Advance the offset
55 *offset_ptr += sizeof(val);
56 return val;
57}
58
59template <typename T>
60T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
61 Error *Err) const {
62 ErrorAsOutParameter ErrAsOut(Err);
63 if (isError(E: Err))
64 return nullptr;
65
66 uint64_t offset = *offset_ptr;
67
68 if (!prepareRead(Offset: offset, Size: sizeof(*dst) * count, E: Err))
69 return nullptr;
70 for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
71 ++value_ptr, offset += sizeof(*dst))
72 *value_ptr = getU<T>(offset_ptr, Err);
73 // Advance the offset
74 *offset_ptr = offset;
75 // Return a non-NULL pointer to the converted data as an indicator of
76 // success
77 return dst;
78}
79
80uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
81 return getU<uint8_t>(offset_ptr, Err);
82}
83
84uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
85 uint32_t count) const {
86 return getUs<uint8_t>(offset_ptr, dst, count, Err: nullptr);
87}
88
89uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
90 return getUs<uint8_t>(offset_ptr: &C.Offset, dst: Dst, count: Count, Err: &C.Err);
91}
92
93uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
94 return getU<uint16_t>(offset_ptr, Err);
95}
96
97uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
98 uint32_t count) const {
99 return getUs<uint16_t>(offset_ptr, dst, count, Err: nullptr);
100}
101
102uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
103 uint24_t ExtractedVal = getU<uint24_t>(offset_ptr: OffsetPtr, Err);
104 // The 3 bytes are in the correct byte order for the host.
105 return ExtractedVal.getAsUint32(IsLittleEndian: sys::IsLittleEndianHost);
106}
107
108uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
109 return getU<uint32_t>(offset_ptr, Err);
110}
111
112uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
113 uint32_t count) const {
114 return getUs<uint32_t>(offset_ptr, dst, count, Err: nullptr);
115}
116
117uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
118 return getU<uint64_t>(offset_ptr, Err);
119}
120
121uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
122 uint32_t count) const {
123 return getUs<uint64_t>(offset_ptr, dst, count, Err: nullptr);
124}
125
126uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
127 llvm::Error *Err) const {
128 switch (byte_size) {
129 case 1:
130 return getU8(offset_ptr, Err);
131 case 2:
132 return getU16(offset_ptr, Err);
133 case 4:
134 return getU32(offset_ptr, Err);
135 case 8:
136 return getU64(offset_ptr, Err);
137 }
138
139 // For any other byte size, read the bytes and swap/shift if necessary.
140 ErrorAsOutParameter ErrAsOut(Err);
141 uint64_t val = 0;
142 if (isError(E: Err))
143 return val;
144 uint64_t offset = *offset_ptr;
145 if (!prepareRead(Offset: offset, Size: byte_size, E: Err))
146 return val;
147 // Copy into the least significant bytes of val regardless of host
148 // endianness.
149 std::memcpy(dest: reinterpret_cast<char *>(&val) +
150 (sys::IsLittleEndianHost ? 0 : 8 - byte_size),
151 src: &Data.data()[offset], n: byte_size);
152 // Swap the least significant bytes of val if endianness doesn't match.
153 if (sys::IsLittleEndianHost != IsLittleEndian)
154 val = sys::getSwappedBytes(C: val) >> (8 * (8 - byte_size));
155
156 *offset_ptr += byte_size;
157 return val;
158}
159
160int64_t
161DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
162 switch (byte_size) {
163 case 1:
164 return getS8(OffsetPtr: offset_ptr);
165 case 2:
166 return getS16(OffsetPtr: offset_ptr);
167 case 4:
168 return getS32(OffsetPtr: offset_ptr);
169 case 8:
170 return getS64(OffsetPtr: offset_ptr);
171 }
172 llvm_unreachable("getSigned unhandled case!");
173}
174
175StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
176 ErrorAsOutParameter ErrAsOut(Err);
177 if (isError(E: Err))
178 return StringRef();
179
180 uint64_t Start = *OffsetPtr;
181 StringRef::size_type Pos = Data.find(C: '\0', From: Start);
182 if (Pos != StringRef::npos) {
183 *OffsetPtr = Pos + 1;
184 return StringRef(Data.data() + Start, Pos - Start);
185 }
186 if (Err)
187 *Err = createStringError(EC: errc::illegal_byte_sequence,
188 Fmt: "no null terminated string at offset 0x%" PRIx64,
189 Vals: Start);
190 return StringRef();
191}
192
193StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
194 uint64_t Length,
195 StringRef TrimChars) const {
196 StringRef Bytes(getBytes(OffsetPtr, Length));
197 return Bytes.trim(Chars: TrimChars);
198}
199
200StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
201 Error *Err) const {
202 ErrorAsOutParameter ErrAsOut(Err);
203 if (isError(E: Err))
204 return StringRef();
205
206 if (!prepareRead(Offset: *OffsetPtr, Size: Length, E: Err))
207 return StringRef();
208
209 StringRef Result = Data.substr(Start: *OffsetPtr, N: Length);
210 *OffsetPtr += Length;
211 return Result;
212}
213
214template <typename T>
215static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
216 T (&Decoder)(const uint8_t *p, unsigned *n,
217 const uint8_t *end, const char **error)) {
218 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Input: Data);
219 assert(*OffsetPtr <= Bytes.size());
220 ErrorAsOutParameter ErrAsOut(Err);
221 if (isError(E: Err))
222 return T();
223
224 const char *error = nullptr;
225 unsigned bytes_read;
226 T result =
227 Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
228 if (error) {
229 if (Err)
230 *Err = createStringError(EC: errc::illegal_byte_sequence,
231 Fmt: "unable to decode LEB128 at offset 0x%8.8" PRIx64
232 ": %s",
233 Vals: *OffsetPtr, Vals: error);
234 return T();
235 }
236 *OffsetPtr += bytes_read;
237 return result;
238}
239
240uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
241 return getLEB128(Data, OffsetPtr: offset_ptr, Err, Decoder&: decodeULEB128);
242}
243
244int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
245 return getLEB128(Data, OffsetPtr: offset_ptr, Err, Decoder&: decodeSLEB128);
246}
247
248void DataExtractor::skip(Cursor &C, uint64_t Length) const {
249 ErrorAsOutParameter ErrAsOut(C.Err);
250 if (isError(E: &C.Err))
251 return;
252
253 if (prepareRead(Offset: C.Offset, Size: Length, E: &C.Err))
254 C.Offset += Length;
255}
256