1 | //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Bitstream/BitstreamReader.h" |
10 | #include "llvm/ADT/StringRef.h" |
11 | #include <cassert> |
12 | #include <optional> |
13 | #include <string> |
14 | |
15 | using namespace llvm; |
16 | |
17 | //===----------------------------------------------------------------------===// |
18 | // BitstreamCursor implementation |
19 | //===----------------------------------------------------------------------===// |
20 | // |
21 | static Error error(const char *Message) { |
22 | return createStringError(EC: std::errc::illegal_byte_sequence, Fmt: Message); |
23 | } |
24 | |
25 | /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. |
26 | Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { |
27 | // Save the current block's state on BlockScope. |
28 | BlockScope.push_back(Elt: Block(CurCodeSize)); |
29 | BlockScope.back().PrevAbbrevs.swap(x&: CurAbbrevs); |
30 | |
31 | // Add the abbrevs specific to this block to the CurAbbrevs list. |
32 | if (BlockInfo) { |
33 | if (const BitstreamBlockInfo::BlockInfo *Info = |
34 | BlockInfo->getBlockInfo(BlockID)) { |
35 | llvm::append_range(C&: CurAbbrevs, R: Info->Abbrevs); |
36 | } |
37 | } |
38 | |
39 | // Get the codesize of this block. |
40 | Expected<uint32_t> MaybeVBR = ReadVBR(NumBits: bitc::CodeLenWidth); |
41 | if (!MaybeVBR) |
42 | return MaybeVBR.takeError(); |
43 | CurCodeSize = MaybeVBR.get(); |
44 | |
45 | if (CurCodeSize > MaxChunkSize) |
46 | return llvm::createStringError( |
47 | EC: std::errc::illegal_byte_sequence, |
48 | Fmt: "can't read more than %zu at a time, trying to read %u" , Vals: +MaxChunkSize, |
49 | Vals: CurCodeSize); |
50 | |
51 | SkipToFourByteBoundary(); |
52 | Expected<word_t> MaybeNum = Read(NumBits: bitc::BlockSizeWidth); |
53 | if (!MaybeNum) |
54 | return MaybeNum.takeError(); |
55 | word_t NumWords = MaybeNum.get(); |
56 | if (NumWordsP) |
57 | *NumWordsP = NumWords; |
58 | |
59 | if (CurCodeSize == 0) |
60 | return llvm::createStringError( |
61 | EC: std::errc::illegal_byte_sequence, |
62 | Fmt: "can't enter sub-block: current code size is 0" ); |
63 | if (AtEndOfStream()) |
64 | return llvm::createStringError( |
65 | EC: std::errc::illegal_byte_sequence, |
66 | Fmt: "can't enter sub block: already at end of stream" ); |
67 | |
68 | return Error::success(); |
69 | } |
70 | |
71 | static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, |
72 | const BitCodeAbbrevOp &Op) { |
73 | assert(!Op.isLiteral() && "Not to be used with literals!" ); |
74 | |
75 | // Decode the value as we are commanded. |
76 | switch (Op.getEncoding()) { |
77 | case BitCodeAbbrevOp::Array: |
78 | case BitCodeAbbrevOp::Blob: |
79 | llvm_unreachable("Should not reach here" ); |
80 | case BitCodeAbbrevOp::Fixed: |
81 | assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); |
82 | return Cursor.Read(NumBits: (unsigned)Op.getEncodingData()); |
83 | case BitCodeAbbrevOp::VBR: |
84 | assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); |
85 | return Cursor.ReadVBR64(NumBits: (unsigned)Op.getEncodingData()); |
86 | case BitCodeAbbrevOp::Char6: |
87 | if (Expected<unsigned> Res = Cursor.Read(NumBits: 6)) |
88 | return BitCodeAbbrevOp::DecodeChar6(V: Res.get()); |
89 | else |
90 | return Res.takeError(); |
91 | } |
92 | llvm_unreachable("invalid abbreviation encoding" ); |
93 | } |
94 | |
95 | /// skipRecord - Read the current record and discard it. |
96 | Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { |
97 | // Skip unabbreviated records by reading past their entries. |
98 | if (AbbrevID == bitc::UNABBREV_RECORD) { |
99 | Expected<uint32_t> MaybeCode = ReadVBR(NumBits: 6); |
100 | if (!MaybeCode) |
101 | return MaybeCode.takeError(); |
102 | unsigned Code = MaybeCode.get(); |
103 | Expected<uint32_t> MaybeVBR = ReadVBR(NumBits: 6); |
104 | if (!MaybeVBR) |
105 | return MaybeVBR.takeError(); |
106 | unsigned NumElts = MaybeVBR.get(); |
107 | for (unsigned i = 0; i != NumElts; ++i) |
108 | if (Expected<uint64_t> Res = ReadVBR64(NumBits: 6)) |
109 | ; // Skip! |
110 | else |
111 | return Res.takeError(); |
112 | return Code; |
113 | } |
114 | |
115 | Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); |
116 | if (!MaybeAbbv) |
117 | return MaybeAbbv.takeError(); |
118 | |
119 | const BitCodeAbbrev *Abbv = MaybeAbbv.get(); |
120 | const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(N: 0); |
121 | unsigned Code; |
122 | if (CodeOp.isLiteral()) |
123 | Code = CodeOp.getLiteralValue(); |
124 | else { |
125 | if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || |
126 | CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) |
127 | return llvm::createStringError( |
128 | EC: std::errc::illegal_byte_sequence, |
129 | Fmt: "Abbreviation starts with an Array or a Blob" ); |
130 | Expected<uint64_t> MaybeCode = readAbbreviatedField(Cursor&: *this, Op: CodeOp); |
131 | if (!MaybeCode) |
132 | return MaybeCode.takeError(); |
133 | Code = MaybeCode.get(); |
134 | } |
135 | |
136 | for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { |
137 | const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(N: i); |
138 | if (Op.isLiteral()) |
139 | continue; |
140 | |
141 | if (Op.getEncoding() != BitCodeAbbrevOp::Array && |
142 | Op.getEncoding() != BitCodeAbbrevOp::Blob) { |
143 | if (Expected<uint64_t> MaybeField = readAbbreviatedField(Cursor&: *this, Op)) |
144 | continue; |
145 | else |
146 | return MaybeField.takeError(); |
147 | } |
148 | |
149 | if (Op.getEncoding() == BitCodeAbbrevOp::Array) { |
150 | // Array case. Read the number of elements as a vbr6. |
151 | Expected<uint32_t> MaybeNum = ReadVBR(NumBits: 6); |
152 | if (!MaybeNum) |
153 | return MaybeNum.takeError(); |
154 | unsigned NumElts = MaybeNum.get(); |
155 | |
156 | // Get the element encoding. |
157 | assert(i+2 == e && "array op not second to last?" ); |
158 | const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(N: ++i); |
159 | |
160 | // Read all the elements. |
161 | // Decode the value as we are commanded. |
162 | switch (EltEnc.getEncoding()) { |
163 | default: |
164 | return error(Message: "Array element type can't be an Array or a Blob" ); |
165 | case BitCodeAbbrevOp::Fixed: |
166 | assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); |
167 | if (Error Err = |
168 | JumpToBit(BitNo: GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * |
169 | EltEnc.getEncodingData())) |
170 | return Err; |
171 | break; |
172 | case BitCodeAbbrevOp::VBR: |
173 | assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); |
174 | for (; NumElts; --NumElts) |
175 | if (Expected<uint64_t> Res = |
176 | ReadVBR64(NumBits: (unsigned)EltEnc.getEncodingData())) |
177 | ; // Skip! |
178 | else |
179 | return Res.takeError(); |
180 | break; |
181 | case BitCodeAbbrevOp::Char6: |
182 | if (Error Err = JumpToBit(BitNo: GetCurrentBitNo() + NumElts * 6)) |
183 | return Err; |
184 | break; |
185 | } |
186 | continue; |
187 | } |
188 | |
189 | assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); |
190 | // Blob case. Read the number of bytes as a vbr6. |
191 | Expected<uint32_t> MaybeNum = ReadVBR(NumBits: 6); |
192 | if (!MaybeNum) |
193 | return MaybeNum.takeError(); |
194 | unsigned NumElts = MaybeNum.get(); |
195 | SkipToFourByteBoundary(); // 32-bit alignment |
196 | |
197 | // Figure out where the end of this blob will be including tail padding. |
198 | const size_t NewEnd = GetCurrentBitNo() + alignTo(Value: NumElts, Align: 4) * 8; |
199 | |
200 | // If this would read off the end of the bitcode file, just set the |
201 | // record to empty and return. |
202 | if (!canSkipToPos(pos: NewEnd/8)) { |
203 | skipToEnd(); |
204 | break; |
205 | } |
206 | |
207 | // Skip over the blob. |
208 | if (Error Err = JumpToBit(BitNo: NewEnd)) |
209 | return Err; |
210 | } |
211 | return Code; |
212 | } |
213 | |
214 | Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, |
215 | SmallVectorImpl<uint64_t> &Vals, |
216 | StringRef *Blob) { |
217 | if (AbbrevID == bitc::UNABBREV_RECORD) { |
218 | Expected<uint32_t> MaybeCode = ReadVBR(NumBits: 6); |
219 | if (!MaybeCode) |
220 | return MaybeCode.takeError(); |
221 | uint32_t Code = MaybeCode.get(); |
222 | Expected<uint32_t> MaybeNumElts = ReadVBR(NumBits: 6); |
223 | if (!MaybeNumElts) |
224 | return error( |
225 | Message: ("Failed to read size: " + toString(E: MaybeNumElts.takeError())) |
226 | .c_str()); |
227 | uint32_t NumElts = MaybeNumElts.get(); |
228 | if (!isSizePlausible(Size: NumElts)) |
229 | return error(Message: "Size is not plausible" ); |
230 | Vals.reserve(N: Vals.size() + NumElts); |
231 | |
232 | for (unsigned i = 0; i != NumElts; ++i) |
233 | if (Expected<uint64_t> MaybeVal = ReadVBR64(NumBits: 6)) |
234 | Vals.push_back(Elt: MaybeVal.get()); |
235 | else |
236 | return MaybeVal.takeError(); |
237 | return Code; |
238 | } |
239 | |
240 | Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); |
241 | if (!MaybeAbbv) |
242 | return MaybeAbbv.takeError(); |
243 | const BitCodeAbbrev *Abbv = MaybeAbbv.get(); |
244 | |
245 | // Read the record code first. |
246 | assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?" ); |
247 | const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(N: 0); |
248 | unsigned Code; |
249 | if (CodeOp.isLiteral()) |
250 | Code = CodeOp.getLiteralValue(); |
251 | else { |
252 | if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || |
253 | CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) |
254 | return error(Message: "Abbreviation starts with an Array or a Blob" ); |
255 | if (Expected<uint64_t> MaybeCode = readAbbreviatedField(Cursor&: *this, Op: CodeOp)) |
256 | Code = MaybeCode.get(); |
257 | else |
258 | return MaybeCode.takeError(); |
259 | } |
260 | |
261 | for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { |
262 | const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(N: i); |
263 | if (Op.isLiteral()) { |
264 | Vals.push_back(Elt: Op.getLiteralValue()); |
265 | continue; |
266 | } |
267 | |
268 | if (Op.getEncoding() != BitCodeAbbrevOp::Array && |
269 | Op.getEncoding() != BitCodeAbbrevOp::Blob) { |
270 | if (Expected<uint64_t> MaybeVal = readAbbreviatedField(Cursor&: *this, Op)) |
271 | Vals.push_back(Elt: MaybeVal.get()); |
272 | else |
273 | return MaybeVal.takeError(); |
274 | continue; |
275 | } |
276 | |
277 | if (Op.getEncoding() == BitCodeAbbrevOp::Array) { |
278 | // Array case. Read the number of elements as a vbr6. |
279 | Expected<uint32_t> MaybeNumElts = ReadVBR(NumBits: 6); |
280 | if (!MaybeNumElts) |
281 | return error( |
282 | Message: ("Failed to read size: " + toString(E: MaybeNumElts.takeError())) |
283 | .c_str()); |
284 | uint32_t NumElts = MaybeNumElts.get(); |
285 | if (!isSizePlausible(Size: NumElts)) |
286 | return error(Message: "Size is not plausible" ); |
287 | Vals.reserve(N: Vals.size() + NumElts); |
288 | |
289 | // Get the element encoding. |
290 | if (i + 2 != e) |
291 | return error(Message: "Array op not second to last" ); |
292 | const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(N: ++i); |
293 | if (!EltEnc.isEncoding()) |
294 | return error( |
295 | Message: "Array element type has to be an encoding of a type" ); |
296 | |
297 | // Read all the elements. |
298 | switch (EltEnc.getEncoding()) { |
299 | default: |
300 | return error(Message: "Array element type can't be an Array or a Blob" ); |
301 | case BitCodeAbbrevOp::Fixed: |
302 | for (; NumElts; --NumElts) |
303 | if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = |
304 | Read(NumBits: (unsigned)EltEnc.getEncodingData())) |
305 | Vals.push_back(Elt: MaybeVal.get()); |
306 | else |
307 | return MaybeVal.takeError(); |
308 | break; |
309 | case BitCodeAbbrevOp::VBR: |
310 | for (; NumElts; --NumElts) |
311 | if (Expected<uint64_t> MaybeVal = |
312 | ReadVBR64(NumBits: (unsigned)EltEnc.getEncodingData())) |
313 | Vals.push_back(Elt: MaybeVal.get()); |
314 | else |
315 | return MaybeVal.takeError(); |
316 | break; |
317 | case BitCodeAbbrevOp::Char6: |
318 | for (; NumElts; --NumElts) |
319 | if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(NumBits: 6)) |
320 | Vals.push_back(Elt: BitCodeAbbrevOp::DecodeChar6(V: MaybeVal.get())); |
321 | else |
322 | return MaybeVal.takeError(); |
323 | } |
324 | continue; |
325 | } |
326 | |
327 | assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); |
328 | // Blob case. Read the number of bytes as a vbr6. |
329 | Expected<uint32_t> MaybeNumElts = ReadVBR(NumBits: 6); |
330 | if (!MaybeNumElts) |
331 | return MaybeNumElts.takeError(); |
332 | uint32_t NumElts = MaybeNumElts.get(); |
333 | SkipToFourByteBoundary(); // 32-bit alignment |
334 | |
335 | // Figure out where the end of this blob will be including tail padding. |
336 | size_t CurBitPos = GetCurrentBitNo(); |
337 | const size_t NewEnd = CurBitPos + alignTo(Value: NumElts, Align: 4) * 8; |
338 | |
339 | // Make sure the bitstream is large enough to contain the blob. |
340 | if (!canSkipToPos(pos: NewEnd/8)) |
341 | return error(Message: "Blob ends too soon" ); |
342 | |
343 | // Otherwise, inform the streamer that we need these bytes in memory. Skip |
344 | // over tail padding first, in case jumping to NewEnd invalidates the Blob |
345 | // pointer. |
346 | if (Error Err = JumpToBit(BitNo: NewEnd)) |
347 | return Err; |
348 | const char *Ptr = (const char *)getPointerToBit(BitNo: CurBitPos, NumBytes: NumElts); |
349 | |
350 | // If we can return a reference to the data, do so to avoid copying it. |
351 | if (Blob) { |
352 | *Blob = StringRef(Ptr, NumElts); |
353 | } else { |
354 | // Otherwise, unpack into Vals with zero extension. |
355 | auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); |
356 | Vals.append(in_start: UPtr, in_end: UPtr + NumElts); |
357 | } |
358 | } |
359 | |
360 | return Code; |
361 | } |
362 | |
363 | Error BitstreamCursor::ReadAbbrevRecord() { |
364 | auto Abbv = std::make_shared<BitCodeAbbrev>(); |
365 | Expected<uint32_t> MaybeNumOpInfo = ReadVBR(NumBits: 5); |
366 | if (!MaybeNumOpInfo) |
367 | return MaybeNumOpInfo.takeError(); |
368 | unsigned NumOpInfo = MaybeNumOpInfo.get(); |
369 | for (unsigned i = 0; i != NumOpInfo; ++i) { |
370 | Expected<word_t> MaybeIsLiteral = Read(NumBits: 1); |
371 | if (!MaybeIsLiteral) |
372 | return MaybeIsLiteral.takeError(); |
373 | bool IsLiteral = MaybeIsLiteral.get(); |
374 | if (IsLiteral) { |
375 | Expected<uint64_t> MaybeOp = ReadVBR64(NumBits: 8); |
376 | if (!MaybeOp) |
377 | return MaybeOp.takeError(); |
378 | Abbv->Add(OpInfo: BitCodeAbbrevOp(MaybeOp.get())); |
379 | continue; |
380 | } |
381 | |
382 | Expected<word_t> MaybeEncoding = Read(NumBits: 3); |
383 | if (!MaybeEncoding) |
384 | return MaybeEncoding.takeError(); |
385 | if (!BitCodeAbbrevOp::isValidEncoding(E: MaybeEncoding.get())) |
386 | return error(Message: "Invalid encoding" ); |
387 | |
388 | BitCodeAbbrevOp::Encoding E = |
389 | (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); |
390 | if (BitCodeAbbrevOp::hasEncodingData(E)) { |
391 | Expected<uint64_t> MaybeData = ReadVBR64(NumBits: 5); |
392 | if (!MaybeData) |
393 | return MaybeData.takeError(); |
394 | uint64_t Data = MaybeData.get(); |
395 | |
396 | // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) |
397 | // and vbr(0) as a literal zero. This is decoded the same way, and avoids |
398 | // a slow path in Read() to have to handle reading zero bits. |
399 | if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && |
400 | Data == 0) { |
401 | Abbv->Add(OpInfo: BitCodeAbbrevOp(0)); |
402 | continue; |
403 | } |
404 | |
405 | if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && |
406 | Data > MaxChunkSize) |
407 | return error(Message: "Fixed or VBR abbrev record with size > MaxChunkData" ); |
408 | |
409 | Abbv->Add(OpInfo: BitCodeAbbrevOp(E, Data)); |
410 | } else |
411 | Abbv->Add(OpInfo: BitCodeAbbrevOp(E)); |
412 | } |
413 | |
414 | if (Abbv->getNumOperandInfos() == 0) |
415 | return error(Message: "Abbrev record with no operands" ); |
416 | CurAbbrevs.push_back(x: std::move(Abbv)); |
417 | |
418 | return Error::success(); |
419 | } |
420 | |
421 | Expected<std::optional<BitstreamBlockInfo>> |
422 | BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { |
423 | if (llvm::Error Err = EnterSubBlock(BlockID: bitc::BLOCKINFO_BLOCK_ID)) |
424 | return Err; |
425 | |
426 | BitstreamBlockInfo NewBlockInfo; |
427 | |
428 | SmallVector<uint64_t, 64> Record; |
429 | BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; |
430 | |
431 | // Read all the records for this module. |
432 | while (true) { |
433 | Expected<BitstreamEntry> MaybeEntry = |
434 | advanceSkippingSubblocks(Flags: AF_DontAutoprocessAbbrevs); |
435 | if (!MaybeEntry) |
436 | return MaybeEntry.takeError(); |
437 | BitstreamEntry Entry = MaybeEntry.get(); |
438 | |
439 | switch (Entry.Kind) { |
440 | case llvm::BitstreamEntry::SubBlock: // Handled for us already. |
441 | case llvm::BitstreamEntry::Error: |
442 | return std::nullopt; |
443 | case llvm::BitstreamEntry::EndBlock: |
444 | return std::move(NewBlockInfo); |
445 | case llvm::BitstreamEntry::Record: |
446 | // The interesting case. |
447 | break; |
448 | } |
449 | |
450 | // Read abbrev records, associate them with CurBID. |
451 | if (Entry.ID == bitc::DEFINE_ABBREV) { |
452 | if (!CurBlockInfo) |
453 | return std::nullopt; |
454 | if (Error Err = ReadAbbrevRecord()) |
455 | return Err; |
456 | |
457 | // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the |
458 | // appropriate BlockInfo. |
459 | CurBlockInfo->Abbrevs.push_back(x: std::move(CurAbbrevs.back())); |
460 | CurAbbrevs.pop_back(); |
461 | continue; |
462 | } |
463 | |
464 | // Read a record. |
465 | Record.clear(); |
466 | Expected<unsigned> MaybeBlockInfo = readRecord(AbbrevID: Entry.ID, Vals&: Record); |
467 | if (!MaybeBlockInfo) |
468 | return MaybeBlockInfo.takeError(); |
469 | switch (MaybeBlockInfo.get()) { |
470 | default: |
471 | break; // Default behavior, ignore unknown content. |
472 | case bitc::BLOCKINFO_CODE_SETBID: |
473 | if (Record.size() < 1) |
474 | return std::nullopt; |
475 | CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo(BlockID: (unsigned)Record[0]); |
476 | break; |
477 | case bitc::BLOCKINFO_CODE_BLOCKNAME: { |
478 | if (!CurBlockInfo) |
479 | return std::nullopt; |
480 | if (!ReadBlockInfoNames) |
481 | break; // Ignore name. |
482 | CurBlockInfo->Name = std::string(Record.begin(), Record.end()); |
483 | break; |
484 | } |
485 | case bitc::BLOCKINFO_CODE_SETRECORDNAME: { |
486 | if (!CurBlockInfo) |
487 | return std::nullopt; |
488 | if (!ReadBlockInfoNames) |
489 | break; // Ignore name. |
490 | CurBlockInfo->RecordNames.emplace_back( |
491 | args: (unsigned)Record[0], args: std::string(Record.begin() + 1, Record.end())); |
492 | break; |
493 | } |
494 | } |
495 | } |
496 | } |
497 | |