1//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/DebugInfo/MSF/MSFCommon.h"
12#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
13#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
14#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
15#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
16#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
17#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
18#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
19#include "llvm/DebugInfo/PDB/Native/RawError.h"
20#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
21#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
22#include "llvm/Support/BinaryStream.h"
23#include "llvm/Support/BinaryStreamArray.h"
24#include "llvm/Support/BinaryStreamReader.h"
25#include "llvm/Support/Endian.h"
26#include "llvm/Support/Error.h"
27#include "llvm/Support/Path.h"
28#include <algorithm>
29#include <cassert>
30#include <cstdint>
31
32using namespace llvm;
33using namespace llvm::codeview;
34using namespace llvm::msf;
35using namespace llvm::pdb;
36
37namespace {
38typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39} // end anonymous namespace
40
41PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
42 BumpPtrAllocator &Allocator)
43 : FilePath(std::string(Path)), Allocator(Allocator),
44 Buffer(std::move(PdbFileBuffer)) {}
45
46PDBFile::~PDBFile() = default;
47
48StringRef PDBFile::getFilePath() const { return FilePath; }
49
50StringRef PDBFile::getFileDirectory() const {
51 return sys::path::parent_path(path: FilePath);
52}
53
54uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55
56uint32_t PDBFile::getFreeBlockMapBlock() const {
57 return ContainerLayout.SB->FreeBlockMapBlock;
58}
59
60uint32_t PDBFile::getBlockCount() const {
61 return ContainerLayout.SB->NumBlocks;
62}
63
64uint32_t PDBFile::getNumDirectoryBytes() const {
65 return ContainerLayout.SB->NumDirectoryBytes;
66}
67
68uint32_t PDBFile::getBlockMapIndex() const {
69 return ContainerLayout.SB->BlockMapAddr;
70}
71
72uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73
74uint32_t PDBFile::getNumDirectoryBlocks() const {
75 return msf::bytesToBlocks(NumBytes: ContainerLayout.SB->NumDirectoryBytes,
76 BlockSize: ContainerLayout.SB->BlockSize);
77}
78
79uint64_t PDBFile::getBlockMapOffset() const {
80 return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81 ContainerLayout.SB->BlockSize;
82}
83
84uint32_t PDBFile::getNumStreams() const {
85 return ContainerLayout.StreamSizes.size();
86}
87
88uint32_t PDBFile::getMaxStreamSize() const {
89 return *llvm::max_element(Range: ContainerLayout.StreamSizes);
90}
91
92uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
93 return ContainerLayout.StreamSizes[StreamIndex];
94}
95
96ArrayRef<support::ulittle32_t>
97PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
98 return ContainerLayout.StreamMap[StreamIndex];
99}
100
101uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }
102
103Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
104 uint32_t NumBytes) const {
105 uint64_t StreamBlockOffset = msf::blockToOffset(BlockNumber: BlockIndex, BlockSize: getBlockSize());
106
107 ArrayRef<uint8_t> Result;
108 if (auto EC = Buffer->readBytes(Offset: StreamBlockOffset, Size: NumBytes, Buffer&: Result))
109 return std::move(EC);
110 return Result;
111}
112
113Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
114 ArrayRef<uint8_t> Data) const {
115 return make_error<RawError>(Args: raw_error_code::not_writable,
116 Args: "PDBFile is immutable");
117}
118
119Error PDBFile::parseFileHeaders() {
120 BinaryStreamReader Reader(*Buffer);
121
122 // Initialize SB.
123 const msf::SuperBlock *SB = nullptr;
124 if (auto EC = Reader.readObject(Dest&: SB)) {
125 consumeError(Err: std::move(EC));
126 return make_error<RawError>(Args: raw_error_code::corrupt_file,
127 Args: "MSF superblock is missing");
128 }
129
130 if (auto EC = msf::validateSuperBlock(SB: *SB))
131 return EC;
132
133 if (Buffer->getLength() % SB->BlockSize != 0)
134 return make_error<RawError>(Args: raw_error_code::corrupt_file,
135 Args: "File size is not a multiple of block size");
136 ContainerLayout.SB = SB;
137
138 // Initialize Free Page Map.
139 ContainerLayout.FreePageMap.resize(N: SB->NumBlocks);
140 // The Fpm exists either at block 1 or block 2 of the MSF. However, this
141 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
142 // thusly an equal number of total blocks in the file. For a block size
143 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
144 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
145 // the Fpm is split across the file at `getBlockSize()` intervals. As a
146 // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
147 // for any non-negative integer k is an Fpm block. In theory, we only really
148 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
149 // current versions of the MSF format already expect the Fpm to be arranged
150 // at getBlockSize() intervals, so we have to be compatible.
151 // See the function fpmPn() for more information:
152 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
153 auto FpmStream =
154 MappedBlockStream::createFpmStream(Layout: ContainerLayout, MsfData: *Buffer, Allocator);
155 BinaryStreamReader FpmReader(*FpmStream);
156 ArrayRef<uint8_t> FpmBytes;
157 if (auto EC = FpmReader.readBytes(Buffer&: FpmBytes, Size: FpmReader.bytesRemaining()))
158 return EC;
159 uint32_t BlocksRemaining = getBlockCount();
160 uint32_t BI = 0;
161 for (auto Byte : FpmBytes) {
162 uint32_t BlocksThisByte = std::min(a: BlocksRemaining, b: 8U);
163 for (uint32_t I = 0; I < BlocksThisByte; ++I) {
164 if (Byte & (1 << I))
165 ContainerLayout.FreePageMap[BI] = true;
166 --BlocksRemaining;
167 ++BI;
168 }
169 }
170
171 Reader.setOffset(getBlockMapOffset());
172 if (auto EC = Reader.readArray(Array&: ContainerLayout.DirectoryBlocks,
173 NumElements: getNumDirectoryBlocks()))
174 return EC;
175
176 return Error::success();
177}
178
179Error PDBFile::parseStreamData() {
180 assert(ContainerLayout.SB);
181 if (DirectoryStream)
182 return Error::success();
183
184 uint32_t NumStreams = 0;
185
186 // Normally you can't use a MappedBlockStream without having fully parsed the
187 // PDB file, because it accesses the directory and various other things, which
188 // is exactly what we are attempting to parse. By specifying a custom
189 // subclass of IPDBStreamData which only accesses the fields that have already
190 // been parsed, we can avoid this and reuse MappedBlockStream.
191 auto DS = MappedBlockStream::createDirectoryStream(Layout: ContainerLayout, MsfData: *Buffer,
192 Allocator);
193 BinaryStreamReader Reader(*DS);
194 if (auto EC = Reader.readInteger(Dest&: NumStreams))
195 return EC;
196
197 if (auto EC = Reader.readArray(Array&: ContainerLayout.StreamSizes, NumElements: NumStreams))
198 return EC;
199 for (uint32_t I = 0; I < NumStreams; ++I) {
200 uint32_t StreamSize = getStreamByteSize(StreamIndex: I);
201 // FIXME: What does StreamSize ~0U mean?
202 uint64_t NumExpectedStreamBlocks =
203 StreamSize == UINT32_MAX
204 ? 0
205 : msf::bytesToBlocks(NumBytes: StreamSize, BlockSize: ContainerLayout.SB->BlockSize);
206
207 // For convenience, we store the block array contiguously. This is because
208 // if someone calls setStreamMap(), it is more convenient to be able to call
209 // it with an ArrayRef instead of setting up a StreamRef. Since the
210 // DirectoryStream is cached in the class and thus lives for the life of the
211 // class, we can be guaranteed that readArray() will return a stable
212 // reference, even if it has to allocate from its internal pool.
213 ArrayRef<support::ulittle32_t> Blocks;
214 if (auto EC = Reader.readArray(Array&: Blocks, NumElements: NumExpectedStreamBlocks))
215 return EC;
216 for (uint32_t Block : Blocks) {
217 uint64_t BlockEndOffset =
218 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
219 if (BlockEndOffset > getFileSize())
220 return make_error<RawError>(Args: raw_error_code::corrupt_file,
221 Args: "Stream block map is corrupt.");
222 }
223 ContainerLayout.StreamMap.push_back(x: Blocks);
224 }
225
226 // We should have read exactly SB->NumDirectoryBytes bytes.
227 assert(Reader.bytesRemaining() == 0);
228 DirectoryStream = std::move(DS);
229 return Error::success();
230}
231
232ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
233 return ContainerLayout.DirectoryBlocks;
234}
235
236std::unique_ptr<MappedBlockStream>
237PDBFile::createIndexedStream(uint16_t SN) const {
238 if (SN == kInvalidStreamIndex)
239 return nullptr;
240 return MappedBlockStream::createIndexedStream(Layout: ContainerLayout, MsfData: *Buffer, StreamIndex: SN,
241 Allocator);
242}
243
244MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
245 MSFStreamLayout Result;
246 auto Blocks = getStreamBlockList(StreamIndex: StreamIdx);
247 Result.Blocks.assign(first: Blocks.begin(), last: Blocks.end());
248 Result.Length = getStreamByteSize(StreamIndex: StreamIdx);
249 return Result;
250}
251
252msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
253 return msf::getFpmStreamLayout(Msf: ContainerLayout);
254}
255
256Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
257 if (!Globals) {
258 auto DbiS = getPDBDbiStream();
259 if (!DbiS)
260 return DbiS.takeError();
261
262 auto GlobalS =
263 safelyCreateIndexedStream(StreamIndex: DbiS->getGlobalSymbolStreamIndex());
264 if (!GlobalS)
265 return GlobalS.takeError();
266 auto TempGlobals = std::make_unique<GlobalsStream>(args: std::move(*GlobalS));
267 if (auto EC = TempGlobals->reload())
268 return std::move(EC);
269 Globals = std::move(TempGlobals);
270 }
271 return *Globals;
272}
273
274Expected<InfoStream &> PDBFile::getPDBInfoStream() {
275 if (!Info) {
276 auto InfoS = safelyCreateIndexedStream(StreamIndex: StreamPDB);
277 if (!InfoS)
278 return InfoS.takeError();
279 auto TempInfo = std::make_unique<InfoStream>(args: std::move(*InfoS));
280 if (auto EC = TempInfo->reload())
281 return std::move(EC);
282 Info = std::move(TempInfo);
283 }
284 return *Info;
285}
286
287Expected<DbiStream &> PDBFile::getPDBDbiStream() {
288 if (!Dbi) {
289 auto DbiS = safelyCreateIndexedStream(StreamIndex: StreamDBI);
290 if (!DbiS)
291 return DbiS.takeError();
292 auto TempDbi = std::make_unique<DbiStream>(args: std::move(*DbiS));
293 if (auto EC = TempDbi->reload(Pdb: this))
294 return std::move(EC);
295 Dbi = std::move(TempDbi);
296 }
297 return *Dbi;
298}
299
300Expected<TpiStream &> PDBFile::getPDBTpiStream() {
301 if (!Tpi) {
302 auto TpiS = safelyCreateIndexedStream(StreamIndex: StreamTPI);
303 if (!TpiS)
304 return TpiS.takeError();
305 auto TempTpi = std::make_unique<TpiStream>(args&: *this, args: std::move(*TpiS));
306 if (auto EC = TempTpi->reload())
307 return std::move(EC);
308 Tpi = std::move(TempTpi);
309 }
310 return *Tpi;
311}
312
313Expected<TpiStream &> PDBFile::getPDBIpiStream() {
314 if (!Ipi) {
315 if (!hasPDBIpiStream())
316 return make_error<RawError>(Args: raw_error_code::no_stream);
317
318 auto IpiS = safelyCreateIndexedStream(StreamIndex: StreamIPI);
319 if (!IpiS)
320 return IpiS.takeError();
321 auto TempIpi = std::make_unique<TpiStream>(args&: *this, args: std::move(*IpiS));
322 if (auto EC = TempIpi->reload())
323 return std::move(EC);
324 Ipi = std::move(TempIpi);
325 }
326 return *Ipi;
327}
328
329Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
330 if (!Publics) {
331 auto DbiS = getPDBDbiStream();
332 if (!DbiS)
333 return DbiS.takeError();
334
335 auto PublicS =
336 safelyCreateIndexedStream(StreamIndex: DbiS->getPublicSymbolStreamIndex());
337 if (!PublicS)
338 return PublicS.takeError();
339 auto TempPublics = std::make_unique<PublicsStream>(args: std::move(*PublicS));
340 if (auto EC = TempPublics->reload())
341 return std::move(EC);
342 Publics = std::move(TempPublics);
343 }
344 return *Publics;
345}
346
347Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
348 if (!Symbols) {
349 auto DbiS = getPDBDbiStream();
350 if (!DbiS)
351 return DbiS.takeError();
352
353 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
354 auto SymbolS = safelyCreateIndexedStream(StreamIndex: SymbolStreamNum);
355 if (!SymbolS)
356 return SymbolS.takeError();
357
358 auto TempSymbols = std::make_unique<SymbolStream>(args: std::move(*SymbolS));
359 if (auto EC = TempSymbols->reload())
360 return std::move(EC);
361 Symbols = std::move(TempSymbols);
362 }
363 return *Symbols;
364}
365
366Expected<PDBStringTable &> PDBFile::getStringTable() {
367 if (!Strings) {
368 auto NS = safelyCreateNamedStream(Name: "/names");
369 if (!NS)
370 return NS.takeError();
371
372 auto N = std::make_unique<PDBStringTable>();
373 BinaryStreamReader Reader(**NS);
374 if (auto EC = N->reload(Reader))
375 return std::move(EC);
376 assert(Reader.bytesRemaining() == 0);
377 StringTableStream = std::move(*NS);
378 Strings = std::move(N);
379 }
380 return *Strings;
381}
382
383Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
384 if (!InjectedSources) {
385 auto IJS = safelyCreateNamedStream(Name: "/src/headerblock");
386 if (!IJS)
387 return IJS.takeError();
388
389 auto Strings = getStringTable();
390 if (!Strings)
391 return Strings.takeError();
392
393 auto IJ = std::make_unique<InjectedSourceStream>(args: std::move(*IJS));
394 if (auto EC = IJ->reload(Strings: *Strings))
395 return std::move(EC);
396 InjectedSources = std::move(IJ);
397 }
398 return *InjectedSources;
399}
400
401uint32_t PDBFile::getPointerSize() {
402 auto DbiS = getPDBDbiStream();
403 if (!DbiS)
404 return 0;
405 PDB_Machine Machine = DbiS->getMachineType();
406 if (Machine == PDB_Machine::Amd64)
407 return 8;
408 return 4;
409}
410
411bool PDBFile::hasPDBDbiStream() const {
412 return StreamDBI < getNumStreams() && getStreamByteSize(StreamIndex: StreamDBI) > 0;
413}
414
415bool PDBFile::hasPDBGlobalsStream() {
416 auto DbiS = getPDBDbiStream();
417 if (!DbiS) {
418 consumeError(Err: DbiS.takeError());
419 return false;
420 }
421
422 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
423}
424
425bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
426
427bool PDBFile::hasPDBIpiStream() const {
428 if (!hasPDBInfoStream())
429 return false;
430
431 if (StreamIPI >= getNumStreams())
432 return false;
433
434 auto &InfoStream = cantFail(ValOrErr: const_cast<PDBFile *>(this)->getPDBInfoStream());
435 return InfoStream.containsIdStream();
436}
437
438bool PDBFile::hasPDBPublicsStream() {
439 auto DbiS = getPDBDbiStream();
440 if (!DbiS) {
441 consumeError(Err: DbiS.takeError());
442 return false;
443 }
444 return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
445}
446
447bool PDBFile::hasPDBSymbolStream() {
448 auto DbiS = getPDBDbiStream();
449 if (!DbiS)
450 return false;
451 return DbiS->getSymRecordStreamIndex() < getNumStreams();
452}
453
454bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
455
456bool PDBFile::hasPDBStringTable() {
457 auto IS = getPDBInfoStream();
458 if (!IS)
459 return false;
460 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name: "/names");
461 if (!ExpectedNSI) {
462 consumeError(Err: ExpectedNSI.takeError());
463 return false;
464 }
465 assert(*ExpectedNSI < getNumStreams());
466 return true;
467}
468
469bool PDBFile::hasPDBInjectedSourceStream() {
470 auto IS = getPDBInfoStream();
471 if (!IS)
472 return false;
473 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name: "/src/headerblock");
474 if (!ExpectedNSI) {
475 consumeError(Err: ExpectedNSI.takeError());
476 return false;
477 }
478 assert(*ExpectedNSI < getNumStreams());
479 return true;
480}
481
482/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
483/// stream with that index actually exists. If it does not, the return value
484/// will have an MSFError with code msf_error_code::no_stream. Else, the return
485/// value will contain the stream returned by createIndexedStream().
486Expected<std::unique_ptr<MappedBlockStream>>
487PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
488 if (StreamIndex >= getNumStreams())
489 // This rejects kInvalidStreamIndex with an error as well.
490 return make_error<RawError>(Args: raw_error_code::no_stream);
491 return createIndexedStream(SN: StreamIndex);
492}
493
494Expected<std::unique_ptr<MappedBlockStream>>
495PDBFile::safelyCreateNamedStream(StringRef Name) {
496 auto IS = getPDBInfoStream();
497 if (!IS)
498 return IS.takeError();
499
500 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
501 if (!ExpectedNSI)
502 return ExpectedNSI.takeError();
503 uint32_t NameStreamIndex = *ExpectedNSI;
504
505 return safelyCreateIndexedStream(StreamIndex: NameStreamIndex);
506}
507