1//===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ObjectYAML/MinidumpYAML.h"
10#include "llvm/ObjectYAML/yaml2obj.h"
11#include "llvm/Support/ConvertUTF.h"
12#include "llvm/Support/raw_ostream.h"
13#include <optional>
14
15using namespace llvm;
16using namespace llvm::minidump;
17using namespace llvm::MinidumpYAML;
18
19namespace {
20/// A helper class to manage the placement of various structures into the final
21/// minidump binary. Space for objects can be allocated via various allocate***
22/// methods, while the final minidump file is written by calling the writeTo
23/// method. The plain versions of allocation functions take a reference to the
24/// data which is to be written (and hence the data must be available until
25/// writeTo is called), while the "New" versions allocate the data in an
26/// allocator-managed buffer, which is available until the allocator object is
27/// destroyed. For both kinds of functions, it is possible to modify the
28/// data for which the space has been "allocated" until the final writeTo call.
29/// This is useful for "linking" the allocated structures via their offsets.
30class BlobAllocator {
31public:
32 size_t tell() const { return NextOffset; }
33
34 size_t allocateCallback(size_t Size,
35 std::function<void(raw_ostream &)> Callback) {
36 size_t Offset = NextOffset;
37 NextOffset += Size;
38 Callbacks.push_back(x: std::move(Callback));
39 return Offset;
40 }
41
42 size_t allocateBytes(ArrayRef<uint8_t> Data) {
43 return allocateCallback(
44 Size: Data.size(), Callback: [Data](raw_ostream &OS) { OS << toStringRef(Input: Data); });
45 }
46
47 size_t allocateBytes(yaml::BinaryRef Data) {
48 return allocateCallback(Size: Data.binary_size(), Callback: [Data](raw_ostream &OS) {
49 Data.writeAsBinary(OS);
50 });
51 }
52
53 template <typename T> size_t allocateArray(ArrayRef<T> Data) {
54 return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
55 sizeof(T) * Data.size()});
56 }
57
58 template <typename T, typename RangeType>
59 std::pair<size_t, MutableArrayRef<T>>
60 allocateNewArray(const iterator_range<RangeType> &Range);
61
62 template <typename T> size_t allocateObject(const T &Data) {
63 return allocateArray(ArrayRef(Data));
64 }
65
66 template <typename T, typename... Types>
67 std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
68 T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
69 return {allocateObject(*Object), Object};
70 }
71
72 size_t allocateString(StringRef Str);
73
74 void writeTo(raw_ostream &OS) const;
75
76private:
77 size_t NextOffset = 0;
78
79 BumpPtrAllocator Temporaries;
80 std::vector<std::function<void(raw_ostream &)>> Callbacks;
81};
82} // namespace
83
84template <typename T, typename RangeType>
85std::pair<size_t, MutableArrayRef<T>>
86BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
87 size_t Num = std::distance(Range.begin(), Range.end());
88 MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
89 std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
90 return {allocateArray(Array), Array};
91}
92
93size_t BlobAllocator::allocateString(StringRef Str) {
94 SmallVector<UTF16, 32> WStr;
95 bool OK = convertUTF8ToUTF16String(SrcUTF8: Str, DstUTF16&: WStr);
96 assert(OK && "Invalid UTF8 in Str?");
97 (void)OK;
98
99 // The utf16 string is null-terminated, but the terminator is not counted in
100 // the string size.
101 WStr.push_back(Elt: 0);
102 size_t Result =
103 allocateNewObject<support::ulittle32_t>(Args: 2 * (WStr.size() - 1)).first;
104 allocateNewArray<support::ulittle16_t>(Range: make_range(x: WStr.begin(), y: WStr.end()));
105 return Result;
106}
107
108void BlobAllocator::writeTo(raw_ostream &OS) const {
109 size_t BeginOffset = OS.tell();
110 for (const auto &Callback : Callbacks)
111 Callback(OS);
112 assert(OS.tell() == BeginOffset + NextOffset &&
113 "Callbacks wrote an unexpected number of bytes.");
114 (void)BeginOffset;
115}
116
117static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
118 return {.DataSize: support::ulittle32_t(Data.binary_size()),
119 .RVA: support::ulittle32_t(File.allocateBytes(Data))};
120}
121
122static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) {
123 File.allocateObject(Data: S.MDExceptionStream);
124
125 size_t DataEnd = File.tell();
126
127 // Lay out the thread context data, (which is not a part of the stream).
128 // TODO: This usually (always?) matches the thread context of the
129 // corresponding thread, and may overlap memory regions as well. We could
130 // add a level of indirection to the MinidumpYAML format (like an array of
131 // Blobs that the LocationDescriptors index into) to be able to distinguish
132 // the cases where location descriptions overlap vs happen to reference
133 // identical data.
134 S.MDExceptionStream.ThreadContext = layout(File, Data: S.ThreadContext);
135
136 return DataEnd;
137}
138
139static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
140 Range.Entry.Memory = layout(File, Data: Range.Content);
141}
142
143static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
144 M.Entry.ModuleNameRVA = File.allocateString(Str: M.Name);
145
146 M.Entry.CvRecord = layout(File, Data: M.CvRecord);
147 M.Entry.MiscRecord = layout(File, Data: M.MiscRecord);
148}
149
150static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
151 T.Entry.Stack.Memory = layout(File, Data: T.Stack);
152 T.Entry.Context = layout(File, Data: T.Context);
153}
154
155template <typename EntryT>
156static size_t layout(BlobAllocator &File,
157 MinidumpYAML::detail::ListStream<EntryT> &S) {
158
159 File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
160 for (auto &E : S.Entries)
161 File.allocateObject(E.Entry);
162
163 size_t DataEnd = File.tell();
164
165 // Lay out the auxiliary data, (which is not a part of the stream).
166 DataEnd = File.tell();
167 for (auto &E : S.Entries)
168 layout(File, E);
169
170 return DataEnd;
171}
172
173static Directory layout(BlobAllocator &File, Stream &S) {
174 Directory Result;
175 Result.Type = S.Type;
176 Result.Location.RVA = File.tell();
177 std::optional<size_t> DataEnd;
178 switch (S.Kind) {
179 case Stream::StreamKind::Exception:
180 DataEnd = layout(File, S&: cast<MinidumpYAML::ExceptionStream>(Val&: S));
181 break;
182 case Stream::StreamKind::MemoryInfoList: {
183 MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(Val&: S);
184 File.allocateNewObject<minidump::MemoryInfoListHeader>(
185 Args: sizeof(minidump::MemoryInfoListHeader), Args: sizeof(minidump::MemoryInfo),
186 Args: InfoList.Infos.size());
187 File.allocateArray(Data: ArrayRef(InfoList.Infos));
188 break;
189 }
190 case Stream::StreamKind::MemoryList:
191 DataEnd = layout(File, S&: cast<MemoryListStream>(Val&: S));
192 break;
193 case Stream::StreamKind::ModuleList:
194 DataEnd = layout(File, S&: cast<ModuleListStream>(Val&: S));
195 break;
196 case Stream::StreamKind::RawContent: {
197 RawContentStream &Raw = cast<RawContentStream>(Val&: S);
198 File.allocateCallback(Size: Raw.Size, Callback: [&Raw](raw_ostream &OS) {
199 Raw.Content.writeAsBinary(OS);
200 assert(Raw.Content.binary_size() <= Raw.Size);
201 OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
202 });
203 break;
204 }
205 case Stream::StreamKind::SystemInfo: {
206 SystemInfoStream &SystemInfo = cast<SystemInfoStream>(Val&: S);
207 File.allocateObject(Data: SystemInfo.Info);
208 // The CSD string is not a part of the stream.
209 DataEnd = File.tell();
210 SystemInfo.Info.CSDVersionRVA = File.allocateString(Str: SystemInfo.CSDVersion);
211 break;
212 }
213 case Stream::StreamKind::TextContent:
214 File.allocateArray(Data: arrayRefFromStringRef(Input: cast<TextContentStream>(Val&: S).Text));
215 break;
216 case Stream::StreamKind::ThreadList:
217 DataEnd = layout(File, S&: cast<ThreadListStream>(Val&: S));
218 break;
219 }
220 // If DataEnd is not set, we assume everything we generated is a part of the
221 // stream.
222 Result.Location.DataSize =
223 DataEnd.value_or(u: File.tell()) - Result.Location.RVA;
224 return Result;
225}
226
227namespace llvm {
228namespace yaml {
229
230bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out,
231 ErrorHandler /*EH*/) {
232 BlobAllocator File;
233 File.allocateObject(Data: Obj.Header);
234
235 std::vector<Directory> StreamDirectory(Obj.Streams.size());
236 Obj.Header.StreamDirectoryRVA = File.allocateArray(Data: ArrayRef(StreamDirectory));
237 Obj.Header.NumberOfStreams = StreamDirectory.size();
238
239 for (const auto &[Index, Stream] : enumerate(First&: Obj.Streams))
240 StreamDirectory[Index] = layout(File, S&: *Stream);
241
242 File.writeTo(OS&: Out);
243 return true;
244}
245
246} // namespace yaml
247} // namespace llvm
248