1 | //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/ObjectYAML/MinidumpYAML.h" |
10 | #include "llvm/ObjectYAML/yaml2obj.h" |
11 | #include "llvm/Support/ConvertUTF.h" |
12 | #include "llvm/Support/raw_ostream.h" |
13 | #include <optional> |
14 | |
15 | using namespace llvm; |
16 | using namespace llvm::minidump; |
17 | using namespace llvm::MinidumpYAML; |
18 | |
19 | namespace { |
20 | /// A helper class to manage the placement of various structures into the final |
21 | /// minidump binary. Space for objects can be allocated via various allocate*** |
22 | /// methods, while the final minidump file is written by calling the writeTo |
23 | /// method. The plain versions of allocation functions take a reference to the |
24 | /// data which is to be written (and hence the data must be available until |
25 | /// writeTo is called), while the "New" versions allocate the data in an |
26 | /// allocator-managed buffer, which is available until the allocator object is |
27 | /// destroyed. For both kinds of functions, it is possible to modify the |
28 | /// data for which the space has been "allocated" until the final writeTo call. |
29 | /// This is useful for "linking" the allocated structures via their offsets. |
30 | class BlobAllocator { |
31 | public: |
32 | size_t tell() const { return NextOffset; } |
33 | |
34 | size_t allocateCallback(size_t Size, |
35 | std::function<void(raw_ostream &)> Callback) { |
36 | size_t Offset = NextOffset; |
37 | NextOffset += Size; |
38 | Callbacks.push_back(x: std::move(Callback)); |
39 | return Offset; |
40 | } |
41 | |
42 | size_t allocateBytes(ArrayRef<uint8_t> Data) { |
43 | return allocateCallback( |
44 | Size: Data.size(), Callback: [Data](raw_ostream &OS) { OS << toStringRef(Input: Data); }); |
45 | } |
46 | |
47 | size_t allocateBytes(yaml::BinaryRef Data) { |
48 | return allocateCallback(Size: Data.binary_size(), Callback: [Data](raw_ostream &OS) { |
49 | Data.writeAsBinary(OS); |
50 | }); |
51 | } |
52 | |
53 | template <typename T> size_t allocateArray(ArrayRef<T> Data) { |
54 | return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()), |
55 | sizeof(T) * Data.size()}); |
56 | } |
57 | |
58 | template <typename T, typename RangeType> |
59 | std::pair<size_t, MutableArrayRef<T>> |
60 | allocateNewArray(const iterator_range<RangeType> &Range); |
61 | |
62 | template <typename T> size_t allocateObject(const T &Data) { |
63 | return allocateArray(ArrayRef(Data)); |
64 | } |
65 | |
66 | template <typename T, typename... Types> |
67 | std::pair<size_t, T *> allocateNewObject(Types &&... Args) { |
68 | T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...); |
69 | return {allocateObject(*Object), Object}; |
70 | } |
71 | |
72 | size_t allocateString(StringRef Str); |
73 | |
74 | void writeTo(raw_ostream &OS) const; |
75 | |
76 | private: |
77 | size_t NextOffset = 0; |
78 | |
79 | BumpPtrAllocator Temporaries; |
80 | std::vector<std::function<void(raw_ostream &)>> Callbacks; |
81 | }; |
82 | } // namespace |
83 | |
84 | template <typename T, typename RangeType> |
85 | std::pair<size_t, MutableArrayRef<T>> |
86 | BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) { |
87 | size_t Num = std::distance(Range.begin(), Range.end()); |
88 | MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num); |
89 | std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); |
90 | return {allocateArray(Array), Array}; |
91 | } |
92 | |
93 | size_t BlobAllocator::allocateString(StringRef Str) { |
94 | SmallVector<UTF16, 32> WStr; |
95 | bool OK = convertUTF8ToUTF16String(SrcUTF8: Str, DstUTF16&: WStr); |
96 | assert(OK && "Invalid UTF8 in Str?" ); |
97 | (void)OK; |
98 | |
99 | // The utf16 string is null-terminated, but the terminator is not counted in |
100 | // the string size. |
101 | WStr.push_back(Elt: 0); |
102 | size_t Result = |
103 | allocateNewObject<support::ulittle32_t>(Args: 2 * (WStr.size() - 1)).first; |
104 | allocateNewArray<support::ulittle16_t>(Range: make_range(x: WStr.begin(), y: WStr.end())); |
105 | return Result; |
106 | } |
107 | |
108 | void BlobAllocator::writeTo(raw_ostream &OS) const { |
109 | size_t BeginOffset = OS.tell(); |
110 | for (const auto &Callback : Callbacks) |
111 | Callback(OS); |
112 | assert(OS.tell() == BeginOffset + NextOffset && |
113 | "Callbacks wrote an unexpected number of bytes." ); |
114 | (void)BeginOffset; |
115 | } |
116 | |
117 | static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { |
118 | return {.DataSize: support::ulittle32_t(Data.binary_size()), |
119 | .RVA: support::ulittle32_t(File.allocateBytes(Data))}; |
120 | } |
121 | |
122 | static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { |
123 | File.allocateObject(Data: S.MDExceptionStream); |
124 | |
125 | size_t DataEnd = File.tell(); |
126 | |
127 | // Lay out the thread context data, (which is not a part of the stream). |
128 | // TODO: This usually (always?) matches the thread context of the |
129 | // corresponding thread, and may overlap memory regions as well. We could |
130 | // add a level of indirection to the MinidumpYAML format (like an array of |
131 | // Blobs that the LocationDescriptors index into) to be able to distinguish |
132 | // the cases where location descriptions overlap vs happen to reference |
133 | // identical data. |
134 | S.MDExceptionStream.ThreadContext = layout(File, Data: S.ThreadContext); |
135 | |
136 | return DataEnd; |
137 | } |
138 | |
139 | static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { |
140 | Range.Entry.Memory = layout(File, Data: Range.Content); |
141 | } |
142 | |
143 | static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { |
144 | M.Entry.ModuleNameRVA = File.allocateString(Str: M.Name); |
145 | |
146 | M.Entry.CvRecord = layout(File, Data: M.CvRecord); |
147 | M.Entry.MiscRecord = layout(File, Data: M.MiscRecord); |
148 | } |
149 | |
150 | static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { |
151 | T.Entry.Stack.Memory = layout(File, Data: T.Stack); |
152 | T.Entry.Context = layout(File, Data: T.Context); |
153 | } |
154 | |
155 | template <typename EntryT> |
156 | static size_t layout(BlobAllocator &File, |
157 | MinidumpYAML::detail::ListStream<EntryT> &S) { |
158 | |
159 | File.allocateNewObject<support::ulittle32_t>(S.Entries.size()); |
160 | for (auto &E : S.Entries) |
161 | File.allocateObject(E.Entry); |
162 | |
163 | size_t DataEnd = File.tell(); |
164 | |
165 | // Lay out the auxiliary data, (which is not a part of the stream). |
166 | DataEnd = File.tell(); |
167 | for (auto &E : S.Entries) |
168 | layout(File, E); |
169 | |
170 | return DataEnd; |
171 | } |
172 | |
173 | static Directory layout(BlobAllocator &File, Stream &S) { |
174 | Directory Result; |
175 | Result.Type = S.Type; |
176 | Result.Location.RVA = File.tell(); |
177 | std::optional<size_t> DataEnd; |
178 | switch (S.Kind) { |
179 | case Stream::StreamKind::Exception: |
180 | DataEnd = layout(File, S&: cast<MinidumpYAML::ExceptionStream>(Val&: S)); |
181 | break; |
182 | case Stream::StreamKind::MemoryInfoList: { |
183 | MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(Val&: S); |
184 | File.allocateNewObject<minidump::MemoryInfoListHeader>( |
185 | Args: sizeof(minidump::MemoryInfoListHeader), Args: sizeof(minidump::MemoryInfo), |
186 | Args: InfoList.Infos.size()); |
187 | File.allocateArray(Data: ArrayRef(InfoList.Infos)); |
188 | break; |
189 | } |
190 | case Stream::StreamKind::MemoryList: |
191 | DataEnd = layout(File, S&: cast<MemoryListStream>(Val&: S)); |
192 | break; |
193 | case Stream::StreamKind::ModuleList: |
194 | DataEnd = layout(File, S&: cast<ModuleListStream>(Val&: S)); |
195 | break; |
196 | case Stream::StreamKind::RawContent: { |
197 | RawContentStream &Raw = cast<RawContentStream>(Val&: S); |
198 | File.allocateCallback(Size: Raw.Size, Callback: [&Raw](raw_ostream &OS) { |
199 | Raw.Content.writeAsBinary(OS); |
200 | assert(Raw.Content.binary_size() <= Raw.Size); |
201 | OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); |
202 | }); |
203 | break; |
204 | } |
205 | case Stream::StreamKind::SystemInfo: { |
206 | SystemInfoStream &SystemInfo = cast<SystemInfoStream>(Val&: S); |
207 | File.allocateObject(Data: SystemInfo.Info); |
208 | // The CSD string is not a part of the stream. |
209 | DataEnd = File.tell(); |
210 | SystemInfo.Info.CSDVersionRVA = File.allocateString(Str: SystemInfo.CSDVersion); |
211 | break; |
212 | } |
213 | case Stream::StreamKind::TextContent: |
214 | File.allocateArray(Data: arrayRefFromStringRef(Input: cast<TextContentStream>(Val&: S).Text)); |
215 | break; |
216 | case Stream::StreamKind::ThreadList: |
217 | DataEnd = layout(File, S&: cast<ThreadListStream>(Val&: S)); |
218 | break; |
219 | } |
220 | // If DataEnd is not set, we assume everything we generated is a part of the |
221 | // stream. |
222 | Result.Location.DataSize = |
223 | DataEnd.value_or(u: File.tell()) - Result.Location.RVA; |
224 | return Result; |
225 | } |
226 | |
227 | namespace llvm { |
228 | namespace yaml { |
229 | |
230 | bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, |
231 | ErrorHandler /*EH*/) { |
232 | BlobAllocator File; |
233 | File.allocateObject(Data: Obj.Header); |
234 | |
235 | std::vector<Directory> StreamDirectory(Obj.Streams.size()); |
236 | Obj.Header.StreamDirectoryRVA = File.allocateArray(Data: ArrayRef(StreamDirectory)); |
237 | Obj.Header.NumberOfStreams = StreamDirectory.size(); |
238 | |
239 | for (const auto &[Index, Stream] : enumerate(First&: Obj.Streams)) |
240 | StreamDirectory[Index] = layout(File, S&: *Stream); |
241 | |
242 | File.writeTo(OS&: Out); |
243 | return true; |
244 | } |
245 | |
246 | } // namespace yaml |
247 | } // namespace llvm |
248 | |