| 1 | //===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 |  | 
|---|
| 9 | #include "llvm/ObjectYAML/MinidumpYAML.h" | 
|---|
| 10 | #include "llvm/ObjectYAML/yaml2obj.h" | 
|---|
| 11 | #include "llvm/Support/ConvertUTF.h" | 
|---|
| 12 | #include "llvm/Support/raw_ostream.h" | 
|---|
| 13 | #include <optional> | 
|---|
| 14 |  | 
|---|
| 15 | using namespace llvm; | 
|---|
| 16 | using namespace llvm::minidump; | 
|---|
| 17 | using namespace llvm::MinidumpYAML; | 
|---|
| 18 |  | 
|---|
| 19 | namespace { | 
|---|
| 20 | /// A helper class to manage the placement of various structures into the final | 
|---|
| 21 | /// minidump binary. Space for objects can be allocated via various allocate*** | 
|---|
| 22 | /// methods, while the final minidump file is written by calling the writeTo | 
|---|
| 23 | /// method. The plain versions of allocation functions take a reference to the | 
|---|
| 24 | /// data which is to be written (and hence the data must be available until | 
|---|
| 25 | /// writeTo is called), while the "New" versions allocate the data in an | 
|---|
| 26 | /// allocator-managed buffer, which is available until the allocator object is | 
|---|
| 27 | /// destroyed. For both kinds of functions, it is possible to modify the | 
|---|
| 28 | /// data for which the space has been "allocated" until the final writeTo call. | 
|---|
| 29 | /// This is useful for "linking" the allocated structures via their offsets. | 
|---|
| 30 | class BlobAllocator { | 
|---|
| 31 | public: | 
|---|
| 32 | size_t tell() const { return NextOffset; } | 
|---|
| 33 |  | 
|---|
| 34 | size_t allocateCallback(size_t Size, | 
|---|
| 35 | std::function<void(raw_ostream &)> Callback) { | 
|---|
| 36 | size_t Offset = NextOffset; | 
|---|
| 37 | NextOffset += Size; | 
|---|
| 38 | Callbacks.push_back(x: std::move(Callback)); | 
|---|
| 39 | return Offset; | 
|---|
| 40 | } | 
|---|
| 41 |  | 
|---|
| 42 | size_t allocateBytes(ArrayRef<uint8_t> Data) { | 
|---|
| 43 | return allocateCallback( | 
|---|
| 44 | Size: Data.size(), Callback: [Data](raw_ostream &OS) { OS << toStringRef(Input: Data); }); | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | size_t allocateBytes(yaml::BinaryRef Data) { | 
|---|
| 48 | return allocateCallback(Size: Data.binary_size(), Callback: [Data](raw_ostream &OS) { | 
|---|
| 49 | Data.writeAsBinary(OS); | 
|---|
| 50 | }); | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | template <typename T> size_t allocateArray(ArrayRef<T> Data) { | 
|---|
| 54 | return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()), | 
|---|
| 55 | sizeof(T) * Data.size()}); | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | template <typename T, typename RangeType> | 
|---|
| 59 | std::pair<size_t, MutableArrayRef<T>> | 
|---|
| 60 | allocateNewArray(const iterator_range<RangeType> &Range); | 
|---|
| 61 |  | 
|---|
| 62 | template <typename T> size_t allocateObject(const T &Data) { | 
|---|
| 63 | return allocateArray(ArrayRef(Data)); | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | template <typename T, typename... Types> | 
|---|
| 67 | std::pair<size_t, T *> allocateNewObject(Types &&... Args) { | 
|---|
| 68 | T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...); | 
|---|
| 69 | return {allocateObject(*Object), Object}; | 
|---|
| 70 | } | 
|---|
| 71 |  | 
|---|
| 72 | size_t allocateString(StringRef Str); | 
|---|
| 73 |  | 
|---|
| 74 | void writeTo(raw_ostream &OS) const; | 
|---|
| 75 |  | 
|---|
| 76 | private: | 
|---|
| 77 | size_t NextOffset = 0; | 
|---|
| 78 |  | 
|---|
| 79 | BumpPtrAllocator Temporaries; | 
|---|
| 80 | std::vector<std::function<void(raw_ostream &)>> Callbacks; | 
|---|
| 81 | }; | 
|---|
| 82 | } // namespace | 
|---|
| 83 |  | 
|---|
| 84 | template <typename T, typename RangeType> | 
|---|
| 85 | std::pair<size_t, MutableArrayRef<T>> | 
|---|
| 86 | BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) { | 
|---|
| 87 | size_t Num = std::distance(Range.begin(), Range.end()); | 
|---|
| 88 | MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num); | 
|---|
| 89 | llvm::uninitialized_copy(Range, Array.begin()); | 
|---|
| 90 | return {allocateArray(Array), Array}; | 
|---|
| 91 | } | 
|---|
| 92 |  | 
|---|
| 93 | size_t BlobAllocator::allocateString(StringRef Str) { | 
|---|
| 94 | SmallVector<UTF16, 32> WStr; | 
|---|
| 95 | bool OK = convertUTF8ToUTF16String(SrcUTF8: Str, DstUTF16&: WStr); | 
|---|
| 96 | assert(OK && "Invalid UTF8 in Str?"); | 
|---|
| 97 | (void)OK; | 
|---|
| 98 |  | 
|---|
| 99 | // The utf16 string is null-terminated, but the terminator is not counted in | 
|---|
| 100 | // the string size. | 
|---|
| 101 | WStr.push_back(Elt: 0); | 
|---|
| 102 | size_t Result = | 
|---|
| 103 | allocateNewObject<support::ulittle32_t>(Args: 2 * (WStr.size() - 1)).first; | 
|---|
| 104 | allocateNewArray<support::ulittle16_t>(Range: make_range(x: WStr.begin(), y: WStr.end())); | 
|---|
| 105 | return Result; | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | void BlobAllocator::writeTo(raw_ostream &OS) const { | 
|---|
| 109 | size_t BeginOffset = OS.tell(); | 
|---|
| 110 | for (const auto &Callback : Callbacks) | 
|---|
| 111 | Callback(OS); | 
|---|
| 112 | assert(OS.tell() == BeginOffset + NextOffset && | 
|---|
| 113 | "Callbacks wrote an unexpected number of bytes."); | 
|---|
| 114 | (void)BeginOffset; | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { | 
|---|
| 118 | return {.DataSize: support::ulittle32_t(Data.binary_size()), | 
|---|
| 119 | .RVA: support::ulittle32_t(File.allocateBytes(Data))}; | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { | 
|---|
| 123 | File.allocateObject(Data: S.MDExceptionStream); | 
|---|
| 124 |  | 
|---|
| 125 | size_t DataEnd = File.tell(); | 
|---|
| 126 |  | 
|---|
| 127 | // Lay out the thread context data, (which is not a part of the stream). | 
|---|
| 128 | // TODO: This usually (always?) matches the thread context of the | 
|---|
| 129 | // corresponding thread, and may overlap memory regions as well.  We could | 
|---|
| 130 | // add a level of indirection to the MinidumpYAML format (like an array of | 
|---|
| 131 | // Blobs that the LocationDescriptors index into) to be able to distinguish | 
|---|
| 132 | // the cases where location descriptions overlap vs happen to reference | 
|---|
| 133 | // identical data. | 
|---|
| 134 | S.MDExceptionStream.ThreadContext = layout(File, Data: S.ThreadContext); | 
|---|
| 135 |  | 
|---|
| 136 | return DataEnd; | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | static size_t layout(BlobAllocator &File, MinidumpYAML::Memory64ListStream &S) { | 
|---|
| 140 | size_t BaseRVA = File.tell() + sizeof(minidump::Memory64ListHeader); | 
|---|
| 141 | BaseRVA += S.Entries.size() * sizeof(minidump::MemoryDescriptor_64); | 
|---|
| 142 | S.Header.BaseRVA = BaseRVA; | 
|---|
| 143 | S.Header.NumberOfMemoryRanges = S.Entries.size(); | 
|---|
| 144 | File.allocateObject(Data: S.Header); | 
|---|
| 145 | for (auto &E : S.Entries) | 
|---|
| 146 | File.allocateObject(Data: E.Entry); | 
|---|
| 147 |  | 
|---|
| 148 | // Save the new offset for the stream size. | 
|---|
| 149 | size_t DataEnd = File.tell(); | 
|---|
| 150 | for (auto &E : S.Entries) { | 
|---|
| 151 | File.allocateBytes(Data: E.Content); | 
|---|
| 152 | if (E.Entry.DataSize > E.Content.binary_size()) { | 
|---|
| 153 | size_t Padding = E.Entry.DataSize - E.Content.binary_size(); | 
|---|
| 154 | File.allocateCallback(Size: Padding, Callback: [Padding](raw_ostream &OS) { | 
|---|
| 155 | OS << std::string(Padding, '\0'); | 
|---|
| 156 | }); | 
|---|
| 157 | } | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | return DataEnd; | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { | 
|---|
| 164 | Range.Entry.Memory = layout(File, Data: Range.Content); | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { | 
|---|
| 168 | M.Entry.ModuleNameRVA = File.allocateString(Str: M.Name); | 
|---|
| 169 |  | 
|---|
| 170 | M.Entry.CvRecord = layout(File, Data: M.CvRecord); | 
|---|
| 171 | M.Entry.MiscRecord = layout(File, Data: M.MiscRecord); | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|
| 174 | static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { | 
|---|
| 175 | T.Entry.Stack.Memory = layout(File, Data: T.Stack); | 
|---|
| 176 | T.Entry.Context = layout(File, Data: T.Context); | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 | template <typename EntryT> | 
|---|
| 180 | static size_t layout(BlobAllocator &File, | 
|---|
| 181 | MinidumpYAML::detail::ListStream<EntryT> &S) { | 
|---|
| 182 |  | 
|---|
| 183 | File.allocateNewObject<support::ulittle32_t>(S.Entries.size()); | 
|---|
| 184 | for (auto &E : S.Entries) | 
|---|
| 185 | File.allocateObject(E.Entry); | 
|---|
| 186 |  | 
|---|
| 187 | size_t DataEnd = File.tell(); | 
|---|
| 188 |  | 
|---|
| 189 | // Lay out the auxiliary data, (which is not a part of the stream). | 
|---|
| 190 | DataEnd = File.tell(); | 
|---|
| 191 | for (auto &E : S.Entries) | 
|---|
| 192 | layout(File, E); | 
|---|
| 193 |  | 
|---|
| 194 | return DataEnd; | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | static Directory layout(BlobAllocator &File, Stream &S) { | 
|---|
| 198 | Directory Result; | 
|---|
| 199 | Result.Type = S.Type; | 
|---|
| 200 | Result.Location.RVA = File.tell(); | 
|---|
| 201 | std::optional<size_t> DataEnd; | 
|---|
| 202 | switch (S.Kind) { | 
|---|
| 203 | case Stream::StreamKind::Exception: | 
|---|
| 204 | DataEnd = layout(File, S&: cast<MinidumpYAML::ExceptionStream>(Val&: S)); | 
|---|
| 205 | break; | 
|---|
| 206 | case Stream::StreamKind::MemoryInfoList: { | 
|---|
| 207 | MemoryInfoListStream &InfoList = cast<MemoryInfoListStream>(Val&: S); | 
|---|
| 208 | File.allocateNewObject<minidump::MemoryInfoListHeader>( | 
|---|
| 209 | Args: sizeof(minidump::MemoryInfoListHeader), Args: sizeof(minidump::MemoryInfo), | 
|---|
| 210 | Args: InfoList.Infos.size()); | 
|---|
| 211 | File.allocateArray(Data: ArrayRef(InfoList.Infos)); | 
|---|
| 212 | break; | 
|---|
| 213 | } | 
|---|
| 214 | case Stream::StreamKind::MemoryList: | 
|---|
| 215 | DataEnd = layout(File, S&: cast<MemoryListStream>(Val&: S)); | 
|---|
| 216 | break; | 
|---|
| 217 | case Stream::StreamKind::Memory64List: | 
|---|
| 218 | DataEnd = layout(File, S&: cast<Memory64ListStream>(Val&: S)); | 
|---|
| 219 | break; | 
|---|
| 220 | case Stream::StreamKind::ModuleList: | 
|---|
| 221 | DataEnd = layout(File, S&: cast<ModuleListStream>(Val&: S)); | 
|---|
| 222 | break; | 
|---|
| 223 | case Stream::StreamKind::RawContent: { | 
|---|
| 224 | RawContentStream &Raw = cast<RawContentStream>(Val&: S); | 
|---|
| 225 | File.allocateCallback(Size: Raw.Size, Callback: [&Raw](raw_ostream &OS) { | 
|---|
| 226 | Raw.Content.writeAsBinary(OS); | 
|---|
| 227 | assert(Raw.Content.binary_size() <= Raw.Size); | 
|---|
| 228 | OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); | 
|---|
| 229 | }); | 
|---|
| 230 | break; | 
|---|
| 231 | } | 
|---|
| 232 | case Stream::StreamKind::SystemInfo: { | 
|---|
| 233 | SystemInfoStream &SystemInfo = cast<SystemInfoStream>(Val&: S); | 
|---|
| 234 | File.allocateObject(Data: SystemInfo.Info); | 
|---|
| 235 | // The CSD string is not a part of the stream. | 
|---|
| 236 | DataEnd = File.tell(); | 
|---|
| 237 | SystemInfo.Info.CSDVersionRVA = File.allocateString(Str: SystemInfo.CSDVersion); | 
|---|
| 238 | break; | 
|---|
| 239 | } | 
|---|
| 240 | case Stream::StreamKind::TextContent: | 
|---|
| 241 | File.allocateArray(Data: arrayRefFromStringRef(Input: cast<TextContentStream>(Val&: S).Text)); | 
|---|
| 242 | break; | 
|---|
| 243 | case Stream::StreamKind::ThreadList: | 
|---|
| 244 | DataEnd = layout(File, S&: cast<ThreadListStream>(Val&: S)); | 
|---|
| 245 | break; | 
|---|
| 246 | } | 
|---|
| 247 | // If DataEnd is not set, we assume everything we generated is a part of the | 
|---|
| 248 | // stream. | 
|---|
| 249 | Result.Location.DataSize = | 
|---|
| 250 | DataEnd.value_or(u: File.tell()) - Result.Location.RVA; | 
|---|
| 251 | return Result; | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | namespace llvm { | 
|---|
| 255 | namespace yaml { | 
|---|
| 256 |  | 
|---|
| 257 | bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, | 
|---|
| 258 | ErrorHandler /*EH*/) { | 
|---|
| 259 | BlobAllocator File; | 
|---|
| 260 | File.allocateObject(Data: Obj.Header); | 
|---|
| 261 |  | 
|---|
| 262 | std::vector<Directory> StreamDirectory(Obj.Streams.size()); | 
|---|
| 263 | Obj.Header.StreamDirectoryRVA = File.allocateArray(Data: ArrayRef(StreamDirectory)); | 
|---|
| 264 | Obj.Header.NumberOfStreams = StreamDirectory.size(); | 
|---|
| 265 |  | 
|---|
| 266 | for (const auto &[Index, Stream] : enumerate(First&: Obj.Streams)) | 
|---|
| 267 | StreamDirectory[Index] = layout(File, S&: *Stream); | 
|---|
| 268 |  | 
|---|
| 269 | File.writeTo(OS&: Out); | 
|---|
| 270 | return true; | 
|---|
| 271 | } | 
|---|
| 272 |  | 
|---|
| 273 | } // namespace yaml | 
|---|
| 274 | } // namespace llvm | 
|---|
| 275 |  | 
|---|