1//===- xray-converter.cpp: XRay Trace Conversion --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements the trace conversion functions.
10//
11//===----------------------------------------------------------------------===//
12#include "xray-converter.h"
13
14#include "trie-node.h"
15#include "xray-registry.h"
16#include "llvm/DebugInfo/Symbolize/Symbolize.h"
17#include "llvm/Support/EndianStream.h"
18#include "llvm/Support/FileSystem.h"
19#include "llvm/Support/FormatVariadic.h"
20#include "llvm/Support/ScopedPrinter.h"
21#include "llvm/Support/YAMLTraits.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/XRay/InstrumentationMap.h"
24#include "llvm/XRay/Trace.h"
25#include "llvm/XRay/YAMLXRayRecord.h"
26
27using namespace llvm;
28using namespace xray;
29
30// llvm-xray convert
31// ----------------------------------------------------------------------------
32static cl::SubCommand Convert("convert", "Trace Format Conversion");
33static cl::opt<std::string> ConvertInput(cl::Positional,
34 cl::desc("<xray log file>"),
35 cl::Required, cl::sub(Convert));
36enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT };
37static cl::opt<ConvertFormats> ConvertOutputFormat(
38 "output-format", cl::desc("output format"),
39 cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"),
40 clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"),
41 clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event",
42 "Output in chrome's trace event format. "
43 "May be visualized with the Catapult trace viewer.")),
44 cl::sub(Convert));
45static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat),
46 cl::desc("Alias for -output-format"));
47static cl::opt<std::string>
48 ConvertOutput("output", cl::value_desc("output file"), cl::init(Val: "-"),
49 cl::desc("output file; use '-' for stdout"),
50 cl::sub(Convert));
51static cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput),
52 cl::desc("Alias for -output"));
53
54static cl::opt<bool>
55 ConvertSymbolize("symbolize",
56 cl::desc("symbolize function ids from the input log"),
57 cl::init(Val: false), cl::sub(Convert));
58static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize),
59 cl::desc("Alias for -symbolize"));
60static cl::opt<bool>
61 NoDemangle("no-demangle",
62 cl::desc("determines whether to demangle function name "
63 "when symbolizing function ids from the input log"),
64 cl::init(Val: false), cl::sub(Convert));
65
66static cl::opt<bool> Demangle("demangle",
67 cl::desc("demangle symbols (default)"),
68 cl::sub(Convert));
69
70static cl::opt<std::string>
71 ConvertInstrMap("instr_map",
72 cl::desc("binary with the instrumentation map, or "
73 "a separate instrumentation map"),
74 cl::value_desc("binary with xray_instr_map"),
75 cl::sub(Convert), cl::init(Val: ""));
76static cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap),
77 cl::desc("Alias for -instr_map"));
78static cl::opt<bool> ConvertSortInput(
79 "sort",
80 cl::desc("determines whether to sort input log records by timestamp"),
81 cl::sub(Convert), cl::init(Val: true));
82static cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput),
83 cl::desc("Alias for -sort"));
84
85using llvm::yaml::Output;
86
87void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) {
88 YAMLXRayTrace Trace;
89 const auto &FH = Records.getFileHeader();
90 Trace.Header = {.Version: FH.Version, .Type: FH.Type, .ConstantTSC: FH.ConstantTSC, .NonstopTSC: FH.NonstopTSC,
91 .CycleFrequency: FH.CycleFrequency};
92 Trace.Records.reserve(n: Records.size());
93 for (const auto &R : Records) {
94 Trace.Records.push_back(x: {.RecordType: R.RecordType, .CPU: R.CPU, .Type: R.Type, .FuncId: R.FuncId,
95 .Function: Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId: R.FuncId)
96 : llvm::to_string(Value: R.FuncId),
97 .TSC: R.TSC, .TId: R.TId, .PId: R.PId, .CallArgs: R.CallArgs, .Data: R.Data});
98 }
99 Output Out(OS, nullptr, 0);
100 Out.setWriteDefaultValues(false);
101 Out << Trace;
102}
103
104void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) {
105 // First write out the file header, in the correct endian-appropriate format
106 // (XRay assumes currently little endian).
107 support::endian::Writer Writer(OS, llvm::endianness::little);
108 const auto &FH = Records.getFileHeader();
109 Writer.write(Val: FH.Version);
110 Writer.write(Val: FH.Type);
111 uint32_t Bitfield{0};
112 if (FH.ConstantTSC)
113 Bitfield |= 1uL;
114 if (FH.NonstopTSC)
115 Bitfield |= 1uL << 1;
116 Writer.write(Val: Bitfield);
117 Writer.write(Val: FH.CycleFrequency);
118
119 // There's 16 bytes of padding at the end of the file header.
120 static constexpr uint32_t Padding4B = 0;
121 Writer.write(Val: Padding4B);
122 Writer.write(Val: Padding4B);
123 Writer.write(Val: Padding4B);
124 Writer.write(Val: Padding4B);
125
126 // Then write out the rest of the records, still in an endian-appropriate
127 // format.
128 for (const auto &R : Records) {
129 switch (R.Type) {
130 case RecordTypes::ENTER:
131 case RecordTypes::ENTER_ARG:
132 Writer.write(Val: R.RecordType);
133 Writer.write(Val: static_cast<uint8_t>(R.CPU));
134 Writer.write(Val: uint8_t{0});
135 break;
136 case RecordTypes::EXIT:
137 Writer.write(Val: R.RecordType);
138 Writer.write(Val: static_cast<uint8_t>(R.CPU));
139 Writer.write(Val: uint8_t{1});
140 break;
141 case RecordTypes::TAIL_EXIT:
142 Writer.write(Val: R.RecordType);
143 Writer.write(Val: static_cast<uint8_t>(R.CPU));
144 Writer.write(Val: uint8_t{2});
145 break;
146 case RecordTypes::CUSTOM_EVENT:
147 case RecordTypes::TYPED_EVENT:
148 // Skip custom and typed event records for v1 logs.
149 continue;
150 }
151 Writer.write(Val: R.FuncId);
152 Writer.write(Val: R.TSC);
153 Writer.write(Val: R.TId);
154
155 if (FH.Version >= 3)
156 Writer.write(Val: R.PId);
157 else
158 Writer.write(Val: Padding4B);
159
160 Writer.write(Val: Padding4B);
161 Writer.write(Val: Padding4B);
162 }
163}
164
165namespace {
166
167// A structure that allows building a dictionary of stack ids for the Chrome
168// trace event format.
169struct StackIdData {
170 // Each Stack of function calls has a unique ID.
171 unsigned id;
172
173 // Bookkeeping so that IDs can be maintained uniquely across threads.
174 // Traversal keeps sibling pointers to other threads stacks. This is helpful
175 // to determine when a thread encounters a new stack and should assign a new
176 // unique ID.
177 SmallVector<TrieNode<StackIdData> *, 4> siblings;
178};
179
180using StackTrieNode = TrieNode<StackIdData>;
181
182// A helper function to find the sibling nodes for an encountered function in a
183// thread of execution. Relies on the invariant that each time a new node is
184// traversed in a thread, sibling bidirectional pointers are maintained.
185SmallVector<StackTrieNode *, 4>
186findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId,
187 const DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>>
188 &StackRootsByThreadId) {
189
190 SmallVector<StackTrieNode *, 4> Siblings{};
191
192 if (parent == nullptr) {
193 for (const auto &map_iter : StackRootsByThreadId) {
194 // Only look for siblings in other threads.
195 if (map_iter.first != TId)
196 for (auto node_iter : map_iter.second) {
197 if (node_iter->FuncId == FnId)
198 Siblings.push_back(Elt: node_iter);
199 }
200 }
201 return Siblings;
202 }
203
204 for (auto *ParentSibling : parent->ExtraData.siblings)
205 for (auto node_iter : ParentSibling->Callees)
206 if (node_iter->FuncId == FnId)
207 Siblings.push_back(Elt: node_iter);
208
209 return Siblings;
210}
211
212// Given a function being invoked in a thread with id TId, finds and returns the
213// StackTrie representing the function call stack. If no node exists, creates
214// the node. Assigns unique IDs to stacks newly encountered among all threads
215// and keeps sibling links up to when creating new nodes.
216StackTrieNode *findOrCreateStackNode(
217 StackTrieNode *Parent, int32_t FuncId, uint32_t TId,
218 DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId,
219 DenseMap<unsigned, StackTrieNode *> &StacksByStackId, unsigned *id_counter,
220 std::forward_list<StackTrieNode> &NodeStore) {
221 SmallVector<StackTrieNode *, 4> &ParentCallees =
222 Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees;
223 auto match = find_if(Range&: ParentCallees, P: [FuncId](StackTrieNode *ParentCallee) {
224 return FuncId == ParentCallee->FuncId;
225 });
226 if (match != ParentCallees.end())
227 return *match;
228
229 SmallVector<StackTrieNode *, 4> siblings =
230 findSiblings(parent: Parent, FnId: FuncId, TId, StackRootsByThreadId);
231 if (siblings.empty()) {
232 NodeStore.push_front(val: {.FuncId: FuncId, .Parent: Parent, .Callees: {}, .ExtraData: {.id: (*id_counter)++, .siblings: {}}});
233 StackTrieNode *CurrentStack = &NodeStore.front();
234 StacksByStackId[*id_counter - 1] = CurrentStack;
235 ParentCallees.push_back(Elt: CurrentStack);
236 return CurrentStack;
237 }
238 unsigned stack_id = siblings[0]->ExtraData.id;
239 NodeStore.push_front(val: {.FuncId: FuncId, .Parent: Parent, .Callees: {}, .ExtraData: {.id: stack_id, .siblings: std::move(siblings)}});
240 StackTrieNode *CurrentStack = &NodeStore.front();
241 for (auto *sibling : CurrentStack->ExtraData.siblings)
242 sibling->ExtraData.siblings.push_back(Elt: CurrentStack);
243 ParentCallees.push_back(Elt: CurrentStack);
244 return CurrentStack;
245}
246
247void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
248 uint32_t TId, uint32_t PId, bool Symbolize,
249 const FuncIdConversionHelper &FuncIdHelper,
250 double EventTimestampUs,
251 const StackTrieNode &StackCursor,
252 StringRef FunctionPhenotype) {
253 OS << " ";
254 if (Version >= 3) {
255 OS << llvm::formatv(
256 Fmt: R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
257 R"("ts" : "{4:f4}", "sf" : "{5}" })",
258 Vals: (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
259 : llvm::to_string(Value: FuncId)),
260 Vals&: FunctionPhenotype, Vals&: TId, Vals&: PId, Vals&: EventTimestampUs,
261 Vals: StackCursor.ExtraData.id);
262 } else {
263 OS << llvm::formatv(
264 Fmt: R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
265 R"("ts" : "{3:f3}", "sf" : "{4}" })",
266 Vals: (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
267 : llvm::to_string(Value: FuncId)),
268 Vals&: FunctionPhenotype, Vals&: TId, Vals&: EventTimestampUs, Vals: StackCursor.ExtraData.id);
269 }
270}
271
272} // namespace
273
274void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
275 raw_ostream &OS) {
276 const auto &FH = Records.getFileHeader();
277 auto Version = FH.Version;
278 auto CycleFreq = FH.CycleFrequency;
279
280 unsigned id_counter = 0;
281 int NumOutputRecords = 0;
282
283 OS << "{\n \"traceEvents\": [\n";
284 DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
285 DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
286 DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
287 std::forward_list<StackTrieNode> NodeStore{};
288 for (const auto &R : Records) {
289 // Chrome trace event format always wants data in micros.
290 // CyclesPerMicro = CycleHertz / 10^6
291 // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
292 // Could lose some precision here by converting the TSC to a double to
293 // multiply by the period in micros. 52 bit mantissa is a good start though.
294 // TODO: Make feature request to Chrome Trace viewer to accept ticks and a
295 // frequency or do some more involved calculation to avoid dangers of
296 // conversion.
297 double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC);
298 StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId];
299 switch (R.Type) {
300 case RecordTypes::CUSTOM_EVENT:
301 case RecordTypes::TYPED_EVENT:
302 // TODO: Support typed and custom event rendering on Chrome Trace Viewer.
303 break;
304 case RecordTypes::ENTER:
305 case RecordTypes::ENTER_ARG:
306 StackCursor = findOrCreateStackNode(Parent: StackCursor, FuncId: R.FuncId, TId: R.TId,
307 StackRootsByThreadId, StacksByStackId,
308 id_counter: &id_counter, NodeStore);
309 // Each record is represented as a json dictionary with function name,
310 // type of B for begin or E for end, thread id, process id,
311 // timestamp in microseconds, and a stack frame id. The ids are logged
312 // in an id dictionary after the events.
313 if (NumOutputRecords++ > 0) {
314 OS << ",\n";
315 }
316 writeTraceViewerRecord(Version, OS, FuncId: R.FuncId, TId: R.TId, PId: R.PId, Symbolize,
317 FuncIdHelper, EventTimestampUs, StackCursor: *StackCursor, FunctionPhenotype: "B");
318 break;
319 case RecordTypes::EXIT:
320 case RecordTypes::TAIL_EXIT:
321 // No entries to record end for.
322 if (StackCursor == nullptr)
323 break;
324 // Should we emit an END record anyway or account this condition?
325 // (And/Or in loop termination below)
326 StackTrieNode *PreviousCursor = nullptr;
327 do {
328 if (NumOutputRecords++ > 0) {
329 OS << ",\n";
330 }
331 writeTraceViewerRecord(Version, OS, FuncId: StackCursor->FuncId, TId: R.TId, PId: R.PId,
332 Symbolize, FuncIdHelper, EventTimestampUs,
333 StackCursor: *StackCursor, FunctionPhenotype: "E");
334 PreviousCursor = StackCursor;
335 StackCursor = StackCursor->Parent;
336 } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
337 break;
338 }
339 }
340 OS << "\n ],\n"; // Close the Trace Events array.
341 OS << " "
342 << "\"displayTimeUnit\": \"ns\",\n";
343
344 // The stackFrames dictionary substantially reduces size of the output file by
345 // avoiding repeating the entire call stack of function names for each entry.
346 OS << R"( "stackFrames": {)";
347 int stack_frame_count = 0;
348 for (auto map_iter : StacksByStackId) {
349 if (stack_frame_count++ == 0)
350 OS << "\n";
351 else
352 OS << ",\n";
353 OS << " ";
354 OS << llvm::formatv(
355 Fmt: R"("{0}" : { "name" : "{1}")", Vals&: map_iter.first,
356 Vals: (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId: map_iter.second->FuncId)
357 : llvm::to_string(Value: map_iter.second->FuncId)));
358 if (map_iter.second->Parent != nullptr)
359 OS << llvm::formatv(Fmt: R"(, "parent": "{0}")",
360 Vals&: map_iter.second->Parent->ExtraData.id);
361 OS << " }";
362 }
363 OS << "\n }\n"; // Close the stack frames map.
364 OS << "}\n"; // Close the JSON entry.
365}
366
367namespace llvm {
368namespace xray {
369
370static CommandRegistration Unused(&Convert, []() -> Error {
371 // FIXME: Support conversion to BINARY when upgrading XRay trace versions.
372 InstrumentationMap Map;
373 if (!ConvertInstrMap.empty()) {
374 auto InstrumentationMapOrError = loadInstrumentationMap(Filename: ConvertInstrMap);
375 if (!InstrumentationMapOrError)
376 return joinErrors(E1: make_error<StringError>(
377 Args: Twine("Cannot open instrumentation map '") +
378 ConvertInstrMap + "'",
379 Args: std::make_error_code(e: std::errc::invalid_argument)),
380 E2: InstrumentationMapOrError.takeError());
381 Map = std::move(*InstrumentationMapOrError);
382 }
383
384 const auto &FunctionAddresses = Map.getFunctionAddresses();
385 symbolize::LLVMSymbolizer::Options SymbolizerOpts;
386 if (Demangle.getPosition() < NoDemangle.getPosition())
387 SymbolizerOpts.Demangle = false;
388 symbolize::LLVMSymbolizer Symbolizer(SymbolizerOpts);
389 llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer,
390 FunctionAddresses);
391 llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize);
392 std::error_code EC;
393 raw_fd_ostream OS(ConvertOutput, EC,
394 ConvertOutputFormat == ConvertFormats::BINARY
395 ? sys::fs::OpenFlags::OF_None
396 : sys::fs::OpenFlags::OF_TextWithCRLF);
397 if (EC)
398 return make_error<StringError>(
399 Args: Twine("Cannot open file '") + ConvertOutput + "' for writing.", Args&: EC);
400
401 auto TraceOrErr = loadTraceFile(Filename: ConvertInput, Sort: ConvertSortInput);
402 if (!TraceOrErr)
403 return joinErrors(
404 E1: make_error<StringError>(
405 Args: Twine("Failed loading input file '") + ConvertInput + "'.",
406 Args: std::make_error_code(e: std::errc::executable_format_error)),
407 E2: TraceOrErr.takeError());
408
409 auto &T = *TraceOrErr;
410 switch (ConvertOutputFormat) {
411 case ConvertFormats::YAML:
412 TC.exportAsYAML(Records: T, OS);
413 break;
414 case ConvertFormats::BINARY:
415 TC.exportAsRAWv1(Records: T, OS);
416 break;
417 case ConvertFormats::CHROME_TRACE_EVENT:
418 TC.exportAsChromeTraceEventFormat(Records: T, OS);
419 break;
420 }
421 return Error::success();
422});
423
424} // namespace xray
425} // namespace llvm
426