Trace.cpp source code [llvm_projects/llvm/lib/XRay/Trace.cpp]

1	//===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// XRay log reader implementation.
10	//
11	//===----------------------------------------------------------------------===//
12	#include "llvm/XRay/Trace.h"
13	#include "llvm/ADT/STLExtras.h"
14	#include "llvm/Support/DataExtractor.h"
15	#include "llvm/Support/Error.h"
16	#include "llvm/Support/FileSystem.h"
17	#include "llvm/XRay/BlockIndexer.h"
18	#include "llvm/XRay/BlockVerifier.h"
19	#include "llvm/XRay/FDRRecordConsumer.h"
20	#include "llvm/XRay/FDRRecordProducer.h"
21	#include "llvm/XRay/FDRRecords.h"
22	#include "llvm/XRay/FDRTraceExpander.h"
23	#include "llvm/XRay/FileHeaderReader.h"
24	#include "llvm/XRay/YAMLXRayRecord.h"
25	#include <memory>
26	#include <vector>
27
28	using namespace llvm;
29	using namespace llvm::xray;
30	using llvm::yaml::Input;
31
32	static Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
33	XRayFileHeader &FileHeader,
34	std::vector<XRayRecord> &Records) {
35	if (Data.size() < `32`)
36	return make_error<StringError>(
37	Args: "Not enough bytes for an XRay log.",
38	Args: std::make_error_code(e: std::errc::invalid_argument));
39
40	if (Data.size() - `32` == `0` \|\| Data.size() % `32` != `0`)
41	return make_error<StringError>(
42	Args: "Invalid-sized XRay data.",
43	Args: std::make_error_code(e: std::errc::invalid_argument));
44
45	DataExtractor Reader(Data, IsLittleEndian, `8`);
46	uint64_t OffsetPtr = `0`;
47	auto FileHeaderOrError = readBinaryFormatHeader(HeaderExtractor&: Reader, OffsetPtr);
48	if (!FileHeaderOrError)
49	return FileHeaderOrError.takeError();
50	FileHeader = std::move(FileHeaderOrError.get());
51
52	size_t NumReservations = llvm::divideCeil(Numerator: Reader.size() - OffsetPtr, Denominator: `32U`);
53	Records.reserve(n: NumReservations);
54
55	// Each record after the header will be 32 bytes, in the following format:
56	//
57	// (2) uint16 : record type
58	// (1) uint8 : cpu id
59	// (1) uint8 : type
60	// (4) sint32 : function id
61	// (8) uint64 : tsc
62	// (4) uint32 : thread id
63	// (4) uint32 : process id
64	// (8) - : padding
65	while (Reader.isValidOffset(offset: OffsetPtr)) {
66	if (!Reader.isValidOffsetForDataOfSize(offset: OffsetPtr, length: `32`))
67	return createStringError(
68	EC: std::make_error_code(e: std::errc::executable_format_error),
69	Fmt: "Not enough bytes to read a full record at offset %" PRId64 ".",
70	Vals: OffsetPtr);
71	auto PreReadOffset = OffsetPtr;
72	auto RecordType = Reader.getU16(offset_ptr: &OffsetPtr);
73	if (OffsetPtr == PreReadOffset)
74	return createStringError(
75	EC: std::make_error_code(e: std::errc::executable_format_error),
76	Fmt: "Failed reading record type at offset %" PRId64 ".", Vals: OffsetPtr);
77
78	switch (RecordType) {
79	case `0`: { // Normal records.
80	Records.emplace_back();
81	auto &Record = Records.back();
82	Record.RecordType = RecordType;
83
84	PreReadOffset = OffsetPtr;
85	Record.CPU = Reader.getU8(offset_ptr: &OffsetPtr);
86	if (OffsetPtr == PreReadOffset)
87	return createStringError(
88	EC: std::make_error_code(e: std::errc::executable_format_error),
89	Fmt: "Failed reading CPU field at offset %" PRId64 ".", Vals: OffsetPtr);
90
91	PreReadOffset = OffsetPtr;
92	auto Type = Reader.getU8(offset_ptr: &OffsetPtr);
93	if (OffsetPtr == PreReadOffset)
94	return createStringError(
95	EC: std::make_error_code(e: std::errc::executable_format_error),
96	Fmt: "Failed reading record type field at offset %" PRId64 ".",
97	Vals: OffsetPtr);
98
99	switch (Type) {
100	case `0`:
101	Record.Type = RecordTypes::ENTER;
102	break;
103	case `1`:
104	Record.Type = RecordTypes::EXIT;
105	break;
106	case `2`:
107	Record.Type = RecordTypes::TAIL_EXIT;
108	break;
109	case `3`:
110	Record.Type = RecordTypes::ENTER_ARG;
111	break;
112	default:
113	return createStringError(
114	EC: std::make_error_code(e: std::errc::executable_format_error),
115	Fmt: "Unknown record type '%d' at offset %" PRId64 ".", Vals: Type, Vals: OffsetPtr);
116	}
117
118	PreReadOffset = OffsetPtr;
119	Record.FuncId = Reader.getSigned(offset_ptr: &OffsetPtr, size: sizeof(int32_t));
120	if (OffsetPtr == PreReadOffset)
121	return createStringError(
122	EC: std::make_error_code(e: std::errc::executable_format_error),
123	Fmt: "Failed reading function id field at offset %" PRId64 ".",
124	Vals: OffsetPtr);
125
126	PreReadOffset = OffsetPtr;
127	Record.TSC = Reader.getU64(offset_ptr: &OffsetPtr);
128	if (OffsetPtr == PreReadOffset)
129	return createStringError(
130	EC: std::make_error_code(e: std::errc::executable_format_error),
131	Fmt: "Failed reading TSC field at offset %" PRId64 ".", Vals: OffsetPtr);
132
133	PreReadOffset = OffsetPtr;
134	Record.TId = Reader.getU32(offset_ptr: &OffsetPtr);
135	if (OffsetPtr == PreReadOffset)
136	return createStringError(
137	EC: std::make_error_code(e: std::errc::executable_format_error),
138	Fmt: "Failed reading thread id field at offset %" PRId64 ".", Vals: OffsetPtr);
139
140	PreReadOffset = OffsetPtr;
141	Record.PId = Reader.getU32(offset_ptr: &OffsetPtr);
142	if (OffsetPtr == PreReadOffset)
143	return createStringError(
144	EC: std::make_error_code(e: std::errc::executable_format_error),
145	Fmt: "Failed reading process id at offset %" PRId64 ".", Vals: OffsetPtr);
146
147	break;
148	}
149	case `1`: { // Arg payload record.
150	auto &Record = Records.back();
151
152	// We skip the next two bytes of the record, because we don't need the
153	// type and the CPU record for arg payloads.
154	OffsetPtr += `2`;
155	PreReadOffset = OffsetPtr;
156	int32_t FuncId = Reader.getSigned(offset_ptr: &OffsetPtr, size: sizeof(int32_t));
157	if (OffsetPtr == PreReadOffset)
158	return createStringError(
159	EC: std::make_error_code(e: std::errc::executable_format_error),
160	Fmt: "Failed reading function id field at offset %" PRId64 ".",
161	Vals: OffsetPtr);
162
163	PreReadOffset = OffsetPtr;
164	auto TId = Reader.getU32(offset_ptr: &OffsetPtr);
165	if (OffsetPtr == PreReadOffset)
166	return createStringError(
167	EC: std::make_error_code(e: std::errc::executable_format_error),
168	Fmt: "Failed reading thread id field at offset %" PRId64 ".", Vals: OffsetPtr);
169
170	PreReadOffset = OffsetPtr;
171	auto PId = Reader.getU32(offset_ptr: &OffsetPtr);
172	if (OffsetPtr == PreReadOffset)
173	return createStringError(
174	EC: std::make_error_code(e: std::errc::executable_format_error),
175	Fmt: "Failed reading process id field at offset %" PRId64 ".",
176	Vals: OffsetPtr);
177
178	// Make a check for versions above 3 for the Pid field
179	if (Record.FuncId != FuncId \|\| Record.TId != TId \|\|
180	(FileHeader.Version >= `3` ? Record.PId != PId : false))
181	return createStringError(
182	EC: std::make_error_code(e: std::errc::executable_format_error),
183	Fmt: "Corrupted log, found arg payload following non-matching "
184	"function+thread record. Record for function %d != %d at offset "
185	"%" PRId64 ".",
186	Vals: Record.FuncId, Vals: FuncId, Vals: OffsetPtr);
187
188	PreReadOffset = OffsetPtr;
189	auto Arg = Reader.getU64(offset_ptr: &OffsetPtr);
190	if (OffsetPtr == PreReadOffset)
191	return createStringError(
192	EC: std::make_error_code(e: std::errc::executable_format_error),
193	Fmt: "Failed reading argument payload at offset %" PRId64 ".",
194	Vals: OffsetPtr);
195
196	Record.CallArgs.push_back(x: Arg);
197	break;
198	}
199	default:
200	return createStringError(
201	EC: std::make_error_code(e: std::errc::executable_format_error),
202	Fmt: "Unknown record type '%d' at offset %" PRId64 ".", Vals: RecordType,
203	Vals: OffsetPtr);
204	}
205	// Advance the offset pointer enough bytes to align to 32-byte records for
206	// basic mode logs.
207	OffsetPtr += `8`;
208	}
209	return Error::success();
210	}
211
212	/// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
213	/// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
214	/// a log consists of the familiar 32 bit XRayHeader, followed by sequences of
215	/// of interspersed 16 byte Metadata Records and 8 byte Function Records.
216	///
217	/// The following is an attempt to document the grammar of the format, which is
218	/// parsed by this function for little-endian machines. Since the format makes
219	/// use of BitFields, when we support big-endian architectures, we will need to
220	/// adjust not only the endianness parameter to llvm's RecordExtractor, but also
221	/// the bit twiddling logic, which is consistent with the little-endian
222	/// convention that BitFields within a struct will first be packed into the
223	/// least significant bits the address they belong to.
224	///
225	/// We expect a format complying with the grammar in the following pseudo-EBNF
226	/// in Version 1 of the FDR log.
227	///
228	/// FDRLog: XRayFileHeader ThreadBuffer*
229	/// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
230	/// Includes BufferSize
231	/// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB
232	/// BufSize: 8 byte unsigned integer indicating how large the buffer is.
233	/// NewBuffer: 16 byte metadata record with Thread Id.
234	/// WallClockTime: 16 byte metadata record with human readable time.
235	/// Pid: 16 byte metadata record with Pid
236	/// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
237	/// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
238	/// FunctionSequence: NewCPUId \| TSCWrap \| FunctionRecord
239	/// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
240	/// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
241	///
242	/// In Version 2, we make the following changes:
243	///
244	/// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId
245	/// FunctionSequence
246	/// BufferExtents: 16 byte metdata record describing how many usable bytes are
247	/// in the buffer. This is measured from the start of the buffer
248	/// and must always be at least 48 (bytes).
249	///
250	/// In Version 3, we make the following changes:
251	///
252	/// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
253	/// FunctionSequence
254	/// EOB: deprecated
255	///
256	/// In Version 4, we make the following changes:
257	///
258	/// CustomEventRecord now includes the CPU data.
259	///
260	/// In Version 5, we make the following changes:
261	///
262	/// CustomEventRecord and TypedEventRecord now use TSC delta encoding similar to
263	/// what FunctionRecord instances use, and we no longer need to include the CPU
264	/// id in the CustomEventRecord.
265	///
266	static Error loadFDRLog(StringRef Data, bool IsLittleEndian,
267	XRayFileHeader &FileHeader,
268	std::vector<XRayRecord> &Records) {
269
270	if (Data.size() < `32`)
271	return createStringError(EC: std::make_error_code(e: std::errc::invalid_argument),
272	S: "Not enough bytes for an XRay FDR log.");
273	DataExtractor DE(Data, IsLittleEndian, `8`);
274
275	uint64_t OffsetPtr = `0`;
276	auto FileHeaderOrError = readBinaryFormatHeader(HeaderExtractor&: DE, OffsetPtr);
277	if (!FileHeaderOrError)
278	return FileHeaderOrError.takeError();
279	FileHeader = std::move(FileHeaderOrError.get());
280
281	// First we load the records into memory.
282	std::vector<std::unique_ptr<Record>> FDRRecords;
283
284	{
285	FileBasedRecordProducer P(FileHeader, DE, OffsetPtr);
286	LogBuilderConsumer C(FDRRecords);
287	while (DE.isValidOffsetForDataOfSize(offset: OffsetPtr, length: `1`)) {
288	auto R = P.produce();
289	if (!R)
290	return R.takeError();
291	if (auto E = C.consume(R: std::move(R.get())))
292	return E;
293	}
294	}
295
296	// Next we index the records into blocks.
297	BlockIndexer::Index Index;
298	{
299	BlockIndexer Indexer(Index);
300	for (auto &R : FDRRecords)
301	if (auto E = R ->apply(V&: Indexer))
302	return E;
303	if (auto E = Indexer.flush())
304	return E;
305	}
306
307	// Then we verify the consistency of the blocks.
308	{
309	for (auto &PTB : Index) {
310	auto &Blocks = PTB.second;
311	for (auto &B : Blocks) {
312	BlockVerifier Verifier;
313	for (auto *R : B.Records)
314	if (auto E = R->apply(V&: Verifier))
315	return E;
316	if (auto E = Verifier.verify())
317	return E;
318	}
319	}
320	}
321
322	// This is now the meat of the algorithm. Here we sort the blocks according to
323	// the Walltime record in each of the blocks for the same thread. This allows
324	// us to more consistently recreate the execution trace in temporal order.
325	// After the sort, we then reconstitute `Trace` records using a stateful
326	// visitor associated with a single process+thread pair.
327	{
328	for (auto &PTB : Index) {
329	auto &Blocks = PTB.second;
330	llvm::sort(C&: Blocks, Comp: [](const BlockIndexer::Block &L,
331	const BlockIndexer::Block &R) {
332	return (L.WallclockTime->seconds() < R.WallclockTime->seconds() &&
333	L.WallclockTime->nanos() < R.WallclockTime->nanos());
334	});
335	auto Adder = [&](const XRayRecord &R) { Records.push_back(x: R); };
336	TraceExpander Expander(Adder, FileHeader.Version);
337	for (auto &B : Blocks) {
338	for (auto *R : B.Records)
339	if (auto E = R->apply(V&: Expander))
340	return E;
341	}
342	if (auto E = Expander.flush())
343	return E;
344	}
345	}
346
347	return Error::success();
348	}
349
350	static Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
351	std::vector<XRayRecord> &Records) {
352	YAMLXRayTrace Trace;
353	Input In(Data);
354	In >> Trace;
355	if (In.error())
356	return make_error<StringError>(Args: "Failed loading YAML Data.", Args: In.error());
357
358	FileHeader.Version = Trace.Header.Version;
359	FileHeader.Type = Trace.Header.Type;
360	FileHeader.ConstantTSC = Trace.Header.ConstantTSC;
361	FileHeader.NonstopTSC = Trace.Header.NonstopTSC;
362	FileHeader.CycleFrequency = Trace.Header.CycleFrequency;
363
364	if (FileHeader.Version != `1`)
365	return make_error<StringError>(
366	Args: Twine ("Unsupported XRay file version: ") + Twine (FileHeader.Version),
367	Args: std::make_error_code(e: std::errc::invalid_argument));
368
369	Records.clear();
370	std::transform(first: Trace.Records.begin(), last: Trace.Records.end(),
371	result: std::back_inserter(x&: Records), unary_op: [&](const YAMLXRayRecord &R) {
372	return XRayRecord{.RecordType: R.RecordType, .CPU: R.CPU, .Type: R.Type,
373	.FuncId: R.FuncId, .TSC: R.TSC, .TId: R.TId,
374	.PId: R.PId, .CallArgs: R.CallArgs, .Data: R.Data};
375	});
376	return Error::success();
377	}
378
379	Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
380	Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Name: Filename);
381	if (!FdOrErr)
382	return FdOrErr.takeError();
383
384	uint64_t FileSize;
385	if (auto EC = sys::fs::file_size(Path: Filename, Result&: FileSize)) {
386	return make_error<StringError>(
387	Args: Twine ("Cannot read log from '") + Filename + "'", Args&: EC);
388	}
389	if (FileSize < `4`) {
390	return make_error<StringError>(
391	Args: Twine ("File '") + Filename + "' too small for XRay.",
392	Args: std::make_error_code(e: std::errc::executable_format_error));
393	}
394
395	// Map the opened file into memory and use a StringRef to access it later.
396	std::error_code EC;
397	sys::fs::mapped_file_region MappedFile(
398	*FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, `0`,
399	EC);
400	sys::fs::closeFile(F&: *FdOrErr);
401	if (EC) {
402	return make_error<StringError>(
403	Args: Twine ("Cannot read log from '") + Filename + "'", Args&: EC);
404	}
405	auto Data = StringRef (MappedFile.data(), MappedFile.size());
406
407	// TODO: Lift the endianness and implementation selection here.
408	DataExtractor LittleEndianDE(Data, true, `8`);
409	auto TraceOrError = loadTrace(Extractor: LittleEndianDE, Sort);
410	if (!TraceOrError) {
411	DataExtractor BigEndianDE(Data, false, `8`);
412	consumeError(Err: TraceOrError.takeError());
413	TraceOrError = loadTrace(Extractor: BigEndianDE, Sort);
414	}
415	return TraceOrError;
416	}
417
418	Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
419	// Attempt to detect the file type using file magic. We have a slight bias
420	// towards the binary format, and we do this by making sure that the first 4
421	// bytes of the binary file is some combination of the following byte
422	// patterns: (observe the code loading them assumes they're little endian)
423	//
424	// 0x01 0x00 0x00 0x00 - version 1, "naive" format
425	// 0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format
426	// 0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format
427	//
428	// YAML files don't typically have those first four bytes as valid text so we
429	// try loading assuming YAML if we don't find these bytes.
430	//
431	// Only if we can't load either the binary or the YAML format will we yield an
432	// error.
433	DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), `8`);
434	uint64_t OffsetPtr = `0`;
435	uint16_t Version = HeaderExtractor.getU16(offset_ptr: &OffsetPtr);
436	uint16_t Type = HeaderExtractor.getU16(offset_ptr: &OffsetPtr);
437
438	enum BinaryFormatType { NAIVE_FORMAT = `0`, FLIGHT_DATA_RECORDER_FORMAT = `1` };
439
440	Trace T;
441	switch (Type) {
442	case NAIVE_FORMAT:
443	if (Version == `1` \|\| Version == `2` \|\| Version == `3`) {
444	if (auto E = loadNaiveFormatLog(Data: DE.getData(), IsLittleEndian: DE.isLittleEndian(),
445	FileHeader&: T.FileHeader, Records&: T.Records))
446	return std::move(E);
447	} else {
448	return make_error<StringError>(
449	Args: Twine ("Unsupported version for Basic/Naive Mode logging: ") +
450	Twine (Version),
451	Args: std::make_error_code(e: std::errc::executable_format_error));
452	}
453	break;
454	case FLIGHT_DATA_RECORDER_FORMAT:
455	if (Version >= `1` && Version <= `5`) {
456	if (auto E = loadFDRLog(Data: DE.getData(), IsLittleEndian: DE.isLittleEndian(), FileHeader&: T.FileHeader,
457	Records&: T.Records))
458	return std::move(E);
459	} else {
460	return make_error<StringError>(
461	Args: Twine ("Unsupported version for FDR Mode logging: ") + Twine (Version),
462	Args: std::make_error_code(e: std::errc::executable_format_error));
463	}
464	break;
465	default:
466	if (auto E = loadYAMLLog(Data: DE.getData(), FileHeader&: T.FileHeader, Records&: T.Records))
467	return std::move(E);
468	}
469
470	if (Sort)
471	llvm::stable_sort(Range&: T.Records, C: [&](const XRayRecord &L, const XRayRecord &R) {
472	return L.TSC < R.TSC;
473	});
474
475	return std::move(T);
476	}
477

Browse the source code of llvm_projects/llvm/lib/XRay/Trace.cpp