1//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Register objects for access by profilers via the perf JIT interface.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14
15#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16
17#include "llvm/Support/FileSystem.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/Support/Path.h"
20#include "llvm/Support/Process.h"
21#include "llvm/Support/Threading.h"
22
23#include <mutex>
24#include <optional>
25
26#ifdef __linux__
27
28#include <sys/mman.h> // mmap()
29#include <time.h> // clock_gettime(), time(), localtime_r() */
30
#define DEBUG_TYPE "orc"

// Language identifier; it becomes part of the name of the per-process dump
// directory (XXX: should we generate something better from debug info?)
#define JIT_LANG "llvm-IR"

// Magic value stored in the dump file header: the characters "JiTD", with 'J'
// in the most significant byte.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((static_cast<uint32_t>('J') << 24) | (static_cast<uint32_t>('i') << 16) |   \
   (static_cast<uint32_t>('T') << 8) | static_cast<uint32_t>('D'))

// Version value stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
40
41using namespace llvm;
42using namespace llvm::orc;
43
44struct PerfState {
45 // cache lookups
46 uint32_t Pid;
47
48 // base directory for output data
49 std::string JitPath;
50
51 // output data stream, closed via Dumpstream
52 int DumpFd = -1;
53
54 // output data stream
55 std::unique_ptr<raw_fd_ostream> Dumpstream;
56
57 // perf mmap marker
58 void *MarkerAddr = NULL;
59};
60
61// prevent concurrent dumps from messing up the output file
62static std::mutex Mutex;
63static std::optional<PerfState> State;
64
// Prefix placed in front of every record that is written to the dump file.
// The struct is written byte-for-byte, so the field order must not change.
struct RecHeader {
  uint32_t Id;        // numeric record type
  uint32_t TotalSize; // size reported for the record
  uint64_t Timestamp; // perf_get_timestamp() value taken when writing
};
70
71struct DIR {
72 RecHeader Prefix;
73 uint64_t CodeAddr;
74 uint64_t NrEntry;
75};
76
// One debug entry as it is laid out in the dump file. writeDebugRecord()
// writes the entry's name, including a terminating byte, right after it.
// Written byte-for-byte; keep the field order.
struct DIE {
  uint64_t CodeAddr; // entry address
  uint32_t Line;     // entry line number
  uint32_t Discrim;  // entry discriminator
};
82
83struct CLR {
84 RecHeader Prefix;
85 uint32_t Pid;
86 uint32_t Tid;
87 uint64_t Vma;
88 uint64_t CodeAddr;
89 uint64_t CodeSize;
90 uint64_t CodeIndex;
91};
92
93struct UWR {
94 RecHeader Prefix;
95 uint64_t UnwindDataSize;
96 uint64_t EhFrameHeaderSize;
97 uint64_t MappedSize;
98};
99
// Convert a timespec into a single nanosecond count.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NsPerSecond = 1000ULL * 1000ULL * 1000ULL;
  const uint64_t WholeSeconds = static_cast<uint64_t>(TS->tv_sec);
  return WholeSeconds * NsPerSecond + TS->tv_nsec;
}
104
105static inline uint64_t perf_get_timestamp() {
106 timespec TS;
107 if (clock_gettime(CLOCK_MONOTONIC, tp: &TS))
108 return 0;
109
110 return timespec_to_ns(TS: &TS);
111}
112
113static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
114 assert(State && "PerfState not initialized");
115 LLVM_DEBUG(dbgs() << "Writing debug record with "
116 << DebugRecord.Entries.size() << " entries\n");
117 [[maybe_unused]] size_t Written = 0;
118 DIR Dir{.Prefix: RecHeader{.Id: static_cast<uint32_t>(DebugRecord.Prefix.Id),
119 .TotalSize: DebugRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()},
120 .CodeAddr: DebugRecord.CodeAddr, .NrEntry: DebugRecord.Entries.size()};
121 State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Dir), Size: sizeof(Dir));
122 Written += sizeof(Dir);
123 for (auto &Die : DebugRecord.Entries) {
124 DIE d{.CodeAddr: Die.Addr, .Line: Die.Lineno, .Discrim: Die.Discrim};
125 State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&d), Size: sizeof(d));
126 State->Dumpstream->write(Ptr: Die.Name.data(), Size: Die.Name.size() + 1);
127 Written += sizeof(d) + Die.Name.size() + 1;
128 }
129 LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
130}
131
132static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
133 assert(State && "PerfState not initialized");
134 uint32_t Tid = get_threadid();
135 LLVM_DEBUG(dbgs() << "Writing code record with code size "
136 << CodeRecord.CodeSize << " and code index "
137 << CodeRecord.CodeIndex << "\n");
138 CLR Clr{.Prefix: RecHeader{.Id: static_cast<uint32_t>(CodeRecord.Prefix.Id),
139 .TotalSize: CodeRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()},
140 .Pid: State->Pid,
141 .Tid: Tid,
142 .Vma: CodeRecord.Vma,
143 .CodeAddr: CodeRecord.CodeAddr,
144 .CodeSize: CodeRecord.CodeSize,
145 .CodeIndex: CodeRecord.CodeIndex};
146 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
147 << CodeRecord.Name.size() + 1 << " bytes of name, "
148 << CodeRecord.CodeSize << " bytes of code\n");
149 State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Clr), Size: sizeof(Clr));
150 State->Dumpstream->write(Ptr: CodeRecord.Name.data(), Size: CodeRecord.Name.size() + 1);
151 State->Dumpstream->write(Ptr: (const char *)CodeRecord.CodeAddr,
152 Size: CodeRecord.CodeSize);
153}
154
155static void
156writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
157 assert(State && "PerfState not initialized");
158 dbgs() << "Writing unwind record with unwind data size "
159 << UnwindRecord.UnwindDataSize << " and EH frame header size "
160 << UnwindRecord.EHFrameHdrSize << " and mapped size "
161 << UnwindRecord.MappedSize << "\n";
162 UWR Uwr{.Prefix: RecHeader{.Id: static_cast<uint32_t>(UnwindRecord.Prefix.Id),
163 .TotalSize: UnwindRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()},
164 .UnwindDataSize: UnwindRecord.UnwindDataSize, .EhFrameHeaderSize: UnwindRecord.EHFrameHdrSize,
165 .MappedSize: UnwindRecord.MappedSize};
166 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
167 << UnwindRecord.EHFrameHdrSize
168 << " bytes of EH frame header, "
169 << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
170 << " bytes of EH frame\n");
171 State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Uwr), Size: sizeof(Uwr));
172 if (UnwindRecord.EHFrameHdrAddr)
173 State->Dumpstream->write(Ptr: (const char *)UnwindRecord.EHFrameHdrAddr,
174 Size: UnwindRecord.EHFrameHdrSize);
175 else
176 State->Dumpstream->write(Ptr: UnwindRecord.EHFrameHdr.data(),
177 Size: UnwindRecord.EHFrameHdrSize);
178 State->Dumpstream->write(Ptr: (const char *)UnwindRecord.EHFrameAddr,
179 Size: UnwindRecord.UnwindDataSize -
180 UnwindRecord.EHFrameHdrSize);
181}
182
183static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
184 if (!State)
185 return make_error<StringError>(Args: "PerfState not initialized",
186 Args: inconvertibleErrorCode());
187
188 // Serialize the batch
189 std::lock_guard<std::mutex> Lock(Mutex);
190 if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
191 writeUnwindRecord(UnwindRecord: Batch.UnwindingRecord);
192
193 for (const auto &DebugInfo : Batch.DebugInfoRecords)
194 writeDebugRecord(DebugRecord: DebugInfo);
195
196 for (const auto &CodeLoad : Batch.CodeLoadRecords)
197 writeCodeRecord(CodeRecord: CodeLoad);
198
199 State->Dumpstream->flush();
200
201 return Error::success();
202}
203
// Header written once at the start of the dump file. It is written
// byte-for-byte, so the field order must not change.
struct Header {
  uint32_t Magic;     // characters "JiTD"
  uint32_t Version;   // header version
  uint32_t TotalSize; // total size of header
  uint32_t ElfMach;   // elf mach target
  uint32_t Pad1;      // reserved
  uint32_t Pid;       // id of the process writing the dump
  uint64_t Timestamp; // timestamp
  uint64_t Flags;     // flags
};
214
215static Error OpenMarker(PerfState &State) {
216 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
217 // is captured either live (perf record running when we mmap) or in deferred
218 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
219 // file for more meta data info about the jitted code. Perf report/annotate
220 // detect this special filename and process the jitdump file.
221 //
222 // Mapping must be PROT_EXEC to ensure it is captured by perf record
223 // even when not using -d option.
224 State.MarkerAddr =
225 ::mmap(NULL, len: sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
226 MAP_PRIVATE, fd: State.DumpFd, offset: 0);
227
228 if (State.MarkerAddr == MAP_FAILED)
229 return make_error<llvm::StringError>(Args: "could not mmap JIT marker",
230 Args: inconvertibleErrorCode());
231
232 return Error::success();
233}
234
235void CloseMarker(PerfState &State) {
236 if (!State.MarkerAddr)
237 return;
238
239 munmap(addr: State.MarkerAddr, len: sys::Process::getPageSizeEstimate());
240 State.MarkerAddr = nullptr;
241}
242
243static Expected<Header> FillMachine(PerfState &State) {
244 Header Hdr;
245 Hdr.Magic = LLVM_PERF_JIT_MAGIC;
246 Hdr.Version = LLVM_PERF_JIT_VERSION;
247 Hdr.TotalSize = sizeof(Hdr);
248 Hdr.Pid = State.Pid;
249 Hdr.Timestamp = perf_get_timestamp();
250
251 char Id[16];
252 struct {
253 uint16_t e_type;
254 uint16_t e_machine;
255 } Info;
256
257 size_t RequiredMemory = sizeof(Id) + sizeof(Info);
258
259 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
260 MemoryBuffer::getFileSlice(Filename: "/proc/self/exe", MapSize: RequiredMemory, Offset: 0);
261
262 // This'll not guarantee that enough data was actually read from the
263 // underlying file. Instead the trailing part of the buffer would be
264 // zeroed. Given the ELF signature check below that seems ok though,
265 // it's unlikely that the file ends just after that, and the
266 // consequence would just be that perf wouldn't recognize the
267 // signature.
268 if (!MB)
269 return make_error<llvm::StringError>(Args: "could not open /proc/self/exe",
270 Args: MB.getError());
271
272 memcpy(dest: &Id, src: (*MB)->getBufferStart(), n: sizeof(Id));
273 memcpy(dest: &Info, src: (*MB)->getBufferStart() + sizeof(Id), n: sizeof(Info));
274
275 // check ELF signature
276 if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
277 return make_error<llvm::StringError>(Args: "invalid ELF signature",
278 Args: inconvertibleErrorCode());
279
280 Hdr.ElfMach = Info.e_machine;
281
282 return Hdr;
283}
284
285static Error InitDebuggingDir(PerfState &State) {
286 time_t Time;
287 struct tm LocalTime;
288 char TimeBuffer[sizeof("YYYYMMDD")];
289 SmallString<64> Path;
290
291 // search for location to dump data to
292 if (const char *BaseDir = getenv(name: "JITDUMPDIR"))
293 Path.append(RHS: BaseDir);
294 else if (!sys::path::home_directory(result&: Path))
295 Path = ".";
296
297 // create debug directory
298 Path += "/.debug/jit/";
299 if (auto EC = sys::fs::create_directories(path: Path)) {
300 std::string ErrStr;
301 raw_string_ostream ErrStream(ErrStr);
302 ErrStream << "could not create jit cache directory " << Path << ": "
303 << EC.message() << "\n";
304 return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode());
305 }
306
307 // create unique directory for dump data related to this process
308 time(timer: &Time);
309 localtime_r(timer: &Time, tp: &LocalTime);
310 strftime(s: TimeBuffer, maxsize: sizeof(TimeBuffer), format: "%Y%m%d", tp: &LocalTime);
311 Path += JIT_LANG "-jit-";
312 Path += TimeBuffer;
313
314 SmallString<128> UniqueDebugDir;
315
316 using sys::fs::createUniqueDirectory;
317 if (auto EC = createUniqueDirectory(Prefix: Path, ResultPath&: UniqueDebugDir)) {
318 std::string ErrStr;
319 raw_string_ostream ErrStream(ErrStr);
320 ErrStream << "could not create unique jit cache directory "
321 << UniqueDebugDir << ": " << EC.message() << "\n";
322 return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode());
323 }
324
325 State.JitPath = std::string(UniqueDebugDir);
326
327 return Error::success();
328}
329
330static Error registerJITLoaderPerfStartImpl() {
331 PerfState Tentative;
332 Tentative.Pid = sys::Process::getProcessId();
333 // check if clock-source is supported
334 if (!perf_get_timestamp())
335 return make_error<StringError>(Args: "kernel does not support CLOCK_MONOTONIC",
336 Args: inconvertibleErrorCode());
337
338 if (auto Err = InitDebuggingDir(State&: Tentative))
339 return Err;
340
341 std::string Filename;
342 raw_string_ostream FilenameBuf(Filename);
343 FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
344
345 // Need to open ourselves, because we need to hand the FD to OpenMarker() and
346 // raw_fd_ostream doesn't expose the FD.
347 using sys::fs::openFileForWrite;
348 if (auto EC = openFileForReadWrite(Name: Filename, ResultFD&: Tentative.DumpFd,
349 Disp: sys::fs::CD_CreateNew, Flags: sys::fs::OF_None)) {
350 std::string ErrStr;
351 raw_string_ostream ErrStream(ErrStr);
352 ErrStream << "could not open JIT dump file " << Filename << ": "
353 << EC.message() << "\n";
354 return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode());
355 }
356
357 Tentative.Dumpstream =
358 std::make_unique<raw_fd_ostream>(args&: Tentative.DumpFd, args: true);
359
360 auto Header = FillMachine(State&: Tentative);
361 if (!Header)
362 return Header.takeError();
363
364 // signal this process emits JIT information
365 if (auto Err = OpenMarker(State&: Tentative))
366 return Err;
367
368 Tentative.Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Header.get()),
369 Size: sizeof(*Header));
370
371 // Everything initialized, can do profiling now.
372 if (Tentative.Dumpstream->has_error())
373 return make_error<StringError>(Args: "could not write JIT dump header",
374 Args: inconvertibleErrorCode());
375
376 State = std::move(Tentative);
377 return Error::success();
378}
379
380static Error registerJITLoaderPerfEndImpl() {
381 if (!State)
382 return make_error<StringError>(Args: "PerfState not initialized",
383 Args: inconvertibleErrorCode());
384
385 RecHeader Close;
386 Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
387 Close.TotalSize = sizeof(Close);
388 Close.Timestamp = perf_get_timestamp();
389 State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Close),
390 Size: sizeof(Close));
391 if (State->MarkerAddr)
392 CloseMarker(State&: *State);
393
394 State.reset();
395 return Error::success();
396}
397
398extern "C" llvm::orc::shared::CWrapperFunctionResult
399llvm_orc_registerJITLoaderPerfImpl(const char *ArgData, size_t ArgSize) {
400 using namespace orc::shared;
401 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
402 ArgData, ArgSize, Handler&: registerJITLoaderPerfImpl)
403 .release();
404}
405
406extern "C" llvm::orc::shared::CWrapperFunctionResult
407llvm_orc_registerJITLoaderPerfStart(const char *ArgData, size_t ArgSize) {
408 using namespace orc::shared;
409 return WrapperFunction<SPSError()>::handle(ArgData, ArgSize,
410 Handler&: registerJITLoaderPerfStartImpl)
411 .release();
412}
413
414extern "C" llvm::orc::shared::CWrapperFunctionResult
415llvm_orc_registerJITLoaderPerfEnd(const char *ArgData, size_t ArgSize) {
416 using namespace orc::shared;
417 return WrapperFunction<SPSError()>::handle(ArgData, ArgSize,
418 Handler&: registerJITLoaderPerfEndImpl)
419 .release();
420}
421
422#else
423
424using namespace llvm;
425using namespace llvm::orc;
426
427static Error badOS() {
428 using namespace llvm;
429 return llvm::make_error<StringError>(
430 "unsupported OS (perf support is only available on linux!)",
431 inconvertibleErrorCode());
432}
433
434static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
435
436extern "C" llvm::orc::shared::CWrapperFunctionResult
437llvm_orc_registerJITLoaderPerfImpl(const char *ArgData, size_t ArgSize) {
438 using namespace shared;
439 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
440 ArgData, ArgSize, badOSBatch)
441 .release();
442}
443
444extern "C" llvm::orc::shared::CWrapperFunctionResult
445llvm_orc_registerJITLoaderPerfStart(const char *ArgData, size_t ArgSize) {
446 using namespace shared;
447 return WrapperFunction<SPSError()>::handle(ArgData, ArgSize, badOS).release();
448}
449
450extern "C" llvm::orc::shared::CWrapperFunctionResult
451llvm_orc_registerJITLoaderPerfEnd(const char *ArgData, size_t ArgSize) {
452 using namespace shared;
453 return WrapperFunction<SPSError()>::handle(ArgData, ArgSize, badOS).release();
454}
455
456#endif
457