1 | //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Register objects for access by profilers via the perf JIT interface. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" |
14 | |
15 | #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" |
16 | |
17 | #include "llvm/Support/FileSystem.h" |
18 | #include "llvm/Support/MemoryBuffer.h" |
19 | #include "llvm/Support/Path.h" |
20 | #include "llvm/Support/Process.h" |
21 | #include "llvm/Support/Threading.h" |
22 | |
23 | #include <mutex> |
24 | #include <optional> |
25 | |
26 | #ifdef __linux__ |
27 | |
28 | #include <sys/mman.h> // mmap() |
29 | #include <time.h> // clock_gettime(), time(), localtime_r() */ |
30 | #include <unistd.h> // for read(), close() |
31 | |
#define DEBUG_TYPE "orc"

// Language identifier used as the prefix of the per-process dump directory
// name (XXX: should we generate something better from debug info?)
#define JIT_LANG "llvm-IR"
// Magic number stored at the start of the dump file header: the characters
// 'J', 'i', 'T', 'D' packed from the most significant byte downwards.
#define LLVM_PERF_JIT_MAGIC                                                    \
  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
   (uint32_t)'D')
// Version number stored in the dump file header.
#define LLVM_PERF_JIT_VERSION 1
41 | |
42 | using namespace llvm; |
43 | using namespace llvm::orc; |
44 | |
45 | struct PerfState { |
46 | // cache lookups |
47 | uint32_t Pid; |
48 | |
49 | // base directory for output data |
50 | std::string JitPath; |
51 | |
52 | // output data stream, closed via Dumpstream |
53 | int DumpFd = -1; |
54 | |
55 | // output data stream |
56 | std::unique_ptr<raw_fd_ostream> Dumpstream; |
57 | |
58 | // perf mmap marker |
59 | void *MarkerAddr = NULL; |
60 | }; |
61 | |
62 | // prevent concurrent dumps from messing up the output file |
63 | static std::mutex Mutex; |
64 | static std::optional<PerfState> State; |
65 | |
// The structs below are written to the dump file byte-for-byte, so their
// layout is part of the file format. The static_asserts pin the sizes so an
// accidental field change is caught at compile time.

/// Prefix shared by every record in the dump file.
struct RecHeader {
  uint32_t Id;        // record type
  uint32_t TotalSize; // size recorded for the complete record
  uint64_t Timestamp; // time at which the record was emitted
};
static_assert(sizeof(RecHeader) == 16, "unexpected RecHeader layout");

/// Fixed-size head of a debug-info record; NrEntry DIE entries follow it.
struct DIR {
  RecHeader Prefix;
  uint64_t CodeAddr;
  uint64_t NrEntry; // number of DIE entries written after this header
};
static_assert(sizeof(DIR) == 32, "unexpected DIR layout");

/// Fixed-size head of one debug entry; a NUL-terminated name follows it.
struct DIE {
  uint64_t CodeAddr;
  uint32_t Line;
  uint32_t Discrim;
};
static_assert(sizeof(DIE) == 16, "unexpected DIE layout");

/// Fixed-size head of a code-load record; the NUL-terminated symbol name and
/// the code bytes follow it.
struct CLR {
  RecHeader Prefix;
  uint32_t Pid;
  uint32_t Tid;
  uint64_t Vma;
  uint64_t CodeAddr;
  uint64_t CodeSize;
  uint64_t CodeIndex;
};
static_assert(sizeof(CLR) == 56, "unexpected CLR layout");

/// Fixed-size head of an unwinding-info record; the EH frame header and the
/// EH frame bytes follow it.
struct UWR {
  RecHeader Prefix;
  uint64_t UnwindDataSize;
  uint64_t EhFrameHeaderSize;
  uint64_t MappedSize;
};
static_assert(sizeof(UWR) == 40, "unexpected UWR layout");
100 | |
/// Convert \p TS (seconds plus nanoseconds) into a single nanosecond count.
/// \p TS must not be null.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NanoSecPerSec = 1000000000;
  return static_cast<uint64_t>(TS->tv_sec) * NanoSecPerSec +
         static_cast<uint64_t>(TS->tv_nsec);
}
105 | |
106 | static inline uint64_t perf_get_timestamp() { |
107 | timespec TS; |
108 | if (clock_gettime(CLOCK_MONOTONIC, tp: &TS)) |
109 | return 0; |
110 | |
111 | return timespec_to_ns(TS: &TS); |
112 | } |
113 | |
114 | static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { |
115 | assert(State && "PerfState not initialized" ); |
116 | LLVM_DEBUG(dbgs() << "Writing debug record with " |
117 | << DebugRecord.Entries.size() << " entries\n" ); |
118 | [[maybe_unused]] size_t Written = 0; |
119 | DIR Dir{.Prefix: RecHeader{.Id: static_cast<uint32_t>(DebugRecord.Prefix.Id), |
120 | .TotalSize: DebugRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()}, |
121 | .CodeAddr: DebugRecord.CodeAddr, .NrEntry: DebugRecord.Entries.size()}; |
122 | State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Dir), Size: sizeof(Dir)); |
123 | Written += sizeof(Dir); |
124 | for (auto &Die : DebugRecord.Entries) { |
125 | DIE d{.CodeAddr: Die.Addr, .Line: Die.Lineno, .Discrim: Die.Discrim}; |
126 | State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&d), Size: sizeof(d)); |
127 | State->Dumpstream->write(Ptr: Die.Name.data(), Size: Die.Name.size() + 1); |
128 | Written += sizeof(d) + Die.Name.size() + 1; |
129 | } |
130 | LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n" ); |
131 | } |
132 | |
133 | static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { |
134 | assert(State && "PerfState not initialized" ); |
135 | uint32_t Tid = get_threadid(); |
136 | LLVM_DEBUG(dbgs() << "Writing code record with code size " |
137 | << CodeRecord.CodeSize << " and code index " |
138 | << CodeRecord.CodeIndex << "\n" ); |
139 | CLR Clr{.Prefix: RecHeader{.Id: static_cast<uint32_t>(CodeRecord.Prefix.Id), |
140 | .TotalSize: CodeRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()}, |
141 | .Pid: State->Pid, |
142 | .Tid: Tid, |
143 | .Vma: CodeRecord.Vma, |
144 | .CodeAddr: CodeRecord.CodeAddr, |
145 | .CodeSize: CodeRecord.CodeSize, |
146 | .CodeIndex: CodeRecord.CodeIndex}; |
147 | LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, " |
148 | << CodeRecord.Name.size() + 1 << " bytes of name, " |
149 | << CodeRecord.CodeSize << " bytes of code\n" ); |
150 | State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Clr), Size: sizeof(Clr)); |
151 | State->Dumpstream->write(Ptr: CodeRecord.Name.data(), Size: CodeRecord.Name.size() + 1); |
152 | State->Dumpstream->write(Ptr: (const char *)CodeRecord.CodeAddr, |
153 | Size: CodeRecord.CodeSize); |
154 | } |
155 | |
156 | static void |
157 | writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { |
158 | assert(State && "PerfState not initialized" ); |
159 | dbgs() << "Writing unwind record with unwind data size " |
160 | << UnwindRecord.UnwindDataSize << " and EH frame header size " |
161 | << UnwindRecord.EHFrameHdrSize << " and mapped size " |
162 | << UnwindRecord.MappedSize << "\n" ; |
163 | UWR Uwr{.Prefix: RecHeader{.Id: static_cast<uint32_t>(UnwindRecord.Prefix.Id), |
164 | .TotalSize: UnwindRecord.Prefix.TotalSize, .Timestamp: perf_get_timestamp()}, |
165 | .UnwindDataSize: UnwindRecord.UnwindDataSize, .EhFrameHeaderSize: UnwindRecord.EHFrameHdrSize, |
166 | .MappedSize: UnwindRecord.MappedSize}; |
167 | LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " |
168 | << UnwindRecord.EHFrameHdrSize |
169 | << " bytes of EH frame header, " |
170 | << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize |
171 | << " bytes of EH frame\n" ); |
172 | State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Uwr), Size: sizeof(Uwr)); |
173 | if (UnwindRecord.EHFrameHdrAddr) |
174 | State->Dumpstream->write(Ptr: (const char *)UnwindRecord.EHFrameHdrAddr, |
175 | Size: UnwindRecord.EHFrameHdrSize); |
176 | else |
177 | State->Dumpstream->write(Ptr: UnwindRecord.EHFrameHdr.data(), |
178 | Size: UnwindRecord.EHFrameHdrSize); |
179 | State->Dumpstream->write(Ptr: (const char *)UnwindRecord.EHFrameAddr, |
180 | Size: UnwindRecord.UnwindDataSize - |
181 | UnwindRecord.EHFrameHdrSize); |
182 | } |
183 | |
184 | static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { |
185 | if (!State) |
186 | return make_error<StringError>(Args: "PerfState not initialized" , |
187 | Args: inconvertibleErrorCode()); |
188 | |
189 | // Serialize the batch |
190 | std::lock_guard<std::mutex> Lock(Mutex); |
191 | if (Batch.UnwindingRecord.Prefix.TotalSize > 0) |
192 | writeUnwindRecord(UnwindRecord: Batch.UnwindingRecord); |
193 | |
194 | for (const auto &DebugInfo : Batch.DebugInfoRecords) |
195 | writeDebugRecord(DebugRecord: DebugInfo); |
196 | |
197 | for (const auto &CodeLoad : Batch.CodeLoadRecords) |
198 | writeCodeRecord(CodeRecord: CodeLoad); |
199 | |
200 | State->Dumpstream->flush(); |
201 | |
202 | return Error::success(); |
203 | } |
204 | |
/// Header written once at the start of the dump file.
///
/// Every field defaults to zero so that a default-constructed header never
/// carries indeterminate bytes (in particular in the reserved and flags
/// fields) into the file. The struct is written byte-for-byte, so its size is
/// pinned below.
struct Header {
  uint32_t Magic = 0;     // characters "JiTD"
  uint32_t Version = 0;   // header version
  uint32_t TotalSize = 0; // total size of header
  uint32_t ElfMach = 0;   // elf mach target
  uint32_t Pad1 = 0;      // reserved
  uint32_t Pid = 0;       // id of the process emitting the dump
  uint64_t Timestamp = 0; // timestamp
  uint64_t Flags = 0;     // flags
};
static_assert(sizeof(Header) == 40, "unexpected Header layout");
215 | |
216 | static Error OpenMarker(PerfState &State) { |
217 | // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap |
218 | // is captured either live (perf record running when we mmap) or in deferred |
219 | // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump |
220 | // file for more meta data info about the jitted code. Perf report/annotate |
221 | // detect this special filename and process the jitdump file. |
222 | // |
223 | // Mapping must be PROT_EXEC to ensure it is captured by perf record |
224 | // even when not using -d option. |
225 | State.MarkerAddr = |
226 | ::mmap(NULL, len: sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, |
227 | MAP_PRIVATE, fd: State.DumpFd, offset: 0); |
228 | |
229 | if (State.MarkerAddr == MAP_FAILED) |
230 | return make_error<llvm::StringError>(Args: "could not mmap JIT marker" , |
231 | Args: inconvertibleErrorCode()); |
232 | |
233 | return Error::success(); |
234 | } |
235 | |
236 | void CloseMarker(PerfState &State) { |
237 | if (!State.MarkerAddr) |
238 | return; |
239 | |
240 | munmap(addr: State.MarkerAddr, len: sys::Process::getPageSizeEstimate()); |
241 | State.MarkerAddr = nullptr; |
242 | } |
243 | |
244 | static Expected<Header> FillMachine(PerfState &State) { |
245 | Header Hdr; |
246 | Hdr.Magic = LLVM_PERF_JIT_MAGIC; |
247 | Hdr.Version = LLVM_PERF_JIT_VERSION; |
248 | Hdr.TotalSize = sizeof(Hdr); |
249 | Hdr.Pid = State.Pid; |
250 | Hdr.Timestamp = perf_get_timestamp(); |
251 | |
252 | char Id[16]; |
253 | struct { |
254 | uint16_t e_type; |
255 | uint16_t e_machine; |
256 | } Info; |
257 | |
258 | size_t RequiredMemory = sizeof(Id) + sizeof(Info); |
259 | |
260 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
261 | MemoryBuffer::getFileSlice(Filename: "/proc/self/exe" , MapSize: RequiredMemory, Offset: 0); |
262 | |
263 | // This'll not guarantee that enough data was actually read from the |
264 | // underlying file. Instead the trailing part of the buffer would be |
265 | // zeroed. Given the ELF signature check below that seems ok though, |
266 | // it's unlikely that the file ends just after that, and the |
267 | // consequence would just be that perf wouldn't recognize the |
268 | // signature. |
269 | if (!MB) |
270 | return make_error<llvm::StringError>(Args: "could not open /proc/self/exe" , |
271 | Args: MB.getError()); |
272 | |
273 | memcpy(dest: &Id, src: (*MB)->getBufferStart(), n: sizeof(Id)); |
274 | memcpy(dest: &Info, src: (*MB)->getBufferStart() + sizeof(Id), n: sizeof(Info)); |
275 | |
276 | // check ELF signature |
277 | if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F') |
278 | return make_error<llvm::StringError>(Args: "invalid ELF signature" , |
279 | Args: inconvertibleErrorCode()); |
280 | |
281 | Hdr.ElfMach = Info.e_machine; |
282 | |
283 | return Hdr; |
284 | } |
285 | |
286 | static Error InitDebuggingDir(PerfState &State) { |
287 | time_t Time; |
288 | struct tm LocalTime; |
289 | char TimeBuffer[sizeof("YYYYMMDD" )]; |
290 | SmallString<64> Path; |
291 | |
292 | // search for location to dump data to |
293 | if (const char *BaseDir = getenv(name: "JITDUMPDIR" )) |
294 | Path.append(RHS: BaseDir); |
295 | else if (!sys::path::home_directory(result&: Path)) |
296 | Path = "." ; |
297 | |
298 | // create debug directory |
299 | Path += "/.debug/jit/" ; |
300 | if (auto EC = sys::fs::create_directories(path: Path)) { |
301 | std::string ErrStr; |
302 | raw_string_ostream ErrStream(ErrStr); |
303 | ErrStream << "could not create jit cache directory " << Path << ": " |
304 | << EC.message() << "\n" ; |
305 | return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode()); |
306 | } |
307 | |
308 | // create unique directory for dump data related to this process |
309 | time(timer: &Time); |
310 | localtime_r(timer: &Time, tp: &LocalTime); |
311 | strftime(s: TimeBuffer, maxsize: sizeof(TimeBuffer), format: "%Y%m%d" , tp: &LocalTime); |
312 | Path += JIT_LANG "-jit-" ; |
313 | Path += TimeBuffer; |
314 | |
315 | SmallString<128> UniqueDebugDir; |
316 | |
317 | using sys::fs::createUniqueDirectory; |
318 | if (auto EC = createUniqueDirectory(Prefix: Path, ResultPath&: UniqueDebugDir)) { |
319 | std::string ErrStr; |
320 | raw_string_ostream ErrStream(ErrStr); |
321 | ErrStream << "could not create unique jit cache directory " |
322 | << UniqueDebugDir << ": " << EC.message() << "\n" ; |
323 | return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode()); |
324 | } |
325 | |
326 | State.JitPath = std::string(UniqueDebugDir); |
327 | |
328 | return Error::success(); |
329 | } |
330 | |
331 | static Error registerJITLoaderPerfStartImpl() { |
332 | PerfState Tentative; |
333 | Tentative.Pid = sys::Process::getProcessId(); |
334 | // check if clock-source is supported |
335 | if (!perf_get_timestamp()) |
336 | return make_error<StringError>(Args: "kernel does not support CLOCK_MONOTONIC" , |
337 | Args: inconvertibleErrorCode()); |
338 | |
339 | if (auto Err = InitDebuggingDir(State&: Tentative)) |
340 | return Err; |
341 | |
342 | std::string Filename; |
343 | raw_string_ostream FilenameBuf(Filename); |
344 | FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump" ; |
345 | |
346 | // Need to open ourselves, because we need to hand the FD to OpenMarker() and |
347 | // raw_fd_ostream doesn't expose the FD. |
348 | using sys::fs::openFileForWrite; |
349 | if (auto EC = openFileForReadWrite(Name: FilenameBuf.str(), ResultFD&: Tentative.DumpFd, |
350 | Disp: sys::fs::CD_CreateNew, Flags: sys::fs::OF_None)) { |
351 | std::string ErrStr; |
352 | raw_string_ostream ErrStream(ErrStr); |
353 | ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": " |
354 | << EC.message() << "\n" ; |
355 | return make_error<StringError>(Args: std::move(ErrStr), Args: inconvertibleErrorCode()); |
356 | } |
357 | |
358 | Tentative.Dumpstream = |
359 | std::make_unique<raw_fd_ostream>(args&: Tentative.DumpFd, args: true); |
360 | |
361 | auto = FillMachine(State&: Tentative); |
362 | if (!Header) |
363 | return Header.takeError(); |
364 | |
365 | // signal this process emits JIT information |
366 | if (auto Err = OpenMarker(State&: Tentative)) |
367 | return Err; |
368 | |
369 | Tentative.Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Header.get()), |
370 | Size: sizeof(*Header)); |
371 | |
372 | // Everything initialized, can do profiling now. |
373 | if (Tentative.Dumpstream->has_error()) |
374 | return make_error<StringError>(Args: "could not write JIT dump header" , |
375 | Args: inconvertibleErrorCode()); |
376 | |
377 | State = std::move(Tentative); |
378 | return Error::success(); |
379 | } |
380 | |
381 | static Error registerJITLoaderPerfEndImpl() { |
382 | if (!State) |
383 | return make_error<StringError>(Args: "PerfState not initialized" , |
384 | Args: inconvertibleErrorCode()); |
385 | |
386 | RecHeader Close; |
387 | Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE); |
388 | Close.TotalSize = sizeof(Close); |
389 | Close.Timestamp = perf_get_timestamp(); |
390 | State->Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Close), |
391 | Size: sizeof(Close)); |
392 | if (State->MarkerAddr) |
393 | CloseMarker(State&: *State); |
394 | |
395 | State.reset(); |
396 | return Error::success(); |
397 | } |
398 | |
399 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
400 | llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { |
401 | using namespace orc::shared; |
402 | return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle( |
403 | ArgData: Data, ArgSize: Size, Handler&: registerJITLoaderPerfImpl) |
404 | .release(); |
405 | } |
406 | |
407 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
408 | llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { |
409 | using namespace orc::shared; |
410 | return WrapperFunction<SPSError()>::handle(ArgData: Data, ArgSize: Size, |
411 | Handler&: registerJITLoaderPerfStartImpl) |
412 | .release(); |
413 | } |
414 | |
415 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
416 | llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { |
417 | using namespace orc::shared; |
418 | return WrapperFunction<SPSError()>::handle(ArgData: Data, ArgSize: Size, |
419 | Handler&: registerJITLoaderPerfEndImpl) |
420 | .release(); |
421 | } |
422 | |
423 | #else |
424 | |
425 | using namespace llvm; |
426 | using namespace llvm::orc; |
427 | |
428 | static Error badOS() { |
429 | using namespace llvm; |
430 | return llvm::make_error<StringError>( |
431 | "unsupported OS (perf support is only available on linux!)" , |
432 | inconvertibleErrorCode()); |
433 | } |
434 | |
435 | static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } |
436 | |
437 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
438 | llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { |
439 | using namespace shared; |
440 | return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size, |
441 | badOSBatch) |
442 | .release(); |
443 | } |
444 | |
445 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
446 | llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { |
447 | using namespace shared; |
448 | return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); |
449 | } |
450 | |
451 | extern "C" llvm::orc::shared::CWrapperFunctionResult |
452 | llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { |
453 | using namespace shared; |
454 | return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); |
455 | } |
456 | |
457 | #endif |
458 | |