1//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Symbolizer is used by sanitizers to map an instruction address to a location
// in source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
// defined in the program, or (if they are missing) tries to find and
// launch the "llvm-symbolizer" command-line tool in a separate process and
// communicate with it.
14//
15// Generally we should try to avoid calling system library functions during
16// symbolization (and use their replacements from sanitizer_libc.h instead).
17//===----------------------------------------------------------------------===//
18#ifndef SANITIZER_SYMBOLIZER_H
19#define SANITIZER_SYMBOLIZER_H
20
21#include "sanitizer_common.h"
22#include "sanitizer_mutex.h"
23#include "sanitizer_vector.h"
24
25namespace __sanitizer {
26
27struct AddressInfo {
28 // Owns all the string members. Storage for them is
29 // (de)allocated using sanitizer internal allocator.
30 uptr address;
31
32 char *module;
33 uptr module_offset;
34 ModuleArch module_arch;
35 u8 uuid[kModuleUUIDSize];
36 uptr uuid_size;
37
38 static const uptr kUnknown = ~(uptr)0;
39 char *function;
40 uptr function_offset;
41
42 char *file;
43 int line;
44 int column;
45
46 AddressInfo();
47 // Deletes all strings and resets all fields.
48 void Clear();
49 void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
50 void FillModuleInfo(const LoadedModule &mod);
51 uptr module_base() const { return address - module_offset; }
52};
53
54// Linked list of symbolized frames (each frame is described by AddressInfo).
55struct SymbolizedStack {
56 SymbolizedStack *next;
57 AddressInfo info;
58 static SymbolizedStack *New(uptr addr);
59 // Deletes current, and all subsequent frames in the linked list.
60 // The object cannot be accessed after the call to this function.
61 void ClearAll();
62
63 private:
64 SymbolizedStack();
65};
66
67class SymbolizedStackHolder {
68 SymbolizedStack *Stack;
69
70 void clear() {
71 if (Stack)
72 Stack->ClearAll();
73 }
74
75 public:
76 explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr)
77 : Stack(Stack) {}
78 ~SymbolizedStackHolder() { clear(); }
79 void reset(SymbolizedStack *S = nullptr) {
80 if (Stack != S)
81 clear();
82 Stack = S;
83 }
84 const SymbolizedStack *get() const { return Stack; }
85};
86
87// For now, DataInfo is used to describe global variable.
88struct DataInfo {
89 // Owns all the string members. Storage for them is
90 // (de)allocated using sanitizer internal allocator.
91 char *module;
92 uptr module_offset;
93 ModuleArch module_arch;
94
95 char *file;
96 uptr line;
97 char *name;
98 uptr start;
99 uptr size;
100
101 DataInfo();
102 void Clear();
103};
104
105struct LocalInfo {
106 char *function_name = nullptr;
107 char *name = nullptr;
108 char *decl_file = nullptr;
109 unsigned decl_line = 0;
110
111 bool has_frame_offset = false;
112 bool has_size = false;
113 bool has_tag_offset = false;
114
115 sptr frame_offset;
116 uptr size;
117 uptr tag_offset;
118
119 void Clear();
120};
121
122struct FrameInfo {
123 char *module;
124 uptr module_offset;
125 ModuleArch module_arch;
126
127 InternalMmapVector<LocalInfo> locals;
128 void Clear();
129};
130
131class SymbolizerTool;
132
133class Symbolizer final {
134 public:
135 /// Initialize and return platform-specific implementation of symbolizer
136 /// (if it wasn't already initialized).
137 static Symbolizer *GetOrInit();
138 static void LateInitialize();
139 // Returns a list of symbolized frames for a given address (containing
140 // all inlined functions, if necessary).
141 SymbolizedStack *SymbolizePC(uptr address);
142 bool SymbolizeData(uptr address, DataInfo *info);
143 bool SymbolizeFrame(uptr address, FrameInfo *info);
144
145 // The module names Symbolizer returns are stable and unique for every given
146 // module. It is safe to store and compare them as pointers.
147 bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
148 uptr *module_address);
149 const char *GetModuleNameForPc(uptr pc) {
150 const char *module_name = nullptr;
151 uptr unused;
152 if (GetModuleNameAndOffsetForPC(pc, module_name: &module_name, module_address: &unused))
153 return module_name;
154 return nullptr;
155 }
156
157 // Release internal caches (if any).
158 void Flush();
159 // Attempts to demangle the provided C++ mangled name. Never returns nullptr.
160 const char *Demangle(const char *name);
161
162 // Allow user to install hooks that would be called before/after Symbolizer
163 // does the actual file/line info fetching. Specific sanitizers may need this
164 // to distinguish system library calls made in user code from calls made
165 // during in-process symbolization.
166 typedef void (*StartSymbolizationHook)();
167 typedef void (*EndSymbolizationHook)();
168 // May be called at most once.
169 void AddHooks(StartSymbolizationHook start_hook,
170 EndSymbolizationHook end_hook);
171
172 void RefreshModules();
173 const LoadedModule *FindModuleForAddress(uptr address);
174
175 void InvalidateModuleList();
176
177 const ListOfModules &GetRefreshedListOfModules();
178
179 private:
180 // GetModuleNameAndOffsetForPC has to return a string to the caller.
181 // Since the corresponding module might get unloaded later, we should create
182 // our owned copies of the strings that we can safely return.
183 // ModuleNameOwner does not provide any synchronization, thus calls to
184 // its method should be protected by |mu_|.
185 class ModuleNameOwner {
186 public:
187 explicit ModuleNameOwner(Mutex *synchronized_by)
188 : last_match_(nullptr), mu_(synchronized_by) {
189 storage_.reserve(new_size: kInitialCapacity);
190 }
191 const char *GetOwnedCopy(const char *str);
192
193 private:
194 static const uptr kInitialCapacity = 1000;
195 InternalMmapVector<const char*> storage_;
196 const char *last_match_;
197
198 Mutex *mu_;
199 } module_names_;
200
201 /// Platform-specific function for creating a Symbolizer object.
202 static Symbolizer *PlatformInit();
203
204 bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
205 uptr *module_offset,
206 ModuleArch *module_arch);
207 ListOfModules modules_;
208 ListOfModules fallback_modules_;
209 // If stale, need to reload the modules before looking up addresses.
210 bool modules_fresh_;
211
212 // Platform-specific default demangler, returns nullptr on failure.
213 const char *PlatformDemangle(const char *name);
214
215 static Symbolizer *symbolizer_;
216 static StaticSpinMutex init_mu_;
217
218 // Mutex locked from public methods of |Symbolizer|, so that the internals
219 // (including individual symbolizer tools and platform-specific methods) are
220 // always synchronized.
221 Mutex mu_;
222
223 IntrusiveList<SymbolizerTool> tools_;
224
225 explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
226
227 static LowLevelAllocator symbolizer_allocator_;
228
229 StartSymbolizationHook start_hook_;
230 EndSymbolizationHook end_hook_;
231 class SymbolizerScope {
232 public:
233 explicit SymbolizerScope(const Symbolizer *sym);
234 ~SymbolizerScope();
235 private:
236 const Symbolizer *sym_;
237 int errno_; // Backup errno in case symbolizer change the value.
238 };
239};
240
// Declared only when SANITIZER_WINDOWS is defined.
// NOTE(review): this guard tests whether the macro is defined, not its value.
// If the macro is defined as 0/1 on every platform, `#if SANITIZER_WINDOWS`
// may be what was intended -- confirm against the macro's definition.
#if defined(SANITIZER_WINDOWS)
void InitializeDbgHelpIfNeeded();
#endif  // defined(SANITIZER_WINDOWS)
244
245} // namespace __sanitizer
246
247#endif // SANITIZER_SYMBOLIZER_H
248