| 1 | //===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // Header for internal classes and functions to be used by implementations of | 
|---|
| 10 | // symbolizers. | 
|---|
| 11 | // | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 | #ifndef SANITIZER_SYMBOLIZER_INTERNAL_H | 
|---|
| 14 | #define SANITIZER_SYMBOLIZER_INTERNAL_H | 
|---|
| 15 |  | 
|---|
| 16 | #include "sanitizer_file.h" | 
|---|
| 17 | #include "sanitizer_symbolizer.h" | 
|---|
| 18 | #include "sanitizer_vector.h" | 
|---|
| 19 |  | 
|---|
| 20 | namespace __sanitizer { | 
|---|
| 21 |  | 
|---|
| 22 | // Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr | 
|---|
| 23 | // is extracted. When extracting a string, a newly allocated (using | 
|---|
| 24 | // InternalAlloc) and null-terminated buffer is returned. They return a pointer | 
|---|
| 25 | // to the next characted after the found delimiter. | 
|---|
| 26 | const char *(const char *str, const char *delims, char **result); | 
|---|
| 27 | const char *(const char *str, const char *delims, int *result); | 
|---|
| 28 | const char *(const char *str, const char *delims, uptr *result); | 
|---|
| 29 | const char *(const char *str, const char *delimiter, | 
|---|
| 30 | char **result); | 
|---|
| 31 |  | 
|---|
| 32 | const char *DemangleSwiftAndCXX(const char *name); | 
|---|
| 33 |  | 
|---|
| 34 | // SymbolizerTool is an interface that is implemented by individual "tools" | 
|---|
| 35 | // that can perform symbolication (external llvm-symbolizer, libbacktrace, | 
|---|
| 36 | // Windows DbgHelp symbolizer, etc.). | 
|---|
| 37 | class SymbolizerTool { | 
|---|
| 38 | public: | 
|---|
| 39 | // The main |Symbolizer| class implements a "fallback chain" of symbolizer | 
|---|
| 40 | // tools. In a request to symbolize an address, if one tool returns false, | 
|---|
| 41 | // the next tool in the chain will be tried. | 
|---|
| 42 | SymbolizerTool *next; | 
|---|
| 43 |  | 
|---|
| 44 | SymbolizerTool() : next(nullptr) { } | 
|---|
| 45 |  | 
|---|
| 46 | // Can't declare pure virtual functions in sanitizer runtimes: | 
|---|
| 47 | // __cxa_pure_virtual might be unavailable. | 
|---|
| 48 |  | 
|---|
| 49 | // The |stack| parameter is inout. It is pre-filled with the address, | 
|---|
| 50 | // module base and module offset values and is to be used to construct | 
|---|
| 51 | // other stack frames. | 
|---|
| 52 | virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) { | 
|---|
| 53 | UNIMPLEMENTED(); | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | // The |info| parameter is inout. It is pre-filled with the module base | 
|---|
| 57 | // and module offset values. | 
|---|
| 58 | virtual bool SymbolizeData(uptr addr, DataInfo *info) { | 
|---|
| 59 | UNIMPLEMENTED(); | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) { | 
|---|
| 63 | return false; | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | virtual void Flush() {} | 
|---|
| 67 |  | 
|---|
| 68 | // Return nullptr to fallback to the default platform-specific demangler. | 
|---|
| 69 | virtual const char *Demangle(const char *name) { | 
|---|
| 70 | return nullptr; | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | protected: | 
|---|
| 74 | ~SymbolizerTool() {} | 
|---|
| 75 | }; | 
|---|
| 76 |  | 
|---|
| 77 | // SymbolizerProcess encapsulates communication between the tool and | 
|---|
| 78 | // external symbolizer program, running in a different subprocess. | 
|---|
| 79 | // SymbolizerProcess may not be used from two threads simultaneously. | 
|---|
| 80 | class SymbolizerProcess { | 
|---|
| 81 | public: | 
|---|
| 82 | explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false); | 
|---|
| 83 | const char *SendCommand(const char *command); | 
|---|
| 84 |  | 
|---|
| 85 | protected: | 
|---|
| 86 | ~SymbolizerProcess() {} | 
|---|
| 87 |  | 
|---|
| 88 | /// The maximum number of arguments required to invoke a tool process. | 
|---|
| 89 | static const unsigned kArgVMax = 16; | 
|---|
| 90 |  | 
|---|
| 91 | // Customizable by subclasses. | 
|---|
| 92 | virtual bool StartSymbolizerSubprocess(); | 
|---|
| 93 | virtual bool ReadFromSymbolizer(); | 
|---|
| 94 | // Return the environment to run the symbolizer in. | 
|---|
| 95 | virtual char **GetEnvP() { return GetEnviron(); } | 
|---|
| 96 | InternalMmapVector<char> &GetBuff() { return buffer_; } | 
|---|
| 97 |  | 
|---|
| 98 | private: | 
|---|
| 99 | virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { | 
|---|
| 100 | UNIMPLEMENTED(); | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | /// Fill in an argv array to invoke the child process. | 
|---|
| 104 | virtual void GetArgV(const char *path_to_binary, | 
|---|
| 105 | const char *(&argv)[kArgVMax]) const { | 
|---|
| 106 | UNIMPLEMENTED(); | 
|---|
| 107 | } | 
|---|
| 108 |  | 
|---|
| 109 | bool Restart(); | 
|---|
| 110 | const char *SendCommandImpl(const char *command); | 
|---|
| 111 | bool WriteToSymbolizer(const char *buffer, uptr length); | 
|---|
| 112 |  | 
|---|
| 113 | const char *path_; | 
|---|
| 114 | fd_t input_fd_; | 
|---|
| 115 | fd_t output_fd_; | 
|---|
| 116 |  | 
|---|
| 117 | InternalMmapVector<char> buffer_; | 
|---|
| 118 |  | 
|---|
| 119 | static const uptr kMaxTimesRestarted = 5; | 
|---|
| 120 | static const int kSymbolizerStartupTimeMillis = 10; | 
|---|
| 121 | uptr times_restarted_; | 
|---|
| 122 | bool failed_to_start_; | 
|---|
| 123 | bool reported_invalid_path_; | 
|---|
| 124 | bool use_posix_spawn_; | 
|---|
| 125 | }; | 
|---|
| 126 |  | 
|---|
| 127 | class LLVMSymbolizerProcess; | 
|---|
| 128 |  | 
|---|
| 129 | // This tool invokes llvm-symbolizer in a subprocess. It should be as portable | 
|---|
| 130 | // as the llvm-symbolizer tool is. | 
|---|
| 131 | class LLVMSymbolizer final : public SymbolizerTool { | 
|---|
| 132 | public: | 
|---|
| 133 | explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator); | 
|---|
| 134 |  | 
|---|
| 135 | bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; | 
|---|
| 136 | bool SymbolizeData(uptr addr, DataInfo *info) override; | 
|---|
| 137 | bool SymbolizeFrame(uptr addr, FrameInfo *info) override; | 
|---|
| 138 |  | 
|---|
| 139 | private: | 
|---|
| 140 | const char *FormatAndSendCommand(const char *command_prefix, | 
|---|
| 141 | const char *module_name, uptr module_offset, | 
|---|
| 142 | ModuleArch arch); | 
|---|
| 143 |  | 
|---|
| 144 | LLVMSymbolizerProcess *symbolizer_process_; | 
|---|
| 145 | static const uptr kBufferSize = 16 * 1024; | 
|---|
| 146 | char buffer_[kBufferSize]; | 
|---|
| 147 | }; | 
|---|
| 148 |  | 
|---|
| 149 | // Parses one or more two-line strings in the following format: | 
|---|
| 150 | //   <function_name> | 
|---|
| 151 | //   <file_name>:<line_number>[:<column_number>] | 
|---|
| 152 | // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of | 
|---|
| 153 | // them use the same output format.  Returns true if any useful debug | 
|---|
| 154 | // information was found. | 
|---|
| 155 | void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res); | 
|---|
| 156 |  | 
|---|
| 157 | // Parses a two-line string in the following format: | 
|---|
| 158 | //   <symbol_name> | 
|---|
| 159 | //   <start_address> <size> | 
|---|
| 160 | // Used by LLVMSymbolizer and InternalSymbolizer. | 
|---|
| 161 | void ParseSymbolizeDataOutput(const char *str, DataInfo *info); | 
|---|
| 162 |  | 
|---|
| 163 | // Parses repeated strings in the following format: | 
|---|
| 164 | //   <function_name> | 
|---|
| 165 | //   <var_name> | 
|---|
| 166 | //   <file_name>:<line_number>[:<column_number>] | 
|---|
| 167 | //   [<frame_offset>|??] [<size>|??] [<tag_offset>|??] | 
|---|
| 168 | // Used by LLVMSymbolizer and InternalSymbolizer. | 
|---|
| 169 | void ParseSymbolizeFrameOutput(const char *str, | 
|---|
| 170 | InternalMmapVector<LocalInfo> *locals); | 
|---|
| 171 |  | 
|---|
| 172 | }  // namespace __sanitizer | 
|---|
| 173 |  | 
|---|
| 174 | #endif  // SANITIZER_SYMBOLIZER_INTERNAL_H | 
|---|
| 175 |  | 
|---|