1 | //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
10 | #include "llvm/MC/MCContext.h" |
11 | #include "llvm/MC/MCExpr.h" |
12 | #include "llvm/MC/MCInst.h" |
13 | #include "llvm/MC/TargetRegistry.h" |
14 | #include "llvm/Support/raw_ostream.h" |
15 | #include <cstring> |
16 | |
17 | using namespace llvm; |
18 | |
// Forward declaration only: this file refers to Triple purely by reference
// (in createMCSymbolizer), so the full definition is not needed here.
namespace llvm { class Triple; } // namespace llvm
22 | |
23 | // This function tries to add a symbolic operand in place of the immediate |
24 | // Value in the MCInst. The immediate Value has had any PC adjustment made by |
25 | // the caller. If the instruction is a branch instruction then IsBranch is true, |
26 | // else false. If the getOpInfo() function was set as part of the |
27 | // setupForSymbolicDisassembly() call then that function is called to get any |
28 | // symbolic information at the Address for this instruction. If that returns |
29 | // non-zero then the symbolic information it returns is used to create an MCExpr |
30 | // and that is added as an operand to the MCInst. If getOpInfo() returns zero |
31 | // and IsBranch is true then a symbol look up for Value is done and if a symbol |
32 | // is found an MCExpr is created with that, else an MCExpr with Value is |
33 | // created. This function returns true if it adds an operand to the MCInst and |
34 | // false otherwise. |
35 | bool MCExternalSymbolizer::tryAddingSymbolicOperand( |
36 | MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, |
37 | bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { |
38 | struct LLVMOpInfo1 SymbolicOp; |
39 | std::memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1)); |
40 | SymbolicOp.Value = Value; |
41 | |
42 | if (!GetOpInfo || |
43 | !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) { |
44 | // Clear SymbolicOp.Value from above and also all other fields. |
45 | std::memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1)); |
46 | |
47 | // At this point, GetOpInfo() did not find any relocation information about |
48 | // this operand and we are left to use the SymbolLookUp() call back to guess |
49 | // if the Value is the address of a symbol. In the case this is a branch |
50 | // that always makes sense to guess. But in the case of an immediate it is |
51 | // a bit more questionable if it is an address of a symbol or some other |
52 | // reference. So if the immediate Value comes from a width of 1 byte, |
53 | // OpSize, we will not guess it is an address of a symbol. Because in |
54 | // object files assembled starting at address 0 this usually leads to |
55 | // incorrect symbolication. |
56 | if (!SymbolLookUp || (OpSize == 1 && !IsBranch)) |
57 | return false; |
58 | |
59 | uint64_t ReferenceType; |
60 | if (IsBranch) |
61 | ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; |
62 | else |
63 | ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; |
64 | const char *ReferenceName; |
65 | const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, |
66 | &ReferenceName); |
67 | if (Name) { |
68 | SymbolicOp.AddSymbol.Name = Name; |
69 | SymbolicOp.AddSymbol.Present = true; |
70 | // If Name is a C++ symbol name put the human readable name in a comment. |
71 | if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) |
72 | cStream << ReferenceName; |
73 | } |
74 | // For branches always create an MCExpr so it gets printed as hex address. |
75 | else if (IsBranch) { |
76 | SymbolicOp.Value = Value; |
77 | } |
78 | if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) |
79 | cStream << "symbol stub for: " << ReferenceName; |
80 | else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) |
81 | cStream << "Objc message: " << ReferenceName; |
82 | if (!Name && !IsBranch) |
83 | return false; |
84 | } |
85 | |
86 | const MCExpr *Add = nullptr; |
87 | if (SymbolicOp.AddSymbol.Present) { |
88 | if (SymbolicOp.AddSymbol.Name) { |
89 | StringRef Name(SymbolicOp.AddSymbol.Name); |
90 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
91 | Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
92 | } else { |
93 | Add = MCConstantExpr::create(Value: (int)SymbolicOp.AddSymbol.Value, Ctx); |
94 | } |
95 | } |
96 | |
97 | const MCExpr *Sub = nullptr; |
98 | if (SymbolicOp.SubtractSymbol.Present) { |
99 | if (SymbolicOp.SubtractSymbol.Name) { |
100 | StringRef Name(SymbolicOp.SubtractSymbol.Name); |
101 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
102 | Sub = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
103 | } else { |
104 | Sub = MCConstantExpr::create(Value: (int)SymbolicOp.SubtractSymbol.Value, Ctx); |
105 | } |
106 | } |
107 | |
108 | const MCExpr *Off = nullptr; |
109 | if (SymbolicOp.Value != 0) |
110 | Off = MCConstantExpr::create(Value: SymbolicOp.Value, Ctx); |
111 | |
112 | const MCExpr *Expr; |
113 | if (Sub) { |
114 | const MCExpr *LHS; |
115 | if (Add) |
116 | LHS = MCBinaryExpr::createSub(LHS: Add, RHS: Sub, Ctx); |
117 | else |
118 | LHS = MCUnaryExpr::createMinus(Expr: Sub, Ctx); |
119 | if (Off) |
120 | Expr = MCBinaryExpr::createAdd(LHS, RHS: Off, Ctx); |
121 | else |
122 | Expr = LHS; |
123 | } else if (Add) { |
124 | if (Off) |
125 | Expr = MCBinaryExpr::createAdd(LHS: Add, RHS: Off, Ctx); |
126 | else |
127 | Expr = Add; |
128 | } else { |
129 | if (Off) |
130 | Expr = Off; |
131 | else |
132 | Expr = MCConstantExpr::create(Value: 0, Ctx); |
133 | } |
134 | |
135 | Expr = RelInfo->createExprForCAPIVariantKind(SubExpr: Expr, VariantKind: SymbolicOp.VariantKind); |
136 | if (!Expr) |
137 | return false; |
138 | |
139 | MI.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
140 | return true; |
141 | } |
142 | |
143 | // This function tries to add a comment as to what is being referenced by a load |
144 | // instruction with the base register that is the Pc. These can often be values |
145 | // in a literal pool near the Address of the instruction. The Address of the |
146 | // instruction and its immediate Value are used as a possible literal pool entry. |
147 | // The SymbolLookUp call back will return the name of a symbol referenced by the |
148 | // literal pool's entry if the referenced address is that of a symbol. Or it |
149 | // will return a pointer to a literal 'C' string if the referenced address of |
150 | // the literal pool's entry is an address into a section with C string literals. |
151 | // Or if the reference is to an Objective-C data structure it will return a |
152 | // specific reference type for it and a string. |
153 | void MCExternalSymbolizer::(raw_ostream &cStream, |
154 | int64_t Value, |
155 | uint64_t Address) { |
156 | if (SymbolLookUp) { |
157 | uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; |
158 | const char *ReferenceName; |
159 | (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); |
160 | if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) |
161 | cStream << "literal pool symbol address: " << ReferenceName; |
162 | else if(ReferenceType == |
163 | LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { |
164 | cStream << "literal pool for: \"" ; |
165 | cStream.write_escaped(Str: ReferenceName); |
166 | cStream << "\"" ; |
167 | } |
168 | else if(ReferenceType == |
169 | LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) |
170 | cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"" ; |
171 | else if(ReferenceType == |
172 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
173 | cStream << "Objc message: " << ReferenceName; |
174 | else if(ReferenceType == |
175 | LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) |
176 | cStream << "Objc message ref: " << ReferenceName; |
177 | else if(ReferenceType == |
178 | LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) |
179 | cStream << "Objc selector ref: " << ReferenceName; |
180 | else if(ReferenceType == |
181 | LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) |
182 | cStream << "Objc class ref: " << ReferenceName; |
183 | } |
184 | } |
185 | |
186 | namespace llvm { |
187 | MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, |
188 | LLVMSymbolLookupCallback SymbolLookUp, |
189 | void *DisInfo, MCContext *Ctx, |
190 | std::unique_ptr<MCRelocationInfo> &&RelInfo) { |
191 | assert(Ctx && "No MCContext given for symbolic disassembly" ); |
192 | |
193 | return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, |
194 | SymbolLookUp, DisInfo); |
195 | } |
196 | } |
197 | |