1 | //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
10 | #include "llvm/MC/MCContext.h" |
11 | #include "llvm/MC/MCExpr.h" |
12 | #include "llvm/MC/MCInst.h" |
13 | #include "llvm/Support/raw_ostream.h" |
14 | #include <cstring> |
15 | |
16 | using namespace llvm; |
17 | |
// Triple is only ever named through a reference in this file (the unused
// `const Triple &TT` parameter of createMCSymbolizer), so an incomplete type is
// sufficient and the full header does not have to be pulled in.
namespace llvm { class Triple; } // namespace llvm
21 | |
22 | // This function tries to add a symbolic operand in place of the immediate |
23 | // Value in the MCInst. The immediate Value has had any PC adjustment made by |
24 | // the caller. If the instruction is a branch instruction then IsBranch is true, |
25 | // else false. If the getOpInfo() function was set as part of the |
26 | // setupForSymbolicDisassembly() call then that function is called to get any |
27 | // symbolic information at the Address for this instruction. If that returns |
28 | // non-zero then the symbolic information it returns is used to create an MCExpr |
29 | // and that is added as an operand to the MCInst. If getOpInfo() returns zero |
30 | // and IsBranch is true then a symbol look up for Value is done and if a symbol |
31 | // is found an MCExpr is created with that, else an MCExpr with Value is |
32 | // created. This function returns true if it adds an operand to the MCInst and |
33 | // false otherwise. |
34 | bool MCExternalSymbolizer::tryAddingSymbolicOperand( |
35 | MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, |
36 | bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { |
37 | struct LLVMOpInfo1 SymbolicOp; |
38 | std::memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1)); |
39 | SymbolicOp.Value = Value; |
40 | |
41 | if (!GetOpInfo || |
42 | !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) { |
43 | // Clear SymbolicOp.Value from above and also all other fields. |
44 | std::memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1)); |
45 | |
46 | // At this point, GetOpInfo() did not find any relocation information about |
47 | // this operand and we are left to use the SymbolLookUp() call back to guess |
48 | // if the Value is the address of a symbol. In the case this is a branch |
49 | // that always makes sense to guess. But in the case of an immediate it is |
50 | // a bit more questionable if it is an address of a symbol or some other |
51 | // reference. So if the immediate Value comes from a width of 1 byte, |
52 | // OpSize, we will not guess it is an address of a symbol. Because in |
53 | // object files assembled starting at address 0 this usually leads to |
54 | // incorrect symbolication. |
55 | if (!SymbolLookUp || (OpSize == 1 && !IsBranch)) |
56 | return false; |
57 | |
58 | uint64_t ReferenceType; |
59 | if (IsBranch) |
60 | ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; |
61 | else |
62 | ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; |
63 | const char *ReferenceName; |
64 | const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, |
65 | &ReferenceName); |
66 | if (Name) { |
67 | SymbolicOp.AddSymbol.Name = Name; |
68 | SymbolicOp.AddSymbol.Present = true; |
69 | // If Name is a C++ symbol name put the human readable name in a comment. |
70 | if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) |
71 | cStream << ReferenceName; |
72 | } |
73 | // For branches always create an MCExpr so it gets printed as hex address. |
74 | else if (IsBranch) { |
75 | SymbolicOp.Value = Value; |
76 | } |
77 | if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) |
78 | cStream << "symbol stub for: " << ReferenceName; |
79 | else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) |
80 | cStream << "Objc message: " << ReferenceName; |
81 | if (!Name && !IsBranch) |
82 | return false; |
83 | } |
84 | |
85 | const MCExpr *Add = nullptr; |
86 | if (SymbolicOp.AddSymbol.Present) { |
87 | if (SymbolicOp.AddSymbol.Name) { |
88 | StringRef Name(SymbolicOp.AddSymbol.Name); |
89 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
90 | Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
91 | } else { |
92 | Add = MCConstantExpr::create(Value: (int)SymbolicOp.AddSymbol.Value, Ctx); |
93 | } |
94 | } |
95 | |
96 | const MCExpr *Sub = nullptr; |
97 | if (SymbolicOp.SubtractSymbol.Present) { |
98 | if (SymbolicOp.SubtractSymbol.Name) { |
99 | StringRef Name(SymbolicOp.SubtractSymbol.Name); |
100 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
101 | Sub = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
102 | } else { |
103 | Sub = MCConstantExpr::create(Value: (int)SymbolicOp.SubtractSymbol.Value, Ctx); |
104 | } |
105 | } |
106 | |
107 | const MCExpr *Off = nullptr; |
108 | if (SymbolicOp.Value != 0) |
109 | Off = MCConstantExpr::create(Value: SymbolicOp.Value, Ctx); |
110 | |
111 | const MCExpr *Expr; |
112 | if (Sub) { |
113 | const MCExpr *LHS; |
114 | if (Add) |
115 | LHS = MCBinaryExpr::createSub(LHS: Add, RHS: Sub, Ctx); |
116 | else |
117 | LHS = MCUnaryExpr::createMinus(Expr: Sub, Ctx); |
118 | if (Off) |
119 | Expr = MCBinaryExpr::createAdd(LHS, RHS: Off, Ctx); |
120 | else |
121 | Expr = LHS; |
122 | } else if (Add) { |
123 | if (Off) |
124 | Expr = MCBinaryExpr::createAdd(LHS: Add, RHS: Off, Ctx); |
125 | else |
126 | Expr = Add; |
127 | } else { |
128 | if (Off) |
129 | Expr = Off; |
130 | else |
131 | Expr = MCConstantExpr::create(Value: 0, Ctx); |
132 | } |
133 | |
134 | Expr = RelInfo->createExprForCAPIVariantKind(SubExpr: Expr, VariantKind: SymbolicOp.VariantKind); |
135 | if (!Expr) |
136 | return false; |
137 | |
138 | MI.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
139 | return true; |
140 | } |
141 | |
142 | // This function tries to add a comment as to what is being referenced by a load |
143 | // instruction with the base register that is the Pc. These can often be values |
144 | // in a literal pool near the Address of the instruction. The Address of the |
145 | // instruction and its immediate Value are used as a possible literal pool entry. |
146 | // The SymbolLookUp call back will return the name of a symbol referenced by the |
147 | // literal pool's entry if the referenced address is that of a symbol. Or it |
148 | // will return a pointer to a literal 'C' string if the referenced address of |
149 | // the literal pool's entry is an address into a section with C string literals. |
150 | // Or if the reference is to an Objective-C data structure it will return a |
151 | // specific reference type for it and a string. |
152 | void MCExternalSymbolizer::(raw_ostream &cStream, |
153 | int64_t Value, |
154 | uint64_t Address) { |
155 | if (SymbolLookUp) { |
156 | uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; |
157 | const char *ReferenceName; |
158 | (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); |
159 | if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) |
160 | cStream << "literal pool symbol address: " << ReferenceName; |
161 | else if(ReferenceType == |
162 | LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { |
163 | cStream << "literal pool for: \"" ; |
164 | cStream.write_escaped(Str: ReferenceName); |
165 | cStream << "\"" ; |
166 | } |
167 | else if(ReferenceType == |
168 | LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) |
169 | cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"" ; |
170 | else if(ReferenceType == |
171 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
172 | cStream << "Objc message: " << ReferenceName; |
173 | else if(ReferenceType == |
174 | LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) |
175 | cStream << "Objc message ref: " << ReferenceName; |
176 | else if(ReferenceType == |
177 | LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) |
178 | cStream << "Objc selector ref: " << ReferenceName; |
179 | else if(ReferenceType == |
180 | LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) |
181 | cStream << "Objc class ref: " << ReferenceName; |
182 | } |
183 | } |
184 | |
185 | namespace llvm { |
186 | MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, |
187 | LLVMSymbolLookupCallback SymbolLookUp, |
188 | void *DisInfo, MCContext *Ctx, |
189 | std::unique_ptr<MCRelocationInfo> &&RelInfo) { |
190 | assert(Ctx && "No MCContext given for symbolic disassembly" ); |
191 | |
192 | return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, |
193 | SymbolLookUp, DisInfo); |
194 | } |
195 | } |
196 | |