1 | //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AArch64ExternalSymbolizer.h" |
10 | #include "MCTargetDesc/AArch64MCAsmInfo.h" |
11 | #include "llvm/MC/MCContext.h" |
12 | #include "llvm/MC/MCExpr.h" |
13 | #include "llvm/MC/MCInst.h" |
14 | #include "llvm/MC/MCRegisterInfo.h" |
15 | #include "llvm/Support/Format.h" |
16 | #include "llvm/Support/raw_ostream.h" |
17 | |
18 | using namespace llvm; |
19 | |
20 | #define DEBUG_TYPE "aarch64-disassembler" |
21 | |
22 | static AArch64::Specifier |
23 | getMachOSpecifier(uint64_t LLVMDisassembler_VariantKind) { |
24 | switch (LLVMDisassembler_VariantKind) { |
25 | case LLVMDisassembler_VariantKind_None: |
26 | return AArch64::S_None; |
27 | case LLVMDisassembler_VariantKind_ARM64_PAGE: |
28 | return AArch64::S_MACHO_PAGE; |
29 | case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: |
30 | return AArch64::S_MACHO_PAGEOFF; |
31 | case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: |
32 | return AArch64::S_MACHO_GOTPAGE; |
33 | case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: |
34 | return AArch64::S_MACHO_GOTPAGEOFF; |
35 | case LLVMDisassembler_VariantKind_ARM64_TLVP: |
36 | return AArch64::S_MACHO_TLVPPAGE; |
37 | case LLVMDisassembler_VariantKind_ARM64_TLVOFF: |
38 | return AArch64::S_MACHO_TLVPPAGEOFF; |
39 | default: |
40 | llvm_unreachable("bad LLVMDisassembler_VariantKind" ); |
41 | } |
42 | } |
43 | |
44 | /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic |
45 | /// operand in place of the immediate Value in the MCInst. The immediate |
46 | /// Value has not had any PC adjustment made by the caller. If the instruction |
47 | /// is a branch that adds the PC to the immediate Value then isBranch is |
48 | /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any |
49 | /// symbolic information at the Address for this instruction. If that returns |
50 | /// non-zero then the symbolic information it returns is used to create an |
51 | /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() |
52 | /// returns zero and isBranch is Success then a symbol look up for |
53 | /// Address + Value is done and if a symbol is found an MCExpr is created with |
54 | /// that, else an MCExpr with Address + Value is created. If GetOpInfo() |
55 | /// returns zero and isBranch is Fail then the Opcode of the MCInst is |
56 | /// tested and for ADRP an other instructions that help to load of pointers |
57 | /// a symbol look up is done to see it is returns a specific reference type |
58 | /// to add to the comment stream. This function returns Success if it adds |
59 | /// an operand to the MCInst and Fail otherwise. |
60 | bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( |
61 | MCInst &MI, raw_ostream &, int64_t Value, uint64_t Address, |
62 | bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { |
63 | if (!SymbolLookUp) |
64 | return false; |
65 | // FIXME: This method shares a lot of code with |
66 | // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible |
67 | // refactor the MCExternalSymbolizer interface to allow more of this |
68 | // implementation to be shared. |
69 | // |
70 | struct LLVMOpInfo1 SymbolicOp; |
71 | memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1)); |
72 | SymbolicOp.Value = Value; |
73 | uint64_t ReferenceType; |
74 | const char *ReferenceName; |
75 | if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize, |
76 | 1, &SymbolicOp)) { |
77 | if (IsBranch) { |
78 | ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; |
79 | const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, |
80 | Address, &ReferenceName); |
81 | if (Name) { |
82 | SymbolicOp.AddSymbol.Name = Name; |
83 | SymbolicOp.AddSymbol.Present = true; |
84 | SymbolicOp.Value = 0; |
85 | } else { |
86 | SymbolicOp.Value = Address + Value; |
87 | } |
88 | if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) |
89 | CommentStream << "symbol stub for: " << ReferenceName; |
90 | else if (ReferenceType == |
91 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
92 | CommentStream << "Objc message: " << ReferenceName; |
93 | } else if (MI.getOpcode() == AArch64::ADRP) { |
94 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; |
95 | // otool expects the fully encoded ADRP instruction to be passed in as |
96 | // the value here, so reconstruct it: |
97 | const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); |
98 | uint32_t EncodedInst = 0x90000000; |
99 | EncodedInst |= (Value & 0x3) << 29; // immlo |
100 | EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi |
101 | EncodedInst |= MCRI.getEncodingValue(Reg: MI.getOperand(i: 0).getReg()); // reg |
102 | SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, |
103 | &ReferenceName); |
104 | CommentStream << format(Fmt: "0x%llx" , Vals: (0xfffffffffffff000LL & Address) + |
105 | Value * 0x1000); |
106 | } else if (MI.getOpcode() == AArch64::ADDXri || |
107 | MI.getOpcode() == AArch64::LDRXui || |
108 | MI.getOpcode() == AArch64::LDRXl || |
109 | MI.getOpcode() == AArch64::ADR) { |
110 | if (MI.getOpcode() == AArch64::ADDXri) |
111 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; |
112 | else if (MI.getOpcode() == AArch64::LDRXui) |
113 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; |
114 | if (MI.getOpcode() == AArch64::LDRXl) { |
115 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; |
116 | SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, |
117 | &ReferenceName); |
118 | } else if (MI.getOpcode() == AArch64::ADR) { |
119 | ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; |
120 | SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, |
121 | &ReferenceName); |
122 | } else { |
123 | const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); |
124 | // otool expects the fully encoded ADD/LDR instruction to be passed in |
125 | // as the value here, so reconstruct it: |
126 | unsigned EncodedInst = |
127 | MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; |
128 | EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] |
129 | EncodedInst |= |
130 | MCRI.getEncodingValue(Reg: MI.getOperand(i: 1).getReg()) << 5; // Rn |
131 | EncodedInst |= MCRI.getEncodingValue(Reg: MI.getOperand(i: 0).getReg()); // Rd |
132 | |
133 | SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, |
134 | &ReferenceName); |
135 | } |
136 | if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) |
137 | CommentStream << "literal pool symbol address: " << ReferenceName; |
138 | else if (ReferenceType == |
139 | LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { |
140 | CommentStream << "literal pool for: \"" ; |
141 | CommentStream.write_escaped(Str: ReferenceName); |
142 | CommentStream << "\"" ; |
143 | } else if (ReferenceType == |
144 | LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) |
145 | CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"" ; |
146 | else if (ReferenceType == |
147 | LLVMDisassembler_ReferenceType_Out_Objc_Message) |
148 | CommentStream << "Objc message: " << ReferenceName; |
149 | else if (ReferenceType == |
150 | LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) |
151 | CommentStream << "Objc message ref: " << ReferenceName; |
152 | else if (ReferenceType == |
153 | LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) |
154 | CommentStream << "Objc selector ref: " << ReferenceName; |
155 | else if (ReferenceType == |
156 | LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) |
157 | CommentStream << "Objc class ref: " << ReferenceName; |
158 | // For these instructions, the SymbolLookUp() above is just to get the |
159 | // ReferenceType and ReferenceName. We want to make sure not to |
160 | // fall through so we don't build an MCExpr to leave the disassembly |
161 | // of the immediate values of these instructions to the InstPrinter. |
162 | return false; |
163 | } else { |
164 | return false; |
165 | } |
166 | } |
167 | |
168 | const MCExpr *Add = nullptr; |
169 | if (SymbolicOp.AddSymbol.Present) { |
170 | if (SymbolicOp.AddSymbol.Name) { |
171 | StringRef Name(SymbolicOp.AddSymbol.Name); |
172 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
173 | auto Spec = getMachOSpecifier(LLVMDisassembler_VariantKind: SymbolicOp.VariantKind); |
174 | if (Spec != AArch64::S_None) |
175 | Add = MCSymbolRefExpr::create(Symbol: Sym, specifier: Spec, Ctx); |
176 | else |
177 | Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
178 | } else { |
179 | Add = MCConstantExpr::create(Value: SymbolicOp.AddSymbol.Value, Ctx); |
180 | } |
181 | } |
182 | |
183 | const MCExpr *Sub = nullptr; |
184 | if (SymbolicOp.SubtractSymbol.Present) { |
185 | if (SymbolicOp.SubtractSymbol.Name) { |
186 | StringRef Name(SymbolicOp.SubtractSymbol.Name); |
187 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); |
188 | Sub = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
189 | } else { |
190 | Sub = MCConstantExpr::create(Value: SymbolicOp.SubtractSymbol.Value, Ctx); |
191 | } |
192 | } |
193 | |
194 | const MCExpr *Off = nullptr; |
195 | if (SymbolicOp.Value != 0) |
196 | Off = MCConstantExpr::create(Value: SymbolicOp.Value, Ctx); |
197 | |
198 | const MCExpr *Expr; |
199 | if (Sub) { |
200 | const MCExpr *LHS; |
201 | if (Add) |
202 | LHS = MCBinaryExpr::createSub(LHS: Add, RHS: Sub, Ctx); |
203 | else |
204 | LHS = MCUnaryExpr::createMinus(Expr: Sub, Ctx); |
205 | if (Off) |
206 | Expr = MCBinaryExpr::createAdd(LHS, RHS: Off, Ctx); |
207 | else |
208 | Expr = LHS; |
209 | } else if (Add) { |
210 | if (Off) |
211 | Expr = MCBinaryExpr::createAdd(LHS: Add, RHS: Off, Ctx); |
212 | else |
213 | Expr = Add; |
214 | } else { |
215 | if (Off) |
216 | Expr = Off; |
217 | else |
218 | Expr = MCConstantExpr::create(Value: 0, Ctx); |
219 | } |
220 | |
221 | MI.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
222 | |
223 | return true; |
224 | } |
225 | |