1//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExternalSymbolizer.h"
10#include "MCTargetDesc/AArch64MCAsmInfo.h"
11#include "llvm/MC/MCContext.h"
12#include "llvm/MC/MCExpr.h"
13#include "llvm/MC/MCInst.h"
14#include "llvm/MC/MCRegisterInfo.h"
15#include "llvm/Support/Format.h"
16#include "llvm/Support/raw_ostream.h"
17
18using namespace llvm;
19
20#define DEBUG_TYPE "aarch64-disassembler"
21
22static AArch64::Specifier
23getMachOSpecifier(uint64_t LLVMDisassembler_VariantKind) {
24 switch (LLVMDisassembler_VariantKind) {
25 case LLVMDisassembler_VariantKind_None:
26 return AArch64::S_None;
27 case LLVMDisassembler_VariantKind_ARM64_PAGE:
28 return AArch64::S_MACHO_PAGE;
29 case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
30 return AArch64::S_MACHO_PAGEOFF;
31 case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
32 return AArch64::S_MACHO_GOTPAGE;
33 case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
34 return AArch64::S_MACHO_GOTPAGEOFF;
35 case LLVMDisassembler_VariantKind_ARM64_TLVP:
36 return AArch64::S_MACHO_TLVPPAGE;
37 case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
38 return AArch64::S_MACHO_TLVPPAGEOFF;
39 default:
40 llvm_unreachable("bad LLVMDisassembler_VariantKind");
41 }
42}
43
44/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
45/// operand in place of the immediate Value in the MCInst. The immediate
46/// Value has not had any PC adjustment made by the caller. If the instruction
47/// is a branch that adds the PC to the immediate Value then isBranch is
48/// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
49/// symbolic information at the Address for this instruction. If that returns
50/// non-zero then the symbolic information it returns is used to create an
51/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
52/// returns zero and isBranch is Success then a symbol look up for
53/// Address + Value is done and if a symbol is found an MCExpr is created with
54/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
55/// returns zero and isBranch is Fail then the Opcode of the MCInst is
56/// tested and for ADRP an other instructions that help to load of pointers
57/// a symbol look up is done to see it is returns a specific reference type
58/// to add to the comment stream. This function returns Success if it adds
59/// an operand to the MCInst and Fail otherwise.
60bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
61 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
62 bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
63 if (!SymbolLookUp)
64 return false;
65 // FIXME: This method shares a lot of code with
66 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
67 // refactor the MCExternalSymbolizer interface to allow more of this
68 // implementation to be shared.
69 //
70 struct LLVMOpInfo1 SymbolicOp;
71 memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1));
72 SymbolicOp.Value = Value;
73 uint64_t ReferenceType;
74 const char *ReferenceName;
75 if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
76 1, &SymbolicOp)) {
77 if (IsBranch) {
78 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
79 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
80 Address, &ReferenceName);
81 if (Name) {
82 SymbolicOp.AddSymbol.Name = Name;
83 SymbolicOp.AddSymbol.Present = true;
84 SymbolicOp.Value = 0;
85 } else {
86 SymbolicOp.Value = Address + Value;
87 }
88 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
89 CommentStream << "symbol stub for: " << ReferenceName;
90 else if (ReferenceType ==
91 LLVMDisassembler_ReferenceType_Out_Objc_Message)
92 CommentStream << "Objc message: " << ReferenceName;
93 } else if (MI.getOpcode() == AArch64::ADRP) {
94 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
95 // otool expects the fully encoded ADRP instruction to be passed in as
96 // the value here, so reconstruct it:
97 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
98 uint32_t EncodedInst = 0x90000000;
99 EncodedInst |= (Value & 0x3) << 29; // immlo
100 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
101 EncodedInst |= MCRI.getEncodingValue(Reg: MI.getOperand(i: 0).getReg()); // reg
102 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
103 &ReferenceName);
104 CommentStream << format(Fmt: "0x%llx", Vals: (0xfffffffffffff000LL & Address) +
105 Value * 0x1000);
106 } else if (MI.getOpcode() == AArch64::ADDXri ||
107 MI.getOpcode() == AArch64::LDRXui ||
108 MI.getOpcode() == AArch64::LDRXl ||
109 MI.getOpcode() == AArch64::ADR) {
110 if (MI.getOpcode() == AArch64::ADDXri)
111 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
112 else if (MI.getOpcode() == AArch64::LDRXui)
113 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
114 if (MI.getOpcode() == AArch64::LDRXl) {
115 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
116 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
117 &ReferenceName);
118 } else if (MI.getOpcode() == AArch64::ADR) {
119 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
120 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
121 &ReferenceName);
122 } else {
123 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
124 // otool expects the fully encoded ADD/LDR instruction to be passed in
125 // as the value here, so reconstruct it:
126 unsigned EncodedInst =
127 MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
128 EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
129 EncodedInst |=
130 MCRI.getEncodingValue(Reg: MI.getOperand(i: 1).getReg()) << 5; // Rn
131 EncodedInst |= MCRI.getEncodingValue(Reg: MI.getOperand(i: 0).getReg()); // Rd
132
133 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
134 &ReferenceName);
135 }
136 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
137 CommentStream << "literal pool symbol address: " << ReferenceName;
138 else if (ReferenceType ==
139 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
140 CommentStream << "literal pool for: \"";
141 CommentStream.write_escaped(Str: ReferenceName);
142 CommentStream << "\"";
143 } else if (ReferenceType ==
144 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
145 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
146 else if (ReferenceType ==
147 LLVMDisassembler_ReferenceType_Out_Objc_Message)
148 CommentStream << "Objc message: " << ReferenceName;
149 else if (ReferenceType ==
150 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
151 CommentStream << "Objc message ref: " << ReferenceName;
152 else if (ReferenceType ==
153 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
154 CommentStream << "Objc selector ref: " << ReferenceName;
155 else if (ReferenceType ==
156 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
157 CommentStream << "Objc class ref: " << ReferenceName;
158 // For these instructions, the SymbolLookUp() above is just to get the
159 // ReferenceType and ReferenceName. We want to make sure not to
160 // fall through so we don't build an MCExpr to leave the disassembly
161 // of the immediate values of these instructions to the InstPrinter.
162 return false;
163 } else {
164 return false;
165 }
166 }
167
168 const MCExpr *Add = nullptr;
169 if (SymbolicOp.AddSymbol.Present) {
170 if (SymbolicOp.AddSymbol.Name) {
171 StringRef Name(SymbolicOp.AddSymbol.Name);
172 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
173 auto Spec = getMachOSpecifier(LLVMDisassembler_VariantKind: SymbolicOp.VariantKind);
174 if (Spec != AArch64::S_None)
175 Add = MCSymbolRefExpr::create(Symbol: Sym, specifier: Spec, Ctx);
176 else
177 Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
178 } else {
179 Add = MCConstantExpr::create(Value: SymbolicOp.AddSymbol.Value, Ctx);
180 }
181 }
182
183 const MCExpr *Sub = nullptr;
184 if (SymbolicOp.SubtractSymbol.Present) {
185 if (SymbolicOp.SubtractSymbol.Name) {
186 StringRef Name(SymbolicOp.SubtractSymbol.Name);
187 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
188 Sub = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
189 } else {
190 Sub = MCConstantExpr::create(Value: SymbolicOp.SubtractSymbol.Value, Ctx);
191 }
192 }
193
194 const MCExpr *Off = nullptr;
195 if (SymbolicOp.Value != 0)
196 Off = MCConstantExpr::create(Value: SymbolicOp.Value, Ctx);
197
198 const MCExpr *Expr;
199 if (Sub) {
200 const MCExpr *LHS;
201 if (Add)
202 LHS = MCBinaryExpr::createSub(LHS: Add, RHS: Sub, Ctx);
203 else
204 LHS = MCUnaryExpr::createMinus(Expr: Sub, Ctx);
205 if (Off)
206 Expr = MCBinaryExpr::createAdd(LHS, RHS: Off, Ctx);
207 else
208 Expr = LHS;
209 } else if (Add) {
210 if (Off)
211 Expr = MCBinaryExpr::createAdd(LHS: Add, RHS: Off, Ctx);
212 else
213 Expr = Add;
214 } else {
215 if (Off)
216 Expr = Off;
217 else
218 Expr = MCConstantExpr::create(Value: 0, Ctx);
219 }
220
221 MI.addOperand(Op: MCOperand::createExpr(Val: Expr));
222
223 return true;
224}
225