1//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExternalSymbolizer.h"
10#include "MCTargetDesc/AArch64AddressingModes.h"
11#include "Utils/AArch64BaseInfo.h"
12#include "llvm/MC/MCContext.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCInst.h"
15#include "llvm/MC/MCRegisterInfo.h"
16#include "llvm/Support/Format.h"
17#include "llvm/Support/raw_ostream.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "aarch64-disassembler"
22
23static MCSymbolRefExpr::VariantKind
24getVariant(uint64_t LLVMDisassembler_VariantKind) {
25 switch (LLVMDisassembler_VariantKind) {
26 case LLVMDisassembler_VariantKind_None:
27 return MCSymbolRefExpr::VK_None;
28 case LLVMDisassembler_VariantKind_ARM64_PAGE:
29 return MCSymbolRefExpr::VK_PAGE;
30 case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
31 return MCSymbolRefExpr::VK_PAGEOFF;
32 case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
33 return MCSymbolRefExpr::VK_GOTPAGE;
34 case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
35 return MCSymbolRefExpr::VK_GOTPAGEOFF;
36 case LLVMDisassembler_VariantKind_ARM64_TLVP:
37 return MCSymbolRefExpr::VK_TLVPPAGE;
38 case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
39 return MCSymbolRefExpr::VK_TLVPPAGEOFF;
40 default:
41 llvm_unreachable("bad LLVMDisassembler_VariantKind");
42 }
43}
44
45/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
46/// operand in place of the immediate Value in the MCInst. The immediate
47/// Value has not had any PC adjustment made by the caller. If the instruction
48/// is a branch that adds the PC to the immediate Value then isBranch is
49/// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
50/// symbolic information at the Address for this instrution. If that returns
51/// non-zero then the symbolic information it returns is used to create an
52/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
53/// returns zero and isBranch is Success then a symbol look up for
54/// Address + Value is done and if a symbol is found an MCExpr is created with
55/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
56/// returns zero and isBranch is Fail then the Opcode of the MCInst is
57/// tested and for ADRP an other instructions that help to load of pointers
58/// a symbol look up is done to see it is returns a specific reference type
59/// to add to the comment stream. This function returns Success if it adds
60/// an operand to the MCInst and Fail otherwise.
61bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
62 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
63 bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
64 if (!SymbolLookUp)
65 return false;
66 // FIXME: This method shares a lot of code with
67 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
68 // refactor the MCExternalSymbolizer interface to allow more of this
69 // implementation to be shared.
70 //
71 struct LLVMOpInfo1 SymbolicOp;
72 memset(s: &SymbolicOp, c: '\0', n: sizeof(struct LLVMOpInfo1));
73 SymbolicOp.Value = Value;
74 uint64_t ReferenceType;
75 const char *ReferenceName;
76 if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
77 1, &SymbolicOp)) {
78 if (IsBranch) {
79 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
80 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
81 Address, &ReferenceName);
82 if (Name) {
83 SymbolicOp.AddSymbol.Name = Name;
84 SymbolicOp.AddSymbol.Present = true;
85 SymbolicOp.Value = 0;
86 } else {
87 SymbolicOp.Value = Address + Value;
88 }
89 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
90 CommentStream << "symbol stub for: " << ReferenceName;
91 else if (ReferenceType ==
92 LLVMDisassembler_ReferenceType_Out_Objc_Message)
93 CommentStream << "Objc message: " << ReferenceName;
94 } else if (MI.getOpcode() == AArch64::ADRP) {
95 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
96 // otool expects the fully encoded ADRP instruction to be passed in as
97 // the value here, so reconstruct it:
98 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
99 uint32_t EncodedInst = 0x90000000;
100 EncodedInst |= (Value & 0x3) << 29; // immlo
101 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
102 EncodedInst |= MCRI.getEncodingValue(RegNo: MI.getOperand(i: 0).getReg()); // reg
103 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
104 &ReferenceName);
105 CommentStream << format(Fmt: "0x%llx", Vals: (0xfffffffffffff000LL & Address) +
106 Value * 0x1000);
107 } else if (MI.getOpcode() == AArch64::ADDXri ||
108 MI.getOpcode() == AArch64::LDRXui ||
109 MI.getOpcode() == AArch64::LDRXl ||
110 MI.getOpcode() == AArch64::ADR) {
111 if (MI.getOpcode() == AArch64::ADDXri)
112 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
113 else if (MI.getOpcode() == AArch64::LDRXui)
114 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
115 if (MI.getOpcode() == AArch64::LDRXl) {
116 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
117 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
118 &ReferenceName);
119 } else if (MI.getOpcode() == AArch64::ADR) {
120 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
121 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
122 &ReferenceName);
123 } else {
124 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
125 // otool expects the fully encoded ADD/LDR instruction to be passed in
126 // as the value here, so reconstruct it:
127 unsigned EncodedInst =
128 MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
129 EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
130 EncodedInst |=
131 MCRI.getEncodingValue(RegNo: MI.getOperand(i: 1).getReg()) << 5; // Rn
132 EncodedInst |= MCRI.getEncodingValue(RegNo: MI.getOperand(i: 0).getReg()); // Rd
133
134 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
135 &ReferenceName);
136 }
137 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
138 CommentStream << "literal pool symbol address: " << ReferenceName;
139 else if (ReferenceType ==
140 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
141 CommentStream << "literal pool for: \"";
142 CommentStream.write_escaped(Str: ReferenceName);
143 CommentStream << "\"";
144 } else if (ReferenceType ==
145 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
146 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
147 else if (ReferenceType ==
148 LLVMDisassembler_ReferenceType_Out_Objc_Message)
149 CommentStream << "Objc message: " << ReferenceName;
150 else if (ReferenceType ==
151 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
152 CommentStream << "Objc message ref: " << ReferenceName;
153 else if (ReferenceType ==
154 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
155 CommentStream << "Objc selector ref: " << ReferenceName;
156 else if (ReferenceType ==
157 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
158 CommentStream << "Objc class ref: " << ReferenceName;
159 // For these instructions, the SymbolLookUp() above is just to get the
160 // ReferenceType and ReferenceName. We want to make sure not to
161 // fall through so we don't build an MCExpr to leave the disassembly
162 // of the immediate values of these instructions to the InstPrinter.
163 return false;
164 } else {
165 return false;
166 }
167 }
168
169 const MCExpr *Add = nullptr;
170 if (SymbolicOp.AddSymbol.Present) {
171 if (SymbolicOp.AddSymbol.Name) {
172 StringRef Name(SymbolicOp.AddSymbol.Name);
173 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
174 MCSymbolRefExpr::VariantKind Variant = getVariant(LLVMDisassembler_VariantKind: SymbolicOp.VariantKind);
175 if (Variant != MCSymbolRefExpr::VK_None)
176 Add = MCSymbolRefExpr::create(Symbol: Sym, Kind: Variant, Ctx);
177 else
178 Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
179 } else {
180 Add = MCConstantExpr::create(Value: SymbolicOp.AddSymbol.Value, Ctx);
181 }
182 }
183
184 const MCExpr *Sub = nullptr;
185 if (SymbolicOp.SubtractSymbol.Present) {
186 if (SymbolicOp.SubtractSymbol.Name) {
187 StringRef Name(SymbolicOp.SubtractSymbol.Name);
188 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
189 Sub = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
190 } else {
191 Sub = MCConstantExpr::create(Value: SymbolicOp.SubtractSymbol.Value, Ctx);
192 }
193 }
194
195 const MCExpr *Off = nullptr;
196 if (SymbolicOp.Value != 0)
197 Off = MCConstantExpr::create(Value: SymbolicOp.Value, Ctx);
198
199 const MCExpr *Expr;
200 if (Sub) {
201 const MCExpr *LHS;
202 if (Add)
203 LHS = MCBinaryExpr::createSub(LHS: Add, RHS: Sub, Ctx);
204 else
205 LHS = MCUnaryExpr::createMinus(Expr: Sub, Ctx);
206 if (Off)
207 Expr = MCBinaryExpr::createAdd(LHS, RHS: Off, Ctx);
208 else
209 Expr = LHS;
210 } else if (Add) {
211 if (Off)
212 Expr = MCBinaryExpr::createAdd(LHS: Add, RHS: Off, Ctx);
213 else
214 Expr = Add;
215 } else {
216 if (Off)
217 Expr = Off;
218 else
219 Expr = MCConstantExpr::create(Value: 0, Ctx);
220 }
221
222 MI.addOperand(Op: MCOperand::createExpr(Val: Expr));
223
224 return true;
225}
226