1//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file is part of the WebAssembly Disassembler.
11///
12/// It contains code to translate the data produced by the decoder into
13/// MCInsts.
14///
15//===----------------------------------------------------------------------===//
16
17#include "MCTargetDesc/WebAssemblyMCAsmInfo.h"
18#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
19#include "TargetInfo/WebAssemblyTargetInfo.h"
20#include "llvm/BinaryFormat/Wasm.h"
21#include "llvm/MC/MCContext.h"
22#include "llvm/MC/MCDisassembler/MCDisassembler.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCSubtargetInfo.h"
26#include "llvm/MC/MCSymbol.h"
27#include "llvm/MC/MCSymbolWasm.h"
28#include "llvm/MC/TargetRegistry.h"
29#include "llvm/Support/Casting.h"
30#include "llvm/Support/Compiler.h"
31#include "llvm/Support/Endian.h"
32#include "llvm/Support/LEB128.h"
33
34using namespace llvm;
35
36#define DEBUG_TYPE "wasm-disassembler"
37
38using DecodeStatus = MCDisassembler::DecodeStatus;
39
40#include "WebAssemblyGenDisassemblerTables.inc"
41
/// Exclusive upper bound for a decoded prefixed opcode: getInstruction rejects
/// any value at or above this before using it to index a prefix table.
static constexpr int WebAssemblyInstructionTableSize = 1 << 8; // 256
43
44namespace {
45class WebAssemblyDisassembler final : public MCDisassembler {
46 std::unique_ptr<const MCInstrInfo> MCII;
47
48 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
49 ArrayRef<uint8_t> Bytes, uint64_t Address,
50 raw_ostream &CStream) const override;
51
52 Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
53 ArrayRef<uint8_t> Bytes,
54 uint64_t Address) const override;
55
56public:
57 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
58 std::unique_ptr<const MCInstrInfo> MCII)
59 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
60};
61} // end anonymous namespace
62
63static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
64 const MCSubtargetInfo &STI,
65 MCContext &Ctx) {
66 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
67 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
68}
69
70extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
71LLVMInitializeWebAssemblyDisassembler() {
72 // Register the disassembler for each target.
73 TargetRegistry::RegisterMCDisassembler(T&: getTheWebAssemblyTarget32(),
74 Fn: createWebAssemblyDisassembler);
75 TargetRegistry::RegisterMCDisassembler(T&: getTheWebAssemblyTarget64(),
76 Fn: createWebAssemblyDisassembler);
77}
78
79static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
80 if (Size >= Bytes.size())
81 return -1;
82 auto V = Bytes[Size];
83 Size++;
84 return V;
85}
86
87static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
88 bool Signed) {
89 unsigned N = 0;
90 const char *Error = nullptr;
91 Val = Signed ? decodeSLEB128(p: Bytes.data() + Size, n: &N,
92 end: Bytes.data() + Bytes.size(), error: &Error)
93 : static_cast<int64_t>(decodeULEB128(p: Bytes.data() + Size, n: &N,
94 end: Bytes.data() + Bytes.size(),
95 error: &Error));
96 if (Error)
97 return false;
98 Size += N;
99 return true;
100}
101
102static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
103 ArrayRef<uint8_t> Bytes, bool Signed) {
104 int64_t Val;
105 if (!nextLEB(Val, Bytes, Size, Signed))
106 return false;
107 MI.addOperand(Op: MCOperand::createImm(Val));
108 return true;
109}
110
111template <typename T>
112bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
113 if (Size + sizeof(T) > Bytes.size())
114 return false;
115 T Val =
116 support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
117 Size += sizeof(T);
118 if (std::is_floating_point<T>::value) {
119 MI.addOperand(
120 Op: MCOperand::createDFPImm(Val: bit_cast<uint64_t>(from: static_cast<double>(Val))));
121 } else {
122 MI.addOperand(Op: MCOperand::createImm(Val: static_cast<int64_t>(Val)));
123 }
124 return true;
125}
126
127Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
128 uint64_t &Size,
129 ArrayRef<uint8_t> Bytes,
130 uint64_t Address) const {
131 Size = 0;
132 if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
133 // Start of a code section: we're parsing only the function count.
134 int64_t FunctionCount;
135 if (!nextLEB(Val&: FunctionCount, Bytes, Size, Signed: false))
136 return false;
137 outs() << " # " << FunctionCount << " functions in section.";
138 } else {
139 // Parse the start of a single function.
140 int64_t BodySize, LocalEntryCount;
141 if (!nextLEB(Val&: BodySize, Bytes, Size, Signed: false) ||
142 !nextLEB(Val&: LocalEntryCount, Bytes, Size, Signed: false))
143 return false;
144 if (LocalEntryCount) {
145 outs() << " .local ";
146 for (int64_t I = 0; I < LocalEntryCount; I++) {
147 int64_t Count, Type;
148 if (!nextLEB(Val&: Count, Bytes, Size, Signed: false) ||
149 !nextLEB(Val&: Type, Bytes, Size, Signed: false))
150 return false;
151 for (int64_t J = 0; J < Count; J++) {
152 if (I || J)
153 outs() << ", ";
154 outs() << WebAssembly::anyTypeToString(Type);
155 }
156 }
157 }
158 }
159 outs() << "\n";
160 return true;
161}
162
163MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
164 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
165 raw_ostream &CS) const {
166 CommentStream = &CS;
167 Size = 0;
168 int Opc = nextByte(Bytes, Size);
169 if (Opc < 0)
170 return MCDisassembler::Fail;
171 const auto *WasmInst = &InstructionTable0[Opc];
172 // If this is a prefix byte, indirect to another table.
173 if (WasmInst->ET == ET_Prefix) {
174 WasmInst = nullptr;
175 // Linear search, so far only 4 entries.
176 for (const auto &[Prefix, Table] : PrefixTable) {
177 if (Prefix == Opc) {
178 WasmInst = Table;
179 break;
180 }
181 }
182 if (!WasmInst)
183 return MCDisassembler::Fail;
184 int64_t PrefixedOpc;
185 if (!nextLEB(Val&: PrefixedOpc, Bytes, Size, Signed: false))
186 return MCDisassembler::Fail;
187 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
188 return MCDisassembler::Fail;
189 WasmInst += PrefixedOpc;
190 }
191 if (WasmInst->ET == ET_Unused)
192 return MCDisassembler::Fail;
193 // At this point we must have a valid instruction to decode.
194 assert(WasmInst->ET == ET_Instruction);
195 MI.setOpcode(WasmInst->Opcode);
196 // Parse any operands.
197 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
198 auto OT = OperandTable[WasmInst->OperandStart + OPI];
199 switch (OT) {
200 // ULEB operands:
201 case WebAssembly::OPERAND_BASIC_BLOCK:
202 case WebAssembly::OPERAND_LOCAL:
203 case WebAssembly::OPERAND_GLOBAL:
204 case WebAssembly::OPERAND_FUNCTION32:
205 case WebAssembly::OPERAND_TABLE:
206 case WebAssembly::OPERAND_OFFSET32:
207 case WebAssembly::OPERAND_OFFSET64:
208 case WebAssembly::OPERAND_P2ALIGN:
209 case WebAssembly::OPERAND_TYPEINDEX:
210 case WebAssembly::OPERAND_TAG:
211 case MCOI::OPERAND_IMMEDIATE: {
212 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false))
213 return MCDisassembler::Fail;
214 break;
215 }
216 // SLEB operands:
217 case WebAssembly::OPERAND_I32IMM:
218 case WebAssembly::OPERAND_I64IMM: {
219 if (!parseLEBImmediate(MI, Size, Bytes, Signed: true))
220 return MCDisassembler::Fail;
221 break;
222 }
223 // block_type operands:
224 case WebAssembly::OPERAND_SIGNATURE: {
225 int64_t Val;
226 uint64_t PrevSize = Size;
227 if (!nextLEB(Val, Bytes, Size, Signed: true))
228 return MCDisassembler::Fail;
229 if (Val < 0) {
230 // Negative values are single septet value types or empty types
231 if (Size != PrevSize + 1) {
232 MI.addOperand(
233 Op: MCOperand::createImm(Val: int64_t(WebAssembly::BlockType::Invalid)));
234 } else {
235 MI.addOperand(Op: MCOperand::createImm(Val: Val & 0x7f));
236 }
237 } else {
238 // We don't have access to the signature, so create a symbol without one
239 MCSymbol *Sym = getContext().createTempSymbol(Name: "typeindex", AlwaysAddSuffix: true);
240 auto *WasmSym = cast<MCSymbolWasm>(Val: Sym);
241 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
242 const MCExpr *Expr = MCSymbolRefExpr::create(
243 Symbol: WasmSym, specifier: WebAssembly::S_TYPEINDEX, Ctx&: getContext());
244 MI.addOperand(Op: MCOperand::createExpr(Val: Expr));
245 }
246 break;
247 }
248 // FP operands.
249 case WebAssembly::OPERAND_F32IMM: {
250 if (!parseImmediate<float>(MI, Size, Bytes))
251 return MCDisassembler::Fail;
252 break;
253 }
254 case WebAssembly::OPERAND_F64IMM: {
255 if (!parseImmediate<double>(MI, Size, Bytes))
256 return MCDisassembler::Fail;
257 break;
258 }
259 // Vector lane operands (not LEB encoded).
260 case WebAssembly::OPERAND_VEC_I8IMM: {
261 if (!parseImmediate<uint8_t>(MI, Size, Bytes))
262 return MCDisassembler::Fail;
263 break;
264 }
265 case WebAssembly::OPERAND_VEC_I16IMM: {
266 if (!parseImmediate<uint16_t>(MI, Size, Bytes))
267 return MCDisassembler::Fail;
268 break;
269 }
270 case WebAssembly::OPERAND_VEC_I32IMM: {
271 if (!parseImmediate<uint32_t>(MI, Size, Bytes))
272 return MCDisassembler::Fail;
273 break;
274 }
275 case WebAssembly::OPERAND_VEC_I64IMM: {
276 if (!parseImmediate<uint64_t>(MI, Size, Bytes))
277 return MCDisassembler::Fail;
278 break;
279 }
280 case WebAssembly::OPERAND_BRLIST: {
281 int64_t TargetTableLen;
282 if (!nextLEB(Val&: TargetTableLen, Bytes, Size, Signed: false))
283 return MCDisassembler::Fail;
284 for (int64_t I = 0; I < TargetTableLen; I++) {
285 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false))
286 return MCDisassembler::Fail;
287 }
288 // Default case.
289 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false))
290 return MCDisassembler::Fail;
291 break;
292 }
293 case WebAssembly::OPERAND_CATCH_LIST: {
294 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false))
295 return MCDisassembler::Fail;
296 int64_t NumCatches = MI.getOperand(i: MI.getNumOperands() - 1).getImm();
297 for (int64_t I = 0; I < NumCatches; I++) {
298 if (!parseImmediate<uint8_t>(MI, Size, Bytes))
299 return MCDisassembler::Fail;
300 int64_t CatchOpcode = MI.getOperand(i: MI.getNumOperands() - 1).getImm();
301 if (CatchOpcode == wasm::WASM_OPCODE_CATCH ||
302 CatchOpcode == wasm::WASM_OPCODE_CATCH_REF) {
303 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false)) // tag index
304 return MCDisassembler::Fail;
305 }
306 if (!parseLEBImmediate(MI, Size, Bytes, Signed: false)) // destination
307 return MCDisassembler::Fail;
308 }
309 break;
310 }
311 case MCOI::OPERAND_REGISTER:
312 // The tablegen header currently does not have any register operands since
313 // we use only the stack (_S) instructions.
314 // If you hit this that probably means a bad instruction definition in
315 // tablegen.
316 llvm_unreachable("Register operand in WebAssemblyDisassembler");
317 default:
318 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
319 }
320 }
321 return MCDisassembler::Success;
322}
323