1//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class implements the disassembler of strings of bytes written in
10// hexadecimal, from standard input or from a file.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Disassembler.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/MC/MCAsmInfo.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDisassembler/MCDisassembler.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCObjectFileInfo.h"
21#include "llvm/MC/MCRegisterInfo.h"
22#include "llvm/MC/MCStreamer.h"
23#include "llvm/MC/MCSubtargetInfo.h"
24#include "llvm/MC/TargetRegistry.h"
25#include "llvm/Support/MemoryBuffer.h"
26#include "llvm/Support/SourceMgr.h"
27#include "llvm/Support/TimeProfiler.h"
28#include "llvm/Support/raw_ostream.h"
29#include "llvm/TargetParser/Triple.h"
30
31using namespace llvm;
32
/// Decoded input bytes (first) paired with, for each byte, a pointer to the
/// source text it was parsed from (second), used for diagnostic locations.
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
35
36static MCDisassembler::DecodeStatus getInstruction(const MCDisassembler &DisAsm,
37 const MCSubtargetInfo &STI,
38 MCInst &Inst, uint64_t &Size,
39 ArrayRef<uint8_t> Bytes,
40 uint64_t Address) {
41 if (STI.getTargetTriple().getArch() == Triple::hexagon)
42 return DisAsm.getInstructionBundle(Instr&: Inst, Size, Bytes, Address, CStream&: nulls());
43 return DisAsm.getInstruction(Instr&: Inst, Size, Bytes, Address, CStream&: nulls());
44}
45
46static bool printInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,
47 SourceMgr &SM, MCStreamer &Streamer, bool InAtomicBlock,
48 const MCSubtargetInfo &STI, unsigned NumBenchmarkRuns) {
49 ArrayRef<uint8_t> Data(Bytes.first);
50
51 // Disassemble it to strings.
52 uint64_t Size;
53
54 for (uint64_t Index = 0; Index < Bytes.first.size(); Index += Size) {
55
56 MCInst Inst;
57 MCDisassembler::DecodeStatus S =
58 getInstruction(DisAsm, STI, Inst, Size, Bytes: Data.slice(N: Index), Address: Index);
59 switch (S) {
60 case MCDisassembler::Fail:
61 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Bytes.second[Index]),
62 Kind: SourceMgr::DK_Warning,
63 Msg: "invalid instruction encoding");
64 // Don't try to resynchronise the stream in a block
65 if (InAtomicBlock)
66 return true;
67
68 if (Size == 0)
69 Size = 1; // skip illegible bytes
70
71 break;
72
73 case MCDisassembler::SoftFail:
74 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Bytes.second[Index]),
75 Kind: SourceMgr::DK_Warning,
76 Msg: "potentially undefined instruction encoding");
77 [[fallthrough]];
78
79 case MCDisassembler::Success:
80 Streamer.emitInstruction(Inst, STI);
81 break;
82 }
83
84 if (S == MCDisassembler::Success && NumBenchmarkRuns != 0) {
85 // Benchmark mode, collect timing for decoding the instruction several
86 // times.
87 MCInst BMInst;
88 TimeTraceScope timeScope("getInstruction");
89 for (unsigned I = 0; I < NumBenchmarkRuns; ++I) {
90 BMInst.clear();
91 BMInst.setOpcode(0);
92 S = getInstruction(DisAsm, STI, Inst&: BMInst, Size, Bytes: Data.slice(N: Index), Address: Index);
93 }
94 }
95 }
96
97 return false;
98}
99
100static bool SkipToToken(StringRef &Str) {
101 for (;;) {
102 if (Str.empty())
103 return false;
104
105 // Strip horizontal whitespace and commas.
106 if (size_t Pos = Str.find_first_not_of(Chars: " \t\r\n,")) {
107 Str = Str.substr(Start: Pos);
108 continue;
109 }
110
111 // If this is the start of a comment, remove the rest of the line.
112 if (Str[0] == '#') {
113 Str = Str.substr(Start: Str.find_first_of(C: '\n'));
114 continue;
115 }
116 return true;
117 }
118}
119
120static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str,
121 SourceMgr &SM, bool HexBytes) {
122 while (SkipToToken(Str)) {
123 // Handled by higher level
124 if (Str[0] == '[' || Str[0] == ']')
125 return false;
126
127 // Get the current token.
128 size_t Next = Str.find_first_of(Chars: " \t\n\r,#[]");
129 StringRef Value = Str.substr(Start: 0, N: Next);
130
131 // Convert to a byte and add to the byte vector.
132 unsigned ByteVal;
133 if (HexBytes) {
134 if (Next < 2) {
135 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Value.data()),
136 Kind: SourceMgr::DK_Error, Msg: "expected two hex digits");
137 Str = Str.substr(Start: Next);
138 return true;
139 }
140 Next = 2;
141 unsigned C0 = hexDigitValue(C: Value[0]);
142 unsigned C1 = hexDigitValue(C: Value[1]);
143 if (C0 == -1u || C1 == -1u) {
144 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Value.data()),
145 Kind: SourceMgr::DK_Error, Msg: "invalid input token");
146 Str = Str.substr(Start: Next);
147 return true;
148 }
149 ByteVal = C0 * 16 + C1;
150 } else if (Value.getAsInteger(Radix: 0, Result&: ByteVal) || ByteVal > 255) {
151 // If we have an error, print it and skip to the end of line.
152 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Value.data()), Kind: SourceMgr::DK_Error,
153 Msg: "invalid input token");
154 Str = Str.substr(Start: Str.find(C: '\n'));
155 ByteArray.first.clear();
156 ByteArray.second.clear();
157 continue;
158 }
159
160 ByteArray.first.push_back(x: ByteVal);
161 ByteArray.second.push_back(x: Value.data());
162 Str = Str.substr(Start: Next);
163 }
164
165 return false;
166}
167
168int Disassembler::disassemble(const Target &T, MCSubtargetInfo &STI,
169 MCStreamer &Streamer, MemoryBuffer &Buffer,
170 SourceMgr &SM, MCContext &Ctx,
171 const MCTargetOptions &MCOptions, bool HexBytes,
172 unsigned NumBenchmarkRuns) {
173 const Triple &TheTriple = STI.getTargetTriple();
174 std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(TT: TheTriple));
175 if (!MRI) {
176 errs() << "error: no register info for target " << TheTriple.str() << '\n';
177 return -1;
178 }
179
180 std::unique_ptr<const MCAsmInfo> MAI(
181 T.createMCAsmInfo(MRI: *MRI, TheTriple, Options: MCOptions));
182 if (!MAI) {
183 errs() << "error: no assembly info for target " << TheTriple.str() << '\n';
184 return -1;
185 }
186
187 std::unique_ptr<const MCDisassembler> DisAsm(
188 T.createMCDisassembler(STI, Ctx));
189 if (!DisAsm) {
190 errs() << "error: no disassembler for target " << TheTriple.str() << '\n';
191 return -1;
192 }
193
194 bool ErrorOccurred = false;
195
196 // Convert the input to a vector for disassembly.
197 ByteArrayTy ByteArray;
198 StringRef Str = Buffer.getBuffer();
199 bool InAtomicBlock = false;
200
201 while (SkipToToken(Str)) {
202 ByteArray.first.clear();
203 ByteArray.second.clear();
204
205 if (Str[0] == '[') {
206 if (InAtomicBlock) {
207 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Str.data()), Kind: SourceMgr::DK_Error,
208 Msg: "nested atomic blocks make no sense");
209 ErrorOccurred = true;
210 }
211 InAtomicBlock = true;
212 Str = Str.drop_front();
213 continue;
214 } else if (Str[0] == ']') {
215 if (!InAtomicBlock) {
216 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Str.data()), Kind: SourceMgr::DK_Error,
217 Msg: "attempt to close atomic block without opening");
218 ErrorOccurred = true;
219 }
220 InAtomicBlock = false;
221 Str = Str.drop_front();
222 continue;
223 }
224
225 // It's a real token, get the bytes and emit them
226 ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes);
227
228 if (!ByteArray.first.empty())
229 ErrorOccurred |= printInsts(DisAsm: *DisAsm, Bytes: ByteArray, SM, Streamer,
230 InAtomicBlock, STI, NumBenchmarkRuns);
231 }
232
233 if (InAtomicBlock) {
234 SM.PrintMessage(Loc: SMLoc::getFromPointer(Ptr: Str.data()), Kind: SourceMgr::DK_Error,
235 Msg: "unclosed atomic block");
236 ErrorOccurred = true;
237 }
238
239 return ErrorOccurred;
240}
241