1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/BPFMCTargetDesc.h"
10#include "TargetInfo/BPFTargetInfo.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/StringSwitch.h"
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCInstrInfo.h"
17#include "llvm/MC/MCParser/MCAsmLexer.h"
18#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19#include "llvm/MC/MCParser/MCTargetAsmParser.h"
20#include "llvm/MC/MCRegisterInfo.h"
21#include "llvm/MC/MCStreamer.h"
22#include "llvm/MC/MCSubtargetInfo.h"
23#include "llvm/MC/TargetRegistry.h"
24#include "llvm/Support/Casting.h"
25
26using namespace llvm;
27
28namespace {
29struct BPFOperand;
30
31class BPFAsmParser : public MCTargetAsmParser {
32
33 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34
35 bool PreMatchCheck(OperandVector &Operands);
36
37 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38 OperandVector &Operands, MCStreamer &Out,
39 uint64_t &ErrorInfo,
40 bool MatchingInlineAsm) override;
41
42 bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44 SMLoc &EndLoc) override;
45
46 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
47 SMLoc NameLoc, OperandVector &Operands) override;
48
49 // "=" is used as assignment operator for assembly statment, so can't be used
50 // for symbol assignment.
51 bool equalIsAsmAssignment() override { return false; }
52 // "*" is used for dereferencing memory that it will be the start of
53 // statement.
54 bool starIsStartOfStatement() override { return true; }
55
56#define GET_ASSEMBLER_HEADER
57#include "BPFGenAsmMatcher.inc"
58
59 ParseStatus parseImmediate(OperandVector &Operands);
60 ParseStatus parseRegister(OperandVector &Operands);
61 ParseStatus parseOperandAsOperator(OperandVector &Operands);
62
63public:
64 enum BPFMatchResultTy {
65 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
66#define GET_OPERAND_DIAGNOSTIC_TYPES
67#include "BPFGenAsmMatcher.inc"
68#undef GET_OPERAND_DIAGNOSTIC_TYPES
69 };
70
71 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
72 const MCInstrInfo &MII, const MCTargetOptions &Options)
73 : MCTargetAsmParser(Options, STI, MII) {
74 setAvailableFeatures(ComputeAvailableFeatures(FB: STI.getFeatureBits()));
75 }
76};
77
78/// BPFOperand - Instances of this class represent a parsed machine
79/// instruction
80struct BPFOperand : public MCParsedAsmOperand {
81
82 enum KindTy {
83 Token,
84 Register,
85 Immediate,
86 } Kind;
87
88 struct RegOp {
89 unsigned RegNum;
90 };
91
92 struct ImmOp {
93 const MCExpr *Val;
94 };
95
96 SMLoc StartLoc, EndLoc;
97 union {
98 StringRef Tok;
99 RegOp Reg;
100 ImmOp Imm;
101 };
102
103 BPFOperand(KindTy K) : Kind(K) {}
104
105public:
106 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
107 Kind = o.Kind;
108 StartLoc = o.StartLoc;
109 EndLoc = o.EndLoc;
110
111 switch (Kind) {
112 case Register:
113 Reg = o.Reg;
114 break;
115 case Immediate:
116 Imm = o.Imm;
117 break;
118 case Token:
119 Tok = o.Tok;
120 break;
121 }
122 }
123
124 bool isToken() const override { return Kind == Token; }
125 bool isReg() const override { return Kind == Register; }
126 bool isImm() const override { return Kind == Immediate; }
127 bool isMem() const override { return false; }
128
129 bool isConstantImm() const {
130 return isImm() && isa<MCConstantExpr>(Val: getImm());
131 }
132
133 int64_t getConstantImm() const {
134 const MCExpr *Val = getImm();
135 return static_cast<const MCConstantExpr *>(Val)->getValue();
136 }
137
138 bool isSImm16() const {
139 return (isConstantImm() && isInt<16>(x: getConstantImm()));
140 }
141
142 bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(Val: getImm()); }
143
144 bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
145
146 /// getStartLoc - Gets location of the first token of this operand
147 SMLoc getStartLoc() const override { return StartLoc; }
148 /// getEndLoc - Gets location of the last token of this operand
149 SMLoc getEndLoc() const override { return EndLoc; }
150
151 MCRegister getReg() const override {
152 assert(Kind == Register && "Invalid type access!");
153 return Reg.RegNum;
154 }
155
156 const MCExpr *getImm() const {
157 assert(Kind == Immediate && "Invalid type access!");
158 return Imm.Val;
159 }
160
161 StringRef getToken() const {
162 assert(Kind == Token && "Invalid type access!");
163 return Tok;
164 }
165
166 void print(raw_ostream &OS) const override {
167 switch (Kind) {
168 case Immediate:
169 OS << *getImm();
170 break;
171 case Register:
172 OS << "<register x";
173 OS << getReg() << ">";
174 break;
175 case Token:
176 OS << "'" << getToken() << "'";
177 break;
178 }
179 }
180
181 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
182 assert(Expr && "Expr shouldn't be null!");
183
184 if (auto *CE = dyn_cast<MCConstantExpr>(Val: Expr))
185 Inst.addOperand(Op: MCOperand::createImm(Val: CE->getValue()));
186 else
187 Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
188 }
189
190 // Used by the TableGen Code
191 void addRegOperands(MCInst &Inst, unsigned N) const {
192 assert(N == 1 && "Invalid number of operands!");
193 Inst.addOperand(Op: MCOperand::createReg(Reg: getReg()));
194 }
195
196 void addImmOperands(MCInst &Inst, unsigned N) const {
197 assert(N == 1 && "Invalid number of operands!");
198 addExpr(Inst, Expr: getImm());
199 }
200
201 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
202 auto Op = std::make_unique<BPFOperand>(args: Token);
203 Op->Tok = Str;
204 Op->StartLoc = S;
205 Op->EndLoc = S;
206 return Op;
207 }
208
209 static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
210 SMLoc E) {
211 auto Op = std::make_unique<BPFOperand>(args: Register);
212 Op->Reg.RegNum = RegNo;
213 Op->StartLoc = S;
214 Op->EndLoc = E;
215 return Op;
216 }
217
218 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
219 SMLoc E) {
220 auto Op = std::make_unique<BPFOperand>(args: Immediate);
221 Op->Imm.Val = Val;
222 Op->StartLoc = S;
223 Op->EndLoc = E;
224 return Op;
225 }
226
227 // Identifiers that can be used at the start of a statment.
228 static bool isValidIdAtStart(StringRef Name) {
229 return StringSwitch<bool>(Name.lower())
230 .Case(S: "if", Value: true)
231 .Case(S: "call", Value: true)
232 .Case(S: "callx", Value: true)
233 .Case(S: "goto", Value: true)
234 .Case(S: "gotol", Value: true)
235 .Case(S: "may_goto", Value: true)
236 .Case(S: "*", Value: true)
237 .Case(S: "exit", Value: true)
238 .Case(S: "lock", Value: true)
239 .Case(S: "ld_pseudo", Value: true)
240 .Default(Value: false);
241 }
242
243 // Identifiers that can be used in the middle of a statment.
244 static bool isValidIdInMiddle(StringRef Name) {
245 return StringSwitch<bool>(Name.lower())
246 .Case(S: "u64", Value: true)
247 .Case(S: "u32", Value: true)
248 .Case(S: "u16", Value: true)
249 .Case(S: "u8", Value: true)
250 .Case(S: "s32", Value: true)
251 .Case(S: "s16", Value: true)
252 .Case(S: "s8", Value: true)
253 .Case(S: "be64", Value: true)
254 .Case(S: "be32", Value: true)
255 .Case(S: "be16", Value: true)
256 .Case(S: "le64", Value: true)
257 .Case(S: "le32", Value: true)
258 .Case(S: "le16", Value: true)
259 .Case(S: "bswap16", Value: true)
260 .Case(S: "bswap32", Value: true)
261 .Case(S: "bswap64", Value: true)
262 .Case(S: "goto", Value: true)
263 .Case(S: "gotol", Value: true)
264 .Case(S: "ll", Value: true)
265 .Case(S: "skb", Value: true)
266 .Case(S: "s", Value: true)
267 .Case(S: "atomic_fetch_add", Value: true)
268 .Case(S: "atomic_fetch_and", Value: true)
269 .Case(S: "atomic_fetch_or", Value: true)
270 .Case(S: "atomic_fetch_xor", Value: true)
271 .Case(S: "xchg_64", Value: true)
272 .Case(S: "xchg32_32", Value: true)
273 .Case(S: "cmpxchg_64", Value: true)
274 .Case(S: "cmpxchg32_32", Value: true)
275 .Case(S: "addr_space_cast", Value: true)
276 .Default(Value: false);
277 }
278};
279} // end anonymous namespace.
280
281#define GET_REGISTER_MATCHER
282#define GET_MATCHER_IMPLEMENTATION
283#include "BPFGenAsmMatcher.inc"
284
285bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
286
287 if (Operands.size() == 4) {
288 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
289 // reg1 must be the same as reg2
290 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
291 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
292 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
293 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
294 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
295 && Op1.getToken() == "="
296 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
297 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
298 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
299 || Op2.getToken() == "le64")
300 && Op0.getReg() != Op3.getReg())
301 return true;
302 }
303
304 return false;
305}
306
307bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
308 OperandVector &Operands,
309 MCStreamer &Out, uint64_t &ErrorInfo,
310 bool MatchingInlineAsm) {
311 MCInst Inst;
312 SMLoc ErrorLoc;
313
314 if (PreMatchCheck(Operands))
315 return Error(L: IDLoc, Msg: "additional inst constraint not met");
316
317 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, matchingInlineAsm: MatchingInlineAsm)) {
318 default:
319 break;
320 case Match_Success:
321 Inst.setLoc(IDLoc);
322 Out.emitInstruction(Inst, STI: getSTI());
323 return false;
324 case Match_MissingFeature:
325 return Error(L: IDLoc, Msg: "instruction use requires an option to be enabled");
326 case Match_MnemonicFail:
327 return Error(L: IDLoc, Msg: "unrecognized instruction mnemonic");
328 case Match_InvalidOperand:
329 ErrorLoc = IDLoc;
330
331 if (ErrorInfo != ~0U) {
332 if (ErrorInfo >= Operands.size())
333 return Error(L: ErrorLoc, Msg: "too few operands for instruction");
334
335 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
336
337 if (ErrorLoc == SMLoc())
338 ErrorLoc = IDLoc;
339 }
340
341 return Error(L: ErrorLoc, Msg: "invalid operand for instruction");
342 case Match_InvalidBrTarget:
343 return Error(L: Operands[ErrorInfo]->getStartLoc(),
344 Msg: "operand is not an identifier or 16-bit signed integer");
345 case Match_InvalidSImm16:
346 return Error(L: Operands[ErrorInfo]->getStartLoc(),
347 Msg: "operand is not a 16-bit signed integer");
348 }
349
350 llvm_unreachable("Unknown match type detected!");
351}
352
353bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
354 SMLoc &EndLoc) {
355 if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
356 return Error(L: StartLoc, Msg: "invalid register name");
357 return false;
358}
359
360ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
361 SMLoc &EndLoc) {
362 const AsmToken &Tok = getParser().getTok();
363 StartLoc = Tok.getLoc();
364 EndLoc = Tok.getEndLoc();
365 Reg = BPF::NoRegister;
366 StringRef Name = getLexer().getTok().getIdentifier();
367
368 if (!MatchRegisterName(Name)) {
369 getParser().Lex(); // Eat identifier token.
370 return ParseStatus::Success;
371 }
372
373 return ParseStatus::NoMatch;
374}
375
376ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
377 SMLoc S = getLoc();
378
379 if (getLexer().getKind() == AsmToken::Identifier) {
380 StringRef Name = getLexer().getTok().getIdentifier();
381
382 if (BPFOperand::isValidIdInMiddle(Name)) {
383 getLexer().Lex();
384 Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S));
385 return ParseStatus::Success;
386 }
387
388 return ParseStatus::NoMatch;
389 }
390
391 switch (getLexer().getKind()) {
392 case AsmToken::Minus:
393 case AsmToken::Plus: {
394 if (getLexer().peekTok().is(K: AsmToken::Integer))
395 return ParseStatus::NoMatch;
396 [[fallthrough]];
397 }
398
399 case AsmToken::Equal:
400 case AsmToken::Greater:
401 case AsmToken::Less:
402 case AsmToken::Pipe:
403 case AsmToken::Star:
404 case AsmToken::LParen:
405 case AsmToken::RParen:
406 case AsmToken::LBrac:
407 case AsmToken::RBrac:
408 case AsmToken::Slash:
409 case AsmToken::Amp:
410 case AsmToken::Percent:
411 case AsmToken::Caret: {
412 StringRef Name = getLexer().getTok().getString();
413 getLexer().Lex();
414 Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S));
415
416 return ParseStatus::Success;
417 }
418
419 case AsmToken::EqualEqual:
420 case AsmToken::ExclaimEqual:
421 case AsmToken::GreaterEqual:
422 case AsmToken::GreaterGreater:
423 case AsmToken::LessEqual:
424 case AsmToken::LessLess: {
425 Operands.push_back(Elt: BPFOperand::createToken(
426 Str: getLexer().getTok().getString().substr(Start: 0, N: 1), S));
427 Operands.push_back(Elt: BPFOperand::createToken(
428 Str: getLexer().getTok().getString().substr(Start: 1, N: 1), S));
429 getLexer().Lex();
430
431 return ParseStatus::Success;
432 }
433
434 default:
435 break;
436 }
437
438 return ParseStatus::NoMatch;
439}
440
441ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
442 SMLoc S = getLoc();
443 SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1);
444
445 switch (getLexer().getKind()) {
446 default:
447 return ParseStatus::NoMatch;
448 case AsmToken::Identifier:
449 StringRef Name = getLexer().getTok().getIdentifier();
450 unsigned RegNo = MatchRegisterName(Name);
451
452 if (RegNo == 0)
453 return ParseStatus::NoMatch;
454
455 getLexer().Lex();
456 Operands.push_back(Elt: BPFOperand::createReg(RegNo, S, E));
457 }
458 return ParseStatus::Success;
459}
460
461ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
462 switch (getLexer().getKind()) {
463 default:
464 return ParseStatus::NoMatch;
465 case AsmToken::LParen:
466 case AsmToken::Minus:
467 case AsmToken::Plus:
468 case AsmToken::Integer:
469 case AsmToken::String:
470 case AsmToken::Identifier:
471 break;
472 }
473
474 const MCExpr *IdVal;
475 SMLoc S = getLoc();
476
477 if (getParser().parseExpression(Res&: IdVal))
478 return ParseStatus::Failure;
479
480 SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1);
481 Operands.push_back(Elt: BPFOperand::createImm(Val: IdVal, S, E));
482
483 return ParseStatus::Success;
484}
485
486/// ParseInstruction - Parse an BPF instruction which is in BPF verifier
487/// format.
488bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
489 SMLoc NameLoc, OperandVector &Operands) {
490 // The first operand could be either register or actually an operator.
491 unsigned RegNo = MatchRegisterName(Name);
492
493 if (RegNo != 0) {
494 SMLoc E = SMLoc::getFromPointer(Ptr: NameLoc.getPointer() - 1);
495 Operands.push_back(Elt: BPFOperand::createReg(RegNo, S: NameLoc, E));
496 } else if (BPFOperand::isValidIdAtStart (Name))
497 Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S: NameLoc));
498 else
499 return Error(L: NameLoc, Msg: "invalid register/token name");
500
501 while (!getLexer().is(K: AsmToken::EndOfStatement)) {
502 // Attempt to parse token as operator
503 if (parseOperandAsOperator(Operands).isSuccess())
504 continue;
505
506 // Attempt to parse token as register
507 if (parseRegister(Operands).isSuccess())
508 continue;
509
510 if (getLexer().is(K: AsmToken::Comma)) {
511 getLexer().Lex();
512 continue;
513 }
514
515 // Attempt to parse token as an immediate
516 if (!parseImmediate(Operands).isSuccess()) {
517 SMLoc Loc = getLexer().getLoc();
518 return Error(L: Loc, Msg: "unexpected token");
519 }
520 }
521
522 if (getLexer().isNot(K: AsmToken::EndOfStatement)) {
523 SMLoc Loc = getLexer().getLoc();
524
525 getParser().eatToEndOfStatement();
526
527 return Error(L: Loc, Msg: "unexpected token");
528 }
529
530 // Consume the EndOfStatement.
531 getParser().Lex();
532 return false;
533}
534
535extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
536 RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
537 RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
538 RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
539}
540