1 | //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "MCTargetDesc/BPFMCTargetDesc.h" |
10 | #include "TargetInfo/BPFTargetInfo.h" |
11 | #include "llvm/ADT/STLExtras.h" |
12 | #include "llvm/ADT/StringSwitch.h" |
13 | #include "llvm/MC/MCContext.h" |
14 | #include "llvm/MC/MCExpr.h" |
15 | #include "llvm/MC/MCInst.h" |
16 | #include "llvm/MC/MCInstrInfo.h" |
17 | #include "llvm/MC/MCParser/MCAsmLexer.h" |
18 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
19 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
20 | #include "llvm/MC/MCRegisterInfo.h" |
21 | #include "llvm/MC/MCStreamer.h" |
22 | #include "llvm/MC/MCSubtargetInfo.h" |
23 | #include "llvm/MC/TargetRegistry.h" |
24 | #include "llvm/Support/Casting.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | namespace { |
29 | struct BPFOperand; |
30 | |
31 | class BPFAsmParser : public MCTargetAsmParser { |
32 | |
33 | SMLoc getLoc() const { return getParser().getTok().getLoc(); } |
34 | |
35 | bool PreMatchCheck(OperandVector &Operands); |
36 | |
37 | bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
38 | OperandVector &Operands, MCStreamer &Out, |
39 | uint64_t &ErrorInfo, |
40 | bool MatchingInlineAsm) override; |
41 | |
42 | bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override; |
43 | ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
44 | SMLoc &EndLoc) override; |
45 | |
46 | bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
47 | SMLoc NameLoc, OperandVector &Operands) override; |
48 | |
49 | // "=" is used as assignment operator for assembly statment, so can't be used |
50 | // for symbol assignment. |
51 | bool equalIsAsmAssignment() override { return false; } |
52 | // "*" is used for dereferencing memory that it will be the start of |
53 | // statement. |
54 | bool starIsStartOfStatement() override { return true; } |
55 | |
56 | #define |
57 | #include "BPFGenAsmMatcher.inc" |
58 | |
59 | ParseStatus parseImmediate(OperandVector &Operands); |
60 | ParseStatus parseRegister(OperandVector &Operands); |
61 | ParseStatus parseOperandAsOperator(OperandVector &Operands); |
62 | |
63 | public: |
64 | enum BPFMatchResultTy { |
65 | Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, |
66 | #define GET_OPERAND_DIAGNOSTIC_TYPES |
67 | #include "BPFGenAsmMatcher.inc" |
68 | #undef GET_OPERAND_DIAGNOSTIC_TYPES |
69 | }; |
70 | |
71 | BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, |
72 | const MCInstrInfo &MII, const MCTargetOptions &Options) |
73 | : MCTargetAsmParser(Options, STI, MII) { |
74 | setAvailableFeatures(ComputeAvailableFeatures(FB: STI.getFeatureBits())); |
75 | } |
76 | }; |
77 | |
78 | /// BPFOperand - Instances of this class represent a parsed machine |
79 | /// instruction |
80 | struct BPFOperand : public MCParsedAsmOperand { |
81 | |
82 | enum KindTy { |
83 | Token, |
84 | Register, |
85 | Immediate, |
86 | } Kind; |
87 | |
88 | struct RegOp { |
89 | unsigned RegNum; |
90 | }; |
91 | |
92 | struct ImmOp { |
93 | const MCExpr *Val; |
94 | }; |
95 | |
96 | SMLoc StartLoc, EndLoc; |
97 | union { |
98 | StringRef Tok; |
99 | RegOp Reg; |
100 | ImmOp Imm; |
101 | }; |
102 | |
103 | BPFOperand(KindTy K) : Kind(K) {} |
104 | |
105 | public: |
106 | BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() { |
107 | Kind = o.Kind; |
108 | StartLoc = o.StartLoc; |
109 | EndLoc = o.EndLoc; |
110 | |
111 | switch (Kind) { |
112 | case Register: |
113 | Reg = o.Reg; |
114 | break; |
115 | case Immediate: |
116 | Imm = o.Imm; |
117 | break; |
118 | case Token: |
119 | Tok = o.Tok; |
120 | break; |
121 | } |
122 | } |
123 | |
124 | bool isToken() const override { return Kind == Token; } |
125 | bool isReg() const override { return Kind == Register; } |
126 | bool isImm() const override { return Kind == Immediate; } |
127 | bool isMem() const override { return false; } |
128 | |
129 | bool isConstantImm() const { |
130 | return isImm() && isa<MCConstantExpr>(Val: getImm()); |
131 | } |
132 | |
133 | int64_t getConstantImm() const { |
134 | const MCExpr *Val = getImm(); |
135 | return static_cast<const MCConstantExpr *>(Val)->getValue(); |
136 | } |
137 | |
138 | bool isSImm16() const { |
139 | return (isConstantImm() && isInt<16>(x: getConstantImm())); |
140 | } |
141 | |
142 | bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(Val: getImm()); } |
143 | |
144 | bool isBrTarget() const { return isSymbolRef() || isSImm16(); } |
145 | |
146 | /// getStartLoc - Gets location of the first token of this operand |
147 | SMLoc getStartLoc() const override { return StartLoc; } |
148 | /// getEndLoc - Gets location of the last token of this operand |
149 | SMLoc getEndLoc() const override { return EndLoc; } |
150 | |
151 | MCRegister getReg() const override { |
152 | assert(Kind == Register && "Invalid type access!" ); |
153 | return Reg.RegNum; |
154 | } |
155 | |
156 | const MCExpr *getImm() const { |
157 | assert(Kind == Immediate && "Invalid type access!" ); |
158 | return Imm.Val; |
159 | } |
160 | |
161 | StringRef getToken() const { |
162 | assert(Kind == Token && "Invalid type access!" ); |
163 | return Tok; |
164 | } |
165 | |
166 | void print(raw_ostream &OS) const override { |
167 | switch (Kind) { |
168 | case Immediate: |
169 | OS << *getImm(); |
170 | break; |
171 | case Register: |
172 | OS << "<register x" ; |
173 | OS << getReg() << ">" ; |
174 | break; |
175 | case Token: |
176 | OS << "'" << getToken() << "'" ; |
177 | break; |
178 | } |
179 | } |
180 | |
181 | void addExpr(MCInst &Inst, const MCExpr *Expr) const { |
182 | assert(Expr && "Expr shouldn't be null!" ); |
183 | |
184 | if (auto *CE = dyn_cast<MCConstantExpr>(Val: Expr)) |
185 | Inst.addOperand(Op: MCOperand::createImm(Val: CE->getValue())); |
186 | else |
187 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
188 | } |
189 | |
190 | // Used by the TableGen Code |
191 | void addRegOperands(MCInst &Inst, unsigned N) const { |
192 | assert(N == 1 && "Invalid number of operands!" ); |
193 | Inst.addOperand(Op: MCOperand::createReg(Reg: getReg())); |
194 | } |
195 | |
196 | void addImmOperands(MCInst &Inst, unsigned N) const { |
197 | assert(N == 1 && "Invalid number of operands!" ); |
198 | addExpr(Inst, Expr: getImm()); |
199 | } |
200 | |
201 | static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) { |
202 | auto Op = std::make_unique<BPFOperand>(args: Token); |
203 | Op->Tok = Str; |
204 | Op->StartLoc = S; |
205 | Op->EndLoc = S; |
206 | return Op; |
207 | } |
208 | |
209 | static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S, |
210 | SMLoc E) { |
211 | auto Op = std::make_unique<BPFOperand>(args: Register); |
212 | Op->Reg.RegNum = RegNo; |
213 | Op->StartLoc = S; |
214 | Op->EndLoc = E; |
215 | return Op; |
216 | } |
217 | |
218 | static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S, |
219 | SMLoc E) { |
220 | auto Op = std::make_unique<BPFOperand>(args: Immediate); |
221 | Op->Imm.Val = Val; |
222 | Op->StartLoc = S; |
223 | Op->EndLoc = E; |
224 | return Op; |
225 | } |
226 | |
227 | // Identifiers that can be used at the start of a statment. |
228 | static bool isValidIdAtStart(StringRef Name) { |
229 | return StringSwitch<bool>(Name.lower()) |
230 | .Case(S: "if" , Value: true) |
231 | .Case(S: "call" , Value: true) |
232 | .Case(S: "callx" , Value: true) |
233 | .Case(S: "goto" , Value: true) |
234 | .Case(S: "gotol" , Value: true) |
235 | .Case(S: "may_goto" , Value: true) |
236 | .Case(S: "*" , Value: true) |
237 | .Case(S: "exit" , Value: true) |
238 | .Case(S: "lock" , Value: true) |
239 | .Case(S: "ld_pseudo" , Value: true) |
240 | .Default(Value: false); |
241 | } |
242 | |
243 | // Identifiers that can be used in the middle of a statment. |
244 | static bool isValidIdInMiddle(StringRef Name) { |
245 | return StringSwitch<bool>(Name.lower()) |
246 | .Case(S: "u64" , Value: true) |
247 | .Case(S: "u32" , Value: true) |
248 | .Case(S: "u16" , Value: true) |
249 | .Case(S: "u8" , Value: true) |
250 | .Case(S: "s32" , Value: true) |
251 | .Case(S: "s16" , Value: true) |
252 | .Case(S: "s8" , Value: true) |
253 | .Case(S: "be64" , Value: true) |
254 | .Case(S: "be32" , Value: true) |
255 | .Case(S: "be16" , Value: true) |
256 | .Case(S: "le64" , Value: true) |
257 | .Case(S: "le32" , Value: true) |
258 | .Case(S: "le16" , Value: true) |
259 | .Case(S: "bswap16" , Value: true) |
260 | .Case(S: "bswap32" , Value: true) |
261 | .Case(S: "bswap64" , Value: true) |
262 | .Case(S: "goto" , Value: true) |
263 | .Case(S: "gotol" , Value: true) |
264 | .Case(S: "ll" , Value: true) |
265 | .Case(S: "skb" , Value: true) |
266 | .Case(S: "s" , Value: true) |
267 | .Case(S: "atomic_fetch_add" , Value: true) |
268 | .Case(S: "atomic_fetch_and" , Value: true) |
269 | .Case(S: "atomic_fetch_or" , Value: true) |
270 | .Case(S: "atomic_fetch_xor" , Value: true) |
271 | .Case(S: "xchg_64" , Value: true) |
272 | .Case(S: "xchg32_32" , Value: true) |
273 | .Case(S: "cmpxchg_64" , Value: true) |
274 | .Case(S: "cmpxchg32_32" , Value: true) |
275 | .Case(S: "addr_space_cast" , Value: true) |
276 | .Default(Value: false); |
277 | } |
278 | }; |
279 | } // end anonymous namespace. |
280 | |
281 | #define GET_REGISTER_MATCHER |
282 | #define GET_MATCHER_IMPLEMENTATION |
283 | #include "BPFGenAsmMatcher.inc" |
284 | |
285 | bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) { |
286 | |
287 | if (Operands.size() == 4) { |
288 | // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2", |
289 | // reg1 must be the same as reg2 |
290 | BPFOperand &Op0 = (BPFOperand &)*Operands[0]; |
291 | BPFOperand &Op1 = (BPFOperand &)*Operands[1]; |
292 | BPFOperand &Op2 = (BPFOperand &)*Operands[2]; |
293 | BPFOperand &Op3 = (BPFOperand &)*Operands[3]; |
294 | if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg() |
295 | && Op1.getToken() == "=" |
296 | && (Op2.getToken() == "-" || Op2.getToken() == "be16" |
297 | || Op2.getToken() == "be32" || Op2.getToken() == "be64" |
298 | || Op2.getToken() == "le16" || Op2.getToken() == "le32" |
299 | || Op2.getToken() == "le64" ) |
300 | && Op0.getReg() != Op3.getReg()) |
301 | return true; |
302 | } |
303 | |
304 | return false; |
305 | } |
306 | |
307 | bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
308 | OperandVector &Operands, |
309 | MCStreamer &Out, uint64_t &ErrorInfo, |
310 | bool MatchingInlineAsm) { |
311 | MCInst Inst; |
312 | SMLoc ErrorLoc; |
313 | |
314 | if (PreMatchCheck(Operands)) |
315 | return Error(L: IDLoc, Msg: "additional inst constraint not met" ); |
316 | |
317 | switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, matchingInlineAsm: MatchingInlineAsm)) { |
318 | default: |
319 | break; |
320 | case Match_Success: |
321 | Inst.setLoc(IDLoc); |
322 | Out.emitInstruction(Inst, STI: getSTI()); |
323 | return false; |
324 | case Match_MissingFeature: |
325 | return Error(L: IDLoc, Msg: "instruction use requires an option to be enabled" ); |
326 | case Match_MnemonicFail: |
327 | return Error(L: IDLoc, Msg: "unrecognized instruction mnemonic" ); |
328 | case Match_InvalidOperand: |
329 | ErrorLoc = IDLoc; |
330 | |
331 | if (ErrorInfo != ~0U) { |
332 | if (ErrorInfo >= Operands.size()) |
333 | return Error(L: ErrorLoc, Msg: "too few operands for instruction" ); |
334 | |
335 | ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc(); |
336 | |
337 | if (ErrorLoc == SMLoc()) |
338 | ErrorLoc = IDLoc; |
339 | } |
340 | |
341 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
342 | case Match_InvalidBrTarget: |
343 | return Error(L: Operands[ErrorInfo]->getStartLoc(), |
344 | Msg: "operand is not an identifier or 16-bit signed integer" ); |
345 | case Match_InvalidSImm16: |
346 | return Error(L: Operands[ErrorInfo]->getStartLoc(), |
347 | Msg: "operand is not a 16-bit signed integer" ); |
348 | } |
349 | |
350 | llvm_unreachable("Unknown match type detected!" ); |
351 | } |
352 | |
353 | bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
354 | SMLoc &EndLoc) { |
355 | if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess()) |
356 | return Error(L: StartLoc, Msg: "invalid register name" ); |
357 | return false; |
358 | } |
359 | |
360 | ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
361 | SMLoc &EndLoc) { |
362 | const AsmToken &Tok = getParser().getTok(); |
363 | StartLoc = Tok.getLoc(); |
364 | EndLoc = Tok.getEndLoc(); |
365 | Reg = BPF::NoRegister; |
366 | StringRef Name = getLexer().getTok().getIdentifier(); |
367 | |
368 | if (!MatchRegisterName(Name)) { |
369 | getParser().Lex(); // Eat identifier token. |
370 | return ParseStatus::Success; |
371 | } |
372 | |
373 | return ParseStatus::NoMatch; |
374 | } |
375 | |
376 | ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) { |
377 | SMLoc S = getLoc(); |
378 | |
379 | if (getLexer().getKind() == AsmToken::Identifier) { |
380 | StringRef Name = getLexer().getTok().getIdentifier(); |
381 | |
382 | if (BPFOperand::isValidIdInMiddle(Name)) { |
383 | getLexer().Lex(); |
384 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S)); |
385 | return ParseStatus::Success; |
386 | } |
387 | |
388 | return ParseStatus::NoMatch; |
389 | } |
390 | |
391 | switch (getLexer().getKind()) { |
392 | case AsmToken::Minus: |
393 | case AsmToken::Plus: { |
394 | if (getLexer().peekTok().is(K: AsmToken::Integer)) |
395 | return ParseStatus::NoMatch; |
396 | [[fallthrough]]; |
397 | } |
398 | |
399 | case AsmToken::Equal: |
400 | case AsmToken::Greater: |
401 | case AsmToken::Less: |
402 | case AsmToken::Pipe: |
403 | case AsmToken::Star: |
404 | case AsmToken::LParen: |
405 | case AsmToken::RParen: |
406 | case AsmToken::LBrac: |
407 | case AsmToken::RBrac: |
408 | case AsmToken::Slash: |
409 | case AsmToken::Amp: |
410 | case AsmToken::Percent: |
411 | case AsmToken::Caret: { |
412 | StringRef Name = getLexer().getTok().getString(); |
413 | getLexer().Lex(); |
414 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S)); |
415 | |
416 | return ParseStatus::Success; |
417 | } |
418 | |
419 | case AsmToken::EqualEqual: |
420 | case AsmToken::ExclaimEqual: |
421 | case AsmToken::GreaterEqual: |
422 | case AsmToken::GreaterGreater: |
423 | case AsmToken::LessEqual: |
424 | case AsmToken::LessLess: { |
425 | Operands.push_back(Elt: BPFOperand::createToken( |
426 | Str: getLexer().getTok().getString().substr(Start: 0, N: 1), S)); |
427 | Operands.push_back(Elt: BPFOperand::createToken( |
428 | Str: getLexer().getTok().getString().substr(Start: 1, N: 1), S)); |
429 | getLexer().Lex(); |
430 | |
431 | return ParseStatus::Success; |
432 | } |
433 | |
434 | default: |
435 | break; |
436 | } |
437 | |
438 | return ParseStatus::NoMatch; |
439 | } |
440 | |
441 | ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) { |
442 | SMLoc S = getLoc(); |
443 | SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1); |
444 | |
445 | switch (getLexer().getKind()) { |
446 | default: |
447 | return ParseStatus::NoMatch; |
448 | case AsmToken::Identifier: |
449 | StringRef Name = getLexer().getTok().getIdentifier(); |
450 | unsigned RegNo = MatchRegisterName(Name); |
451 | |
452 | if (RegNo == 0) |
453 | return ParseStatus::NoMatch; |
454 | |
455 | getLexer().Lex(); |
456 | Operands.push_back(Elt: BPFOperand::createReg(RegNo, S, E)); |
457 | } |
458 | return ParseStatus::Success; |
459 | } |
460 | |
461 | ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) { |
462 | switch (getLexer().getKind()) { |
463 | default: |
464 | return ParseStatus::NoMatch; |
465 | case AsmToken::LParen: |
466 | case AsmToken::Minus: |
467 | case AsmToken::Plus: |
468 | case AsmToken::Integer: |
469 | case AsmToken::String: |
470 | case AsmToken::Identifier: |
471 | break; |
472 | } |
473 | |
474 | const MCExpr *IdVal; |
475 | SMLoc S = getLoc(); |
476 | |
477 | if (getParser().parseExpression(Res&: IdVal)) |
478 | return ParseStatus::Failure; |
479 | |
480 | SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1); |
481 | Operands.push_back(Elt: BPFOperand::createImm(Val: IdVal, S, E)); |
482 | |
483 | return ParseStatus::Success; |
484 | } |
485 | |
486 | /// ParseInstruction - Parse an BPF instruction which is in BPF verifier |
487 | /// format. |
488 | bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
489 | SMLoc NameLoc, OperandVector &Operands) { |
490 | // The first operand could be either register or actually an operator. |
491 | unsigned RegNo = MatchRegisterName(Name); |
492 | |
493 | if (RegNo != 0) { |
494 | SMLoc E = SMLoc::getFromPointer(Ptr: NameLoc.getPointer() - 1); |
495 | Operands.push_back(Elt: BPFOperand::createReg(RegNo, S: NameLoc, E)); |
496 | } else if (BPFOperand::isValidIdAtStart (Name)) |
497 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S: NameLoc)); |
498 | else |
499 | return Error(L: NameLoc, Msg: "invalid register/token name" ); |
500 | |
501 | while (!getLexer().is(K: AsmToken::EndOfStatement)) { |
502 | // Attempt to parse token as operator |
503 | if (parseOperandAsOperator(Operands).isSuccess()) |
504 | continue; |
505 | |
506 | // Attempt to parse token as register |
507 | if (parseRegister(Operands).isSuccess()) |
508 | continue; |
509 | |
510 | if (getLexer().is(K: AsmToken::Comma)) { |
511 | getLexer().Lex(); |
512 | continue; |
513 | } |
514 | |
515 | // Attempt to parse token as an immediate |
516 | if (!parseImmediate(Operands).isSuccess()) { |
517 | SMLoc Loc = getLexer().getLoc(); |
518 | return Error(L: Loc, Msg: "unexpected token" ); |
519 | } |
520 | } |
521 | |
522 | if (getLexer().isNot(K: AsmToken::EndOfStatement)) { |
523 | SMLoc Loc = getLexer().getLoc(); |
524 | |
525 | getParser().eatToEndOfStatement(); |
526 | |
527 | return Error(L: Loc, Msg: "unexpected token" ); |
528 | } |
529 | |
530 | // Consume the EndOfStatement. |
531 | getParser().Lex(); |
532 | return false; |
533 | } |
534 | |
535 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() { |
536 | RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget()); |
537 | RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget()); |
538 | RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget()); |
539 | } |
540 | |