1 | //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "MCTargetDesc/BPFMCAsmInfo.h" |
10 | #include "MCTargetDesc/BPFMCTargetDesc.h" |
11 | #include "TargetInfo/BPFTargetInfo.h" |
12 | #include "llvm/ADT/StringSwitch.h" |
13 | #include "llvm/MC/MCContext.h" |
14 | #include "llvm/MC/MCExpr.h" |
15 | #include "llvm/MC/MCInst.h" |
16 | #include "llvm/MC/MCInstrInfo.h" |
17 | #include "llvm/MC/MCParser/AsmLexer.h" |
18 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
19 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
20 | #include "llvm/MC/MCStreamer.h" |
21 | #include "llvm/MC/MCSubtargetInfo.h" |
22 | #include "llvm/MC/TargetRegistry.h" |
23 | #include "llvm/Support/Casting.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | namespace { |
29 | struct BPFOperand; |
30 | |
31 | class BPFAsmParser : public MCTargetAsmParser { |
32 | |
33 | SMLoc getLoc() const { return getParser().getTok().getLoc(); } |
34 | |
35 | bool PreMatchCheck(OperandVector &Operands); |
36 | |
37 | bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
38 | OperandVector &Operands, MCStreamer &Out, |
39 | uint64_t &ErrorInfo, |
40 | bool MatchingInlineAsm) override; |
41 | |
42 | bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override; |
43 | ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
44 | SMLoc &EndLoc) override; |
45 | |
46 | bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, |
47 | SMLoc NameLoc, OperandVector &Operands) override; |
48 | |
49 | // "=" is used as assignment operator for assembly statment, so can't be used |
50 | // for symbol assignment. |
51 | bool equalIsAsmAssignment() override { return false; } |
52 | // "*" is used for dereferencing memory that it will be the start of |
53 | // statement. |
54 | bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override { |
55 | return Token == AsmToken::Star; |
56 | } |
57 | |
58 | #define |
59 | #include "BPFGenAsmMatcher.inc" |
60 | |
61 | ParseStatus parseImmediate(OperandVector &Operands); |
62 | ParseStatus parseRegister(OperandVector &Operands); |
63 | ParseStatus parseOperandAsOperator(OperandVector &Operands); |
64 | |
65 | public: |
66 | enum BPFMatchResultTy { |
67 | Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, |
68 | #define GET_OPERAND_DIAGNOSTIC_TYPES |
69 | #include "BPFGenAsmMatcher.inc" |
70 | #undef GET_OPERAND_DIAGNOSTIC_TYPES |
71 | }; |
72 | |
73 | BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, |
74 | const MCInstrInfo &MII, const MCTargetOptions &Options) |
75 | : MCTargetAsmParser(Options, STI, MII) { |
76 | setAvailableFeatures(ComputeAvailableFeatures(FB: STI.getFeatureBits())); |
77 | } |
78 | }; |
79 | |
80 | /// BPFOperand - Instances of this class represent a parsed machine |
81 | /// instruction |
82 | struct BPFOperand : public MCParsedAsmOperand { |
83 | |
84 | enum KindTy { |
85 | Token, |
86 | Register, |
87 | Immediate, |
88 | } Kind; |
89 | |
90 | struct RegOp { |
91 | MCRegister RegNum; |
92 | }; |
93 | |
94 | struct ImmOp { |
95 | const MCExpr *Val; |
96 | }; |
97 | |
98 | SMLoc StartLoc, EndLoc; |
99 | union { |
100 | StringRef Tok; |
101 | RegOp Reg; |
102 | ImmOp Imm; |
103 | }; |
104 | |
105 | BPFOperand(KindTy K) : Kind(K) {} |
106 | |
107 | public: |
108 | BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() { |
109 | Kind = o.Kind; |
110 | StartLoc = o.StartLoc; |
111 | EndLoc = o.EndLoc; |
112 | |
113 | switch (Kind) { |
114 | case Register: |
115 | Reg = o.Reg; |
116 | break; |
117 | case Immediate: |
118 | Imm = o.Imm; |
119 | break; |
120 | case Token: |
121 | Tok = o.Tok; |
122 | break; |
123 | } |
124 | } |
125 | |
126 | bool isToken() const override { return Kind == Token; } |
127 | bool isReg() const override { return Kind == Register; } |
128 | bool isImm() const override { return Kind == Immediate; } |
129 | bool isMem() const override { return false; } |
130 | |
131 | bool isConstantImm() const { |
132 | return isImm() && isa<MCConstantExpr>(Val: getImm()); |
133 | } |
134 | |
135 | int64_t getConstantImm() const { |
136 | const MCExpr *Val = getImm(); |
137 | return static_cast<const MCConstantExpr *>(Val)->getValue(); |
138 | } |
139 | |
140 | bool isSImm16() const { |
141 | return (isConstantImm() && isInt<16>(x: getConstantImm())); |
142 | } |
143 | |
144 | bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(Val: getImm()); } |
145 | |
146 | bool isBrTarget() const { return isSymbolRef() || isSImm16(); } |
147 | |
148 | /// getStartLoc - Gets location of the first token of this operand |
149 | SMLoc getStartLoc() const override { return StartLoc; } |
150 | /// getEndLoc - Gets location of the last token of this operand |
151 | SMLoc getEndLoc() const override { return EndLoc; } |
152 | |
153 | MCRegister getReg() const override { |
154 | assert(Kind == Register && "Invalid type access!" ); |
155 | return Reg.RegNum; |
156 | } |
157 | |
158 | const MCExpr *getImm() const { |
159 | assert(Kind == Immediate && "Invalid type access!" ); |
160 | return Imm.Val; |
161 | } |
162 | |
163 | StringRef getToken() const { |
164 | assert(Kind == Token && "Invalid type access!" ); |
165 | return Tok; |
166 | } |
167 | |
168 | void print(raw_ostream &OS, const MCAsmInfo &MAI) const override { |
169 | switch (Kind) { |
170 | case Immediate: |
171 | MAI.printExpr(OS, *getImm()); |
172 | break; |
173 | case Register: |
174 | OS << "<register x" ; |
175 | OS << getReg() << ">" ; |
176 | break; |
177 | case Token: |
178 | OS << "'" << getToken() << "'" ; |
179 | break; |
180 | } |
181 | } |
182 | |
183 | void addExpr(MCInst &Inst, const MCExpr *Expr) const { |
184 | assert(Expr && "Expr shouldn't be null!" ); |
185 | |
186 | if (auto *CE = dyn_cast<MCConstantExpr>(Val: Expr)) |
187 | Inst.addOperand(Op: MCOperand::createImm(Val: CE->getValue())); |
188 | else |
189 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
190 | } |
191 | |
192 | // Used by the TableGen Code |
193 | void addRegOperands(MCInst &Inst, unsigned N) const { |
194 | assert(N == 1 && "Invalid number of operands!" ); |
195 | Inst.addOperand(Op: MCOperand::createReg(Reg: getReg())); |
196 | } |
197 | |
198 | void addImmOperands(MCInst &Inst, unsigned N) const { |
199 | assert(N == 1 && "Invalid number of operands!" ); |
200 | addExpr(Inst, Expr: getImm()); |
201 | } |
202 | |
203 | static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) { |
204 | auto Op = std::make_unique<BPFOperand>(args: Token); |
205 | Op->Tok = Str; |
206 | Op->StartLoc = S; |
207 | Op->EndLoc = S; |
208 | return Op; |
209 | } |
210 | |
211 | static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S, |
212 | SMLoc E) { |
213 | auto Op = std::make_unique<BPFOperand>(args: Register); |
214 | Op->Reg.RegNum = Reg; |
215 | Op->StartLoc = S; |
216 | Op->EndLoc = E; |
217 | return Op; |
218 | } |
219 | |
220 | static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S, |
221 | SMLoc E) { |
222 | auto Op = std::make_unique<BPFOperand>(args: Immediate); |
223 | Op->Imm.Val = Val; |
224 | Op->StartLoc = S; |
225 | Op->EndLoc = E; |
226 | return Op; |
227 | } |
228 | |
229 | // Identifiers that can be used at the start of a statment. |
230 | static bool isValidIdAtStart(StringRef Name) { |
231 | return StringSwitch<bool>(Name.lower()) |
232 | .Case(S: "if" , Value: true) |
233 | .Case(S: "call" , Value: true) |
234 | .Case(S: "callx" , Value: true) |
235 | .Case(S: "goto" , Value: true) |
236 | .Case(S: "gotol" , Value: true) |
237 | .Case(S: "may_goto" , Value: true) |
238 | .Case(S: "*" , Value: true) |
239 | .Case(S: "exit" , Value: true) |
240 | .Case(S: "lock" , Value: true) |
241 | .Case(S: "ld_pseudo" , Value: true) |
242 | .Case(S: "store_release" , Value: true) |
243 | .Default(Value: false); |
244 | } |
245 | |
246 | // Identifiers that can be used in the middle of a statment. |
247 | static bool isValidIdInMiddle(StringRef Name) { |
248 | return StringSwitch<bool>(Name.lower()) |
249 | .Case(S: "u64" , Value: true) |
250 | .Case(S: "u32" , Value: true) |
251 | .Case(S: "u16" , Value: true) |
252 | .Case(S: "u8" , Value: true) |
253 | .Case(S: "s32" , Value: true) |
254 | .Case(S: "s16" , Value: true) |
255 | .Case(S: "s8" , Value: true) |
256 | .Case(S: "be64" , Value: true) |
257 | .Case(S: "be32" , Value: true) |
258 | .Case(S: "be16" , Value: true) |
259 | .Case(S: "le64" , Value: true) |
260 | .Case(S: "le32" , Value: true) |
261 | .Case(S: "le16" , Value: true) |
262 | .Case(S: "bswap16" , Value: true) |
263 | .Case(S: "bswap32" , Value: true) |
264 | .Case(S: "bswap64" , Value: true) |
265 | .Case(S: "goto" , Value: true) |
266 | .Case(S: "ll" , Value: true) |
267 | .Case(S: "skb" , Value: true) |
268 | .Case(S: "s" , Value: true) |
269 | .Case(S: "atomic_fetch_add" , Value: true) |
270 | .Case(S: "atomic_fetch_and" , Value: true) |
271 | .Case(S: "atomic_fetch_or" , Value: true) |
272 | .Case(S: "atomic_fetch_xor" , Value: true) |
273 | .Case(S: "xchg_64" , Value: true) |
274 | .Case(S: "xchg32_32" , Value: true) |
275 | .Case(S: "cmpxchg_64" , Value: true) |
276 | .Case(S: "cmpxchg32_32" , Value: true) |
277 | .Case(S: "addr_space_cast" , Value: true) |
278 | .Case(S: "load_acquire" , Value: true) |
279 | .Default(Value: false); |
280 | } |
281 | }; |
282 | } // end anonymous namespace. |
283 | |
284 | #define GET_REGISTER_MATCHER |
285 | #define GET_MATCHER_IMPLEMENTATION |
286 | #include "BPFGenAsmMatcher.inc" |
287 | |
288 | bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) { |
289 | |
290 | if (Operands.size() == 4) { |
291 | // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2", |
292 | // reg1 must be the same as reg2 |
293 | BPFOperand &Op0 = (BPFOperand &)*Operands[0]; |
294 | BPFOperand &Op1 = (BPFOperand &)*Operands[1]; |
295 | BPFOperand &Op2 = (BPFOperand &)*Operands[2]; |
296 | BPFOperand &Op3 = (BPFOperand &)*Operands[3]; |
297 | if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg() |
298 | && Op1.getToken() == "=" |
299 | && (Op2.getToken() == "-" || Op2.getToken() == "be16" |
300 | || Op2.getToken() == "be32" || Op2.getToken() == "be64" |
301 | || Op2.getToken() == "le16" || Op2.getToken() == "le32" |
302 | || Op2.getToken() == "le64" ) |
303 | && Op0.getReg() != Op3.getReg()) |
304 | return true; |
305 | } |
306 | |
307 | return false; |
308 | } |
309 | |
310 | bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
311 | OperandVector &Operands, |
312 | MCStreamer &Out, uint64_t &ErrorInfo, |
313 | bool MatchingInlineAsm) { |
314 | MCInst Inst; |
315 | SMLoc ErrorLoc; |
316 | |
317 | if (PreMatchCheck(Operands)) |
318 | return Error(L: IDLoc, Msg: "additional inst constraint not met" ); |
319 | |
320 | switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, matchingInlineAsm: MatchingInlineAsm)) { |
321 | default: |
322 | break; |
323 | case Match_Success: |
324 | Inst.setLoc(IDLoc); |
325 | Out.emitInstruction(Inst, STI: getSTI()); |
326 | return false; |
327 | case Match_MissingFeature: |
328 | return Error(L: IDLoc, Msg: "instruction use requires an option to be enabled" ); |
329 | case Match_MnemonicFail: |
330 | return Error(L: IDLoc, Msg: "unrecognized instruction mnemonic" ); |
331 | case Match_InvalidOperand: |
332 | ErrorLoc = IDLoc; |
333 | |
334 | if (ErrorInfo != ~0U) { |
335 | if (ErrorInfo >= Operands.size()) |
336 | return Error(L: ErrorLoc, Msg: "too few operands for instruction" ); |
337 | |
338 | ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc(); |
339 | |
340 | if (ErrorLoc == SMLoc()) |
341 | ErrorLoc = IDLoc; |
342 | } |
343 | |
344 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
345 | case Match_InvalidBrTarget: |
346 | return Error(L: Operands[ErrorInfo]->getStartLoc(), |
347 | Msg: "operand is not an identifier or 16-bit signed integer" ); |
348 | case Match_InvalidSImm16: |
349 | return Error(L: Operands[ErrorInfo]->getStartLoc(), |
350 | Msg: "operand is not a 16-bit signed integer" ); |
351 | case Match_InvalidTiedOperand: |
352 | return Error(L: Operands[ErrorInfo]->getStartLoc(), |
353 | Msg: "operand is not the same as the dst register" ); |
354 | } |
355 | |
356 | llvm_unreachable("Unknown match type detected!" ); |
357 | } |
358 | |
359 | bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
360 | SMLoc &EndLoc) { |
361 | if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess()) |
362 | return Error(L: StartLoc, Msg: "invalid register name" ); |
363 | return false; |
364 | } |
365 | |
366 | ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
367 | SMLoc &EndLoc) { |
368 | const AsmToken &Tok = getParser().getTok(); |
369 | StartLoc = Tok.getLoc(); |
370 | EndLoc = Tok.getEndLoc(); |
371 | Reg = BPF::NoRegister; |
372 | StringRef Name = getLexer().getTok().getIdentifier(); |
373 | |
374 | if (!MatchRegisterName(Name)) { |
375 | getParser().Lex(); // Eat identifier token. |
376 | return ParseStatus::Success; |
377 | } |
378 | |
379 | return ParseStatus::NoMatch; |
380 | } |
381 | |
382 | ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) { |
383 | SMLoc S = getLoc(); |
384 | |
385 | if (getLexer().getKind() == AsmToken::Identifier) { |
386 | StringRef Name = getLexer().getTok().getIdentifier(); |
387 | |
388 | if (BPFOperand::isValidIdInMiddle(Name)) { |
389 | getLexer().Lex(); |
390 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S)); |
391 | return ParseStatus::Success; |
392 | } |
393 | |
394 | return ParseStatus::NoMatch; |
395 | } |
396 | |
397 | switch (getLexer().getKind()) { |
398 | case AsmToken::Minus: |
399 | case AsmToken::Plus: { |
400 | if (getLexer().peekTok().is(K: AsmToken::Integer)) |
401 | return ParseStatus::NoMatch; |
402 | [[fallthrough]]; |
403 | } |
404 | |
405 | case AsmToken::Equal: |
406 | case AsmToken::Greater: |
407 | case AsmToken::Less: |
408 | case AsmToken::Pipe: |
409 | case AsmToken::Star: |
410 | case AsmToken::LParen: |
411 | case AsmToken::RParen: |
412 | case AsmToken::LBrac: |
413 | case AsmToken::RBrac: |
414 | case AsmToken::Slash: |
415 | case AsmToken::Amp: |
416 | case AsmToken::Percent: |
417 | case AsmToken::Caret: { |
418 | StringRef Name = getLexer().getTok().getString(); |
419 | getLexer().Lex(); |
420 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S)); |
421 | |
422 | return ParseStatus::Success; |
423 | } |
424 | |
425 | case AsmToken::EqualEqual: |
426 | case AsmToken::ExclaimEqual: |
427 | case AsmToken::GreaterEqual: |
428 | case AsmToken::GreaterGreater: |
429 | case AsmToken::LessEqual: |
430 | case AsmToken::LessLess: { |
431 | Operands.push_back(Elt: BPFOperand::createToken( |
432 | Str: getLexer().getTok().getString().substr(Start: 0, N: 1), S)); |
433 | Operands.push_back(Elt: BPFOperand::createToken( |
434 | Str: getLexer().getTok().getString().substr(Start: 1, N: 1), S)); |
435 | getLexer().Lex(); |
436 | |
437 | return ParseStatus::Success; |
438 | } |
439 | |
440 | default: |
441 | break; |
442 | } |
443 | |
444 | return ParseStatus::NoMatch; |
445 | } |
446 | |
447 | ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) { |
448 | SMLoc S = getLoc(); |
449 | SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1); |
450 | |
451 | switch (getLexer().getKind()) { |
452 | default: |
453 | return ParseStatus::NoMatch; |
454 | case AsmToken::Identifier: |
455 | StringRef Name = getLexer().getTok().getIdentifier(); |
456 | MCRegister Reg = MatchRegisterName(Name); |
457 | |
458 | if (!Reg) |
459 | return ParseStatus::NoMatch; |
460 | |
461 | getLexer().Lex(); |
462 | Operands.push_back(Elt: BPFOperand::createReg(Reg, S, E)); |
463 | } |
464 | return ParseStatus::Success; |
465 | } |
466 | |
467 | ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) { |
468 | switch (getLexer().getKind()) { |
469 | default: |
470 | return ParseStatus::NoMatch; |
471 | case AsmToken::LParen: |
472 | case AsmToken::Minus: |
473 | case AsmToken::Plus: |
474 | case AsmToken::Integer: |
475 | case AsmToken::String: |
476 | case AsmToken::Identifier: |
477 | break; |
478 | } |
479 | |
480 | const MCExpr *IdVal; |
481 | SMLoc S = getLoc(); |
482 | |
483 | if (getParser().parseExpression(Res&: IdVal)) |
484 | return ParseStatus::Failure; |
485 | |
486 | SMLoc E = SMLoc::getFromPointer(Ptr: S.getPointer() - 1); |
487 | Operands.push_back(Elt: BPFOperand::createImm(Val: IdVal, S, E)); |
488 | |
489 | return ParseStatus::Success; |
490 | } |
491 | |
492 | /// Parse an BPF instruction which is in BPF verifier format. |
493 | bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, |
494 | SMLoc NameLoc, OperandVector &Operands) { |
495 | // The first operand could be either register or actually an operator. |
496 | MCRegister Reg = MatchRegisterName(Name); |
497 | |
498 | if (Reg) { |
499 | SMLoc E = SMLoc::getFromPointer(Ptr: NameLoc.getPointer() - 1); |
500 | Operands.push_back(Elt: BPFOperand::createReg(Reg, S: NameLoc, E)); |
501 | } else if (BPFOperand::isValidIdAtStart(Name)) |
502 | Operands.push_back(Elt: BPFOperand::createToken(Str: Name, S: NameLoc)); |
503 | else |
504 | return Error(L: NameLoc, Msg: "invalid register/token name" ); |
505 | |
506 | while (!getLexer().is(K: AsmToken::EndOfStatement)) { |
507 | // Attempt to parse token as operator |
508 | if (parseOperandAsOperator(Operands).isSuccess()) |
509 | continue; |
510 | |
511 | // Attempt to parse token as register |
512 | if (parseRegister(Operands).isSuccess()) |
513 | continue; |
514 | |
515 | if (getLexer().is(K: AsmToken::Comma)) { |
516 | getLexer().Lex(); |
517 | continue; |
518 | } |
519 | |
520 | // Attempt to parse token as an immediate |
521 | if (!parseImmediate(Operands).isSuccess()) { |
522 | SMLoc Loc = getLexer().getLoc(); |
523 | return Error(L: Loc, Msg: "unexpected token" ); |
524 | } |
525 | } |
526 | |
527 | if (getLexer().isNot(K: AsmToken::EndOfStatement)) { |
528 | SMLoc Loc = getLexer().getLoc(); |
529 | |
530 | getParser().eatToEndOfStatement(); |
531 | |
532 | return Error(L: Loc, Msg: "unexpected token" ); |
533 | } |
534 | |
535 | // Consume the EndOfStatement. |
536 | getParser().Lex(); |
537 | return false; |
538 | } |
539 | |
540 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() { |
541 | RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget()); |
542 | RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget()); |
543 | RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget()); |
544 | } |
545 | |