1//===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// --
8//
9// Note, this is for wasm, the binary format (analogous to ELF), not wasm,
10// the instruction set (analogous to x86), for which parsing code lives in
11// WebAssemblyAsmParser.
12//
13// This file contains processing for generic directives implemented using
14// MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
15// WebAssemblyAsmParser.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/BinaryFormat/Wasm.h"
21#include "llvm/MC/MCContext.h"
22#include "llvm/MC/MCObjectFileInfo.h"
23#include "llvm/MC/MCParser/MCAsmLexer.h"
24#include "llvm/MC/MCParser/MCAsmParser.h"
25#include "llvm/MC/MCParser/MCAsmParserExtension.h"
26#include "llvm/MC/MCSectionWasm.h"
27#include "llvm/MC/MCStreamer.h"
28#include "llvm/MC/MCSymbolWasm.h"
29#include "llvm/Support/Casting.h"
30#include <optional>
31
32using namespace llvm;
33
34namespace {
35
36class WasmAsmParser : public MCAsmParserExtension {
37 MCAsmParser *Parser = nullptr;
38 MCAsmLexer *Lexer = nullptr;
39
40 template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
41 void addDirectiveHandler(StringRef Directive) {
42 MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
43 this, HandleDirective<WasmAsmParser, HandlerMethod>);
44
45 getParser().addDirectiveHandler(Directive, Handler);
46 }
47
48public:
49 WasmAsmParser() { BracketExpressionsSupported = true; }
50
51 void Initialize(MCAsmParser &P) override {
52 Parser = &P;
53 Lexer = &Parser->getLexer();
54 // Call the base implementation.
55 this->MCAsmParserExtension::Initialize(Parser&: *Parser);
56
57 addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(Directive: ".text");
58 addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(Directive: ".data");
59 addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(Directive: ".section");
60 addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(Directive: ".size");
61 addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(Directive: ".type");
62 addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(Directive: ".ident");
63 addDirectiveHandler<
64 &WasmAsmParser::ParseDirectiveSymbolAttribute>(Directive: ".weak");
65 addDirectiveHandler<
66 &WasmAsmParser::ParseDirectiveSymbolAttribute>(Directive: ".local");
67 addDirectiveHandler<
68 &WasmAsmParser::ParseDirectiveSymbolAttribute>(Directive: ".internal");
69 addDirectiveHandler<
70 &WasmAsmParser::ParseDirectiveSymbolAttribute>(Directive: ".hidden");
71 }
72
73 bool error(const StringRef &Msg, const AsmToken &Tok) {
74 return Parser->Error(L: Tok.getLoc(), Msg: Msg + Tok.getString());
75 }
76
77 bool isNext(AsmToken::TokenKind Kind) {
78 auto Ok = Lexer->is(K: Kind);
79 if (Ok)
80 Lex();
81 return Ok;
82 }
83
84 bool expect(AsmToken::TokenKind Kind, const char *KindName) {
85 if (!isNext(Kind))
86 return error(Msg: std::string("Expected ") + KindName + ", instead got: ",
87 Tok: Lexer->getTok());
88 return false;
89 }
90
91 bool parseSectionDirectiveText(StringRef, SMLoc) {
92 // FIXME: .text currently no-op.
93 return false;
94 }
95
96 bool parseSectionDirectiveData(StringRef, SMLoc) {
97 auto *S = getContext().getObjectFileInfo()->getDataSection();
98 getStreamer().switchSection(Section: S);
99 return false;
100 }
101
102 uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
103 uint32_t flags = 0;
104 for (char C : FlagStr) {
105 switch (C) {
106 case 'p':
107 Passive = true;
108 break;
109 case 'G':
110 Group = true;
111 break;
112 case 'T':
113 flags |= wasm::WASM_SEG_FLAG_TLS;
114 break;
115 case 'S':
116 flags |= wasm::WASM_SEG_FLAG_STRINGS;
117 break;
118 case 'R':
119 flags |= wasm::WASM_SEG_FLAG_RETAIN;
120 break;
121 default:
122 return -1U;
123 }
124 }
125 return flags;
126 }
127
128 bool parseGroup(StringRef &GroupName) {
129 if (Lexer->isNot(K: AsmToken::Comma))
130 return TokError(Msg: "expected group name");
131 Lex();
132 if (Lexer->is(K: AsmToken::Integer)) {
133 GroupName = getTok().getString();
134 Lex();
135 } else if (Parser->parseIdentifier(Res&: GroupName)) {
136 return TokError(Msg: "invalid group name");
137 }
138 if (Lexer->is(K: AsmToken::Comma)) {
139 Lex();
140 StringRef Linkage;
141 if (Parser->parseIdentifier(Res&: Linkage))
142 return TokError(Msg: "invalid linkage");
143 if (Linkage != "comdat")
144 return TokError(Msg: "Linkage must be 'comdat'");
145 }
146 return false;
147 }
148
149 bool parseSectionDirective(StringRef, SMLoc loc) {
150 StringRef Name;
151 if (Parser->parseIdentifier(Res&: Name))
152 return TokError(Msg: "expected identifier in directive");
153
154 if (expect(Kind: AsmToken::Comma, KindName: ","))
155 return true;
156
157 if (Lexer->isNot(K: AsmToken::String))
158 return error(Msg: "expected string in directive, instead got: ", Tok: Lexer->getTok());
159
160 auto Kind = StringSwitch<std::optional<SectionKind>>(Name)
161 .StartsWith(S: ".data", Value: SectionKind::getData())
162 .StartsWith(S: ".tdata", Value: SectionKind::getThreadData())
163 .StartsWith(S: ".tbss", Value: SectionKind::getThreadBSS())
164 .StartsWith(S: ".rodata", Value: SectionKind::getReadOnly())
165 .StartsWith(S: ".text", Value: SectionKind::getText())
166 .StartsWith(S: ".custom_section", Value: SectionKind::getMetadata())
167 .StartsWith(S: ".bss", Value: SectionKind::getBSS())
168 // See use of .init_array in WasmObjectWriter and
169 // TargetLoweringObjectFileWasm
170 .StartsWith(S: ".init_array", Value: SectionKind::getData())
171 .StartsWith(S: ".debug_", Value: SectionKind::getMetadata())
172 .Default(Value: SectionKind::getData());
173
174 // Update section flags if present in this .section directive
175 bool Passive = false;
176 bool Group = false;
177 uint32_t Flags =
178 parseSectionFlags(FlagStr: getTok().getStringContents(), Passive, Group);
179 if (Flags == -1U)
180 return TokError(Msg: "unknown flag");
181
182 Lex();
183
184 if (expect(Kind: AsmToken::Comma, KindName: ",") || expect(Kind: AsmToken::At, KindName: "@"))
185 return true;
186
187 StringRef GroupName;
188 if (Group && parseGroup(GroupName))
189 return true;
190
191 if (expect(Kind: AsmToken::EndOfStatement, KindName: "eol"))
192 return true;
193
194 // TODO: Parse UniqueID
195 MCSectionWasm *WS = getContext().getWasmSection(
196 Section: Name, K: *Kind, Flags, Group: GroupName, UniqueID: MCContext::GenericSectionID);
197
198 if (WS->getSegmentFlags() != Flags)
199 Parser->Error(L: loc, Msg: "changed section flags for " + Name +
200 ", expected: 0x" +
201 utohexstr(X: WS->getSegmentFlags()));
202
203 if (Passive) {
204 if (!WS->isWasmData())
205 return Parser->Error(L: loc, Msg: "Only data sections can be passive");
206 WS->setPassive();
207 }
208
209 getStreamer().switchSection(Section: WS);
210 return false;
211 }
212
213 // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
214 // so maybe could be shared somehow.
215 bool parseDirectiveSize(StringRef, SMLoc Loc) {
216 StringRef Name;
217 if (Parser->parseIdentifier(Res&: Name))
218 return TokError(Msg: "expected identifier in directive");
219 auto Sym = getContext().getOrCreateSymbol(Name);
220 if (expect(Kind: AsmToken::Comma, KindName: ","))
221 return true;
222 const MCExpr *Expr;
223 if (Parser->parseExpression(Res&: Expr))
224 return true;
225 if (expect(Kind: AsmToken::EndOfStatement, KindName: "eol"))
226 return true;
227 auto WasmSym = cast<MCSymbolWasm>(Val: Sym);
228 if (WasmSym->isFunction()) {
229 // Ignore .size directives for function symbols. They get their size
230 // set automatically based on their content.
231 Warning(L: Loc, Msg: ".size directive ignored for function symbols");
232 } else {
233 getStreamer().emitELFSize(Symbol: Sym, Value: Expr);
234 }
235 return false;
236 }
237
238 bool parseDirectiveType(StringRef, SMLoc) {
239 // This could be the start of a function, check if followed by
240 // "label,@function"
241 if (!Lexer->is(K: AsmToken::Identifier))
242 return error(Msg: "Expected label after .type directive, got: ",
243 Tok: Lexer->getTok());
244 auto WasmSym = cast<MCSymbolWasm>(
245 Val: getStreamer().getContext().getOrCreateSymbol(
246 Name: Lexer->getTok().getString()));
247 Lex();
248 if (!(isNext(Kind: AsmToken::Comma) && isNext(Kind: AsmToken::At) &&
249 Lexer->is(K: AsmToken::Identifier)))
250 return error(Msg: "Expected label,@type declaration, got: ", Tok: Lexer->getTok());
251 auto TypeName = Lexer->getTok().getString();
252 if (TypeName == "function") {
253 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
254 auto *Current =
255 cast<MCSectionWasm>(Val: getStreamer().getCurrentSectionOnly());
256 if (Current->getGroup())
257 WasmSym->setComdat(true);
258 } else if (TypeName == "global")
259 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
260 else if (TypeName == "object")
261 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
262 else
263 return error(Msg: "Unknown WASM symbol type: ", Tok: Lexer->getTok());
264 Lex();
265 return expect(Kind: AsmToken::EndOfStatement, KindName: "EOL");
266 }
267
268 // FIXME: Shared with ELF.
269 /// ParseDirectiveIdent
270 /// ::= .ident string
271 bool ParseDirectiveIdent(StringRef, SMLoc) {
272 if (getLexer().isNot(K: AsmToken::String))
273 return TokError(Msg: "unexpected token in '.ident' directive");
274 StringRef Data = getTok().getIdentifier();
275 Lex();
276 if (getLexer().isNot(K: AsmToken::EndOfStatement))
277 return TokError(Msg: "unexpected token in '.ident' directive");
278 Lex();
279 getStreamer().emitIdent(IdentString: Data);
280 return false;
281 }
282
283 // FIXME: Shared with ELF.
284 /// ParseDirectiveSymbolAttribute
285 /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
286 bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
287 MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
288 .Case(S: ".weak", Value: MCSA_Weak)
289 .Case(S: ".local", Value: MCSA_Local)
290 .Case(S: ".hidden", Value: MCSA_Hidden)
291 .Case(S: ".internal", Value: MCSA_Internal)
292 .Case(S: ".protected", Value: MCSA_Protected)
293 .Default(Value: MCSA_Invalid);
294 assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
295 if (getLexer().isNot(K: AsmToken::EndOfStatement)) {
296 while (true) {
297 StringRef Name;
298 if (getParser().parseIdentifier(Res&: Name))
299 return TokError(Msg: "expected identifier in directive");
300 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
301 getStreamer().emitSymbolAttribute(Symbol: Sym, Attribute: Attr);
302 if (getLexer().is(K: AsmToken::EndOfStatement))
303 break;
304 if (getLexer().isNot(K: AsmToken::Comma))
305 return TokError(Msg: "unexpected token in directive");
306 Lex();
307 }
308 }
309 Lex();
310 return false;
311 }
312};
313
314} // end anonymous namespace
315
316namespace llvm {
317
318MCAsmParserExtension *createWasmAsmParser() {
319 return new WasmAsmParser;
320}
321
322} // end namespace llvm
323