1//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Windows-specific.
10// A parser for the module-definition file (.def file).
11//
// The format of module-definition files is described in this document:
13// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Object/COFFModuleDefinition.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/Object/COFFImportFile.h"
21#include "llvm/Object/Error.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/Path.h"
24
25using namespace llvm::COFF;
26using namespace llvm;
27
28namespace llvm {
29namespace object {
30
/// Token kinds produced by the Lexer and consumed by the Parser.
enum Kind {
  // Bookkeeping kinds.
  Unknown = 0, // Kind of a default-constructed Token.
  Eof,         // Nothing left to lex.

  // Non-keyword words, quoted strings and punctuation.
  Identifier, // A bare word that is not a keyword, or a quoted string.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords, matched by their exact upper-case spelling.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwExportAs,  // EXPORTAS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
51
52struct Token {
53 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54 Kind K;
55 StringRef Value;
56};
57
58static bool isDecorated(StringRef Sym, bool MingwDef) {
59 // In def files, the symbols can either be listed decorated or undecorated.
60 //
61 // - For cdecl symbols, only the undecorated form is allowed.
62 // - For fastcall and vectorcall symbols, both fully decorated or
63 // undecorated forms can be present.
64 // - For stdcall symbols in non-MinGW environments, the decorated form is
65 // fully decorated with leading underscore and trailing stack argument
66 // size - like "_Func@0".
67 // - In MinGW def files, a decorated stdcall symbol does not include the
68 // leading underscore though, like "Func@0".
69
70 // This function controls whether a leading underscore should be added to
71 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74 // as decorated, i.e. don't add any more leading underscores.
75 // We can't check for a leading underscore here, since function names
76 // themselves can start with an underscore, while a second one still needs
77 // to be added.
78 return Sym.starts_with(Prefix: "@") || Sym.contains(Other: "@@") || Sym.starts_with(Prefix: "?") ||
79 (!MingwDef && Sym.contains(C: '@'));
80}
81
82class Lexer {
83public:
84 Lexer(StringRef S) : Buf(S) {}
85
86 Token lex() {
87 Buf = Buf.trim();
88 if (Buf.empty())
89 return Token(Eof);
90
91 switch (Buf[0]) {
92 case '\0':
93 return Token(Eof);
94 case ';': {
95 size_t End = Buf.find(C: '\n');
96 Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End);
97 return lex();
98 }
99 case '=':
100 Buf = Buf.drop_front();
101 if (Buf.consume_front(Prefix: "="))
102 return Token(EqualEqual, "==");
103 return Token(Equal, "=");
104 case ',':
105 Buf = Buf.drop_front();
106 return Token(Comma, ",");
107 case '"': {
108 StringRef S;
109 std::tie(args&: S, args&: Buf) = Buf.substr(Start: 1).split(Separator: '"');
110 return Token(Identifier, S);
111 }
112 default: {
113 size_t End = Buf.find_first_of(Chars: "=,;\r\n \t\v");
114 StringRef Word = Buf.substr(Start: 0, N: End);
115 Kind K = llvm::StringSwitch<Kind>(Word)
116 .Case(S: "BASE", Value: KwBase)
117 .Case(S: "CONSTANT", Value: KwConstant)
118 .Case(S: "DATA", Value: KwData)
119 .Case(S: "EXPORTS", Value: KwExports)
120 .Case(S: "EXPORTAS", Value: KwExportAs)
121 .Case(S: "HEAPSIZE", Value: KwHeapsize)
122 .Case(S: "LIBRARY", Value: KwLibrary)
123 .Case(S: "NAME", Value: KwName)
124 .Case(S: "NONAME", Value: KwNoname)
125 .Case(S: "PRIVATE", Value: KwPrivate)
126 .Case(S: "STACKSIZE", Value: KwStacksize)
127 .Case(S: "VERSION", Value: KwVersion)
128 .Default(Value: Identifier);
129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End);
130 return Token(K, Word);
131 }
132 }
133 }
134
135private:
136 StringRef Buf;
137};
138
139class Parser {
140public:
141 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143 if (Machine != IMAGE_FILE_MACHINE_I386)
144 AddUnderscores = false;
145 }
146
147 Expected<COFFModuleDefinition> parse() {
148 do {
149 if (Error Err = parseOne())
150 return std::move(Err);
151 } while (Tok.K != Eof);
152 return Info;
153 }
154
155private:
156 void read() {
157 if (Stack.empty()) {
158 Tok = Lex.lex();
159 return;
160 }
161 Tok = Stack.back();
162 Stack.pop_back();
163 }
164
165 Error readAsInt(uint64_t *I) {
166 read();
167 if (Tok.K != Identifier || Tok.Value.getAsInteger(Radix: 10, Result&: *I))
168 return createError(Err: "integer expected");
169 return Error::success();
170 }
171
172 Error expect(Kind Expected, StringRef Msg) {
173 read();
174 if (Tok.K != Expected)
175 return createError(Err: Msg);
176 return Error::success();
177 }
178
179 void unget() { Stack.push_back(x: Tok); }
180
181 Error parseOne() {
182 read();
183 switch (Tok.K) {
184 case Eof:
185 return Error::success();
186 case KwExports:
187 for (;;) {
188 read();
189 if (Tok.K != Identifier) {
190 unget();
191 return Error::success();
192 }
193 if (Error Err = parseExport())
194 return Err;
195 }
196 case KwHeapsize:
197 return parseNumbers(Reserve: &Info.HeapReserve, Commit: &Info.HeapCommit);
198 case KwStacksize:
199 return parseNumbers(Reserve: &Info.StackReserve, Commit: &Info.StackCommit);
200 case KwLibrary:
201 case KwName: {
202 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203 std::string Name;
204 if (Error Err = parseName(Out: &Name, Baseaddr: &Info.ImageBase))
205 return Err;
206
207 Info.ImportName = Name;
208
209 // Set the output file, but don't override /out if it was already passed.
210 if (Info.OutputFile.empty()) {
211 Info.OutputFile = Name;
212 // Append the appropriate file extension if not already present.
213 if (!sys::path::has_extension(path: Name))
214 Info.OutputFile += IsDll ? ".dll" : ".exe";
215 }
216
217 return Error::success();
218 }
219 case KwVersion:
220 return parseVersion(Major: &Info.MajorImageVersion, Minor: &Info.MinorImageVersion);
221 default:
222 return createError(Err: "unknown directive: " + Tok.Value);
223 }
224 }
225
226 Error parseExport() {
227 COFFShortExport E;
228 E.Name = std::string(Tok.Value);
229 read();
230 if (Tok.K == Equal) {
231 read();
232 if (Tok.K != Identifier)
233 return createError(Err: "identifier expected, but got " + Tok.Value);
234 E.ExtName = E.Name;
235 E.Name = std::string(Tok.Value);
236 } else {
237 unget();
238 }
239
240 if (AddUnderscores) {
241 if (!isDecorated(Sym: E.Name, MingwDef))
242 E.Name = (std::string("_").append(str: E.Name));
243 if (!E.ExtName.empty() && !isDecorated(Sym: E.ExtName, MingwDef))
244 E.ExtName = (std::string("_").append(str: E.ExtName));
245 }
246
247 for (;;) {
248 read();
249 if (Tok.K == Identifier && Tok.Value[0] == '@') {
250 if (Tok.Value == "@") {
251 // "foo @ 10"
252 read();
253 Tok.Value.getAsInteger(Radix: 10, Result&: E.Ordinal);
254 } else if (Tok.Value.drop_front().getAsInteger(Radix: 10, Result&: E.Ordinal)) {
255 // "foo \n @bar" - Not an ordinal modifier at all, but the next
256 // export (fastcall decorated) - complete the current one.
257 unget();
258 Info.Exports.push_back(x: E);
259 return Error::success();
260 }
261 // "foo @10"
262 read();
263 if (Tok.K == KwNoname) {
264 E.Noname = true;
265 } else {
266 unget();
267 }
268 continue;
269 }
270 if (Tok.K == KwData) {
271 E.Data = true;
272 continue;
273 }
274 if (Tok.K == KwConstant) {
275 E.Constant = true;
276 continue;
277 }
278 if (Tok.K == KwPrivate) {
279 E.Private = true;
280 continue;
281 }
282 if (Tok.K == EqualEqual) {
283 read();
284 E.ImportName = std::string(Tok.Value);
285 continue;
286 }
287 // EXPORTAS must be at the end of export definition
288 if (Tok.K == KwExportAs) {
289 read();
290 if (Tok.K == Eof)
291 return createError(
292 Err: "unexpected end of file, EXPORTAS identifier expected");
293 E.ExportAs = std::string(Tok.Value);
294 } else {
295 unget();
296 }
297 Info.Exports.push_back(x: E);
298 return Error::success();
299 }
300 }
301
302 // HEAPSIZE/STACKSIZE reserve[,commit]
303 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
304 if (Error Err = readAsInt(I: Reserve))
305 return Err;
306 read();
307 if (Tok.K != Comma) {
308 unget();
309 Commit = nullptr;
310 return Error::success();
311 }
312 if (Error Err = readAsInt(I: Commit))
313 return Err;
314 return Error::success();
315 }
316
317 // NAME outputPath [BASE=address]
318 Error parseName(std::string *Out, uint64_t *Baseaddr) {
319 read();
320 if (Tok.K == Identifier) {
321 *Out = std::string(Tok.Value);
322 } else {
323 *Out = "";
324 unget();
325 return Error::success();
326 }
327 read();
328 if (Tok.K == KwBase) {
329 if (Error Err = expect(Expected: Equal, Msg: "'=' expected"))
330 return Err;
331 if (Error Err = readAsInt(I: Baseaddr))
332 return Err;
333 } else {
334 unget();
335 *Baseaddr = 0;
336 }
337 return Error::success();
338 }
339
340 // VERSION major[.minor]
341 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
342 read();
343 if (Tok.K != Identifier)
344 return createError(Err: "identifier expected, but got " + Tok.Value);
345 StringRef V1, V2;
346 std::tie(args&: V1, args&: V2) = Tok.Value.split(Separator: '.');
347 if (V1.getAsInteger(Radix: 10, Result&: *Major))
348 return createError(Err: "integer expected, but got " + Tok.Value);
349 if (V2.empty())
350 *Minor = 0;
351 else if (V2.getAsInteger(Radix: 10, Result&: *Minor))
352 return createError(Err: "integer expected, but got " + Tok.Value);
353 return Error::success();
354 }
355
356 Lexer Lex;
357 Token Tok;
358 std::vector<Token> Stack;
359 MachineTypes Machine;
360 COFFModuleDefinition Info;
361 bool MingwDef;
362 bool AddUnderscores;
363};
364
365Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
366 MachineTypes Machine,
367 bool MingwDef,
368 bool AddUnderscores) {
369 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
370}
371
372} // namespace object
373} // namespace llvm
374