1//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Windows-specific.
10// A parser for the module-definition file (.def file).
11//
// The format of module-definition files is described in this document:
13// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Object/COFFModuleDefinition.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/Object/COFFImportFile.h"
21#include "llvm/Object/Error.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/Path.h"
24
25using namespace llvm::COFF;
26using namespace llvm;
27
28namespace llvm {
29namespace object {
30
/// Token kinds produced by the lexer for module-definition files.
///
/// The Kw* enumerators correspond to the upper-case keywords the lexer
/// recognizes; every other bare word or quoted string is an Identifier.
enum Kind {
  Unknown,     // Kind of a default-constructed Token.
  Eof,         // End of the input buffer (or an embedded NUL character).
  Identifier,  // Bare word that is not a keyword, or a quoted string.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwExportAs,  // EXPORTAS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
51
52struct Token {
53 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54 Kind K;
55 StringRef Value;
56};
57
58static bool isDecorated(StringRef Sym, bool MingwDef) {
59 // In def files, the symbols can either be listed decorated or undecorated.
60 //
61 // - For cdecl symbols, only the undecorated form is allowed.
62 // - For fastcall and vectorcall symbols, both fully decorated or
63 // undecorated forms can be present.
64 // - For stdcall symbols in non-MinGW environments, the decorated form is
65 // fully decorated with leading underscore and trailing stack argument
66 // size - like "_Func@0".
67 // - In MinGW def files, a decorated stdcall symbol does not include the
68 // leading underscore though, like "Func@0".
69
70 // This function controls whether a leading underscore should be added to
71 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74 // as decorated, i.e. don't add any more leading underscores.
75 // We can't check for a leading underscore here, since function names
76 // themselves can start with an underscore, while a second one still needs
77 // to be added.
78 return Sym.starts_with(Prefix: "@") || Sym.contains(Other: "@@") || Sym.starts_with(Prefix: "?") ||
79 (!MingwDef && Sym.contains(C: '@'));
80}
81
82class Lexer {
83public:
84 Lexer(StringRef S) : Buf(S) {}
85
86 Token lex() {
87 Buf = Buf.trim();
88 if (Buf.empty())
89 return Token(Eof);
90
91 switch (Buf[0]) {
92 case '\0':
93 return Token(Eof);
94 case ';': {
95 size_t End = Buf.find(C: '\n');
96 Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End);
97 return lex();
98 }
99 case '=':
100 Buf = Buf.drop_front();
101 if (Buf.consume_front(Prefix: "="))
102 return Token(EqualEqual, "==");
103 return Token(Equal, "=");
104 case ',':
105 Buf = Buf.drop_front();
106 return Token(Comma, ",");
107 case '"': {
108 StringRef S;
109 std::tie(args&: S, args&: Buf) = Buf.substr(Start: 1).split(Separator: '"');
110 return Token(Identifier, S);
111 }
112 default: {
113 size_t End = Buf.find_first_of(Chars: "=,;\r\n \t\v");
114 StringRef Word = Buf.substr(Start: 0, N: End);
115 Kind K = llvm::StringSwitch<Kind>(Word)
116 .Case(S: "BASE", Value: KwBase)
117 .Case(S: "CONSTANT", Value: KwConstant)
118 .Case(S: "DATA", Value: KwData)
119 .Case(S: "EXPORTS", Value: KwExports)
120 .Case(S: "EXPORTAS", Value: KwExportAs)
121 .Case(S: "HEAPSIZE", Value: KwHeapsize)
122 .Case(S: "LIBRARY", Value: KwLibrary)
123 .Case(S: "NAME", Value: KwName)
124 .Case(S: "NONAME", Value: KwNoname)
125 .Case(S: "PRIVATE", Value: KwPrivate)
126 .Case(S: "STACKSIZE", Value: KwStacksize)
127 .Case(S: "VERSION", Value: KwVersion)
128 .Default(Value: Identifier);
129 Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End);
130 return Token(K, Word);
131 }
132 }
133 }
134
135private:
136 StringRef Buf;
137};
138
139class Parser {
140public:
141 explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142 : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143 if (Machine != IMAGE_FILE_MACHINE_I386)
144 AddUnderscores = false;
145 }
146
147 Expected<COFFModuleDefinition> parse() {
148 do {
149 if (Error Err = parseOne())
150 return std::move(Err);
151 } while (Tok.K != Eof);
152 return Info;
153 }
154
155private:
156 void read() {
157 if (Stack.empty()) {
158 Tok = Lex.lex();
159 return;
160 }
161 Tok = Stack.back();
162 Stack.pop_back();
163 }
164
165 Error readAsInt(uint64_t *I) {
166 read();
167 if (Tok.K != Identifier || Tok.Value.getAsInteger(Radix: 10, Result&: *I))
168 return createError(Err: "integer expected");
169 return Error::success();
170 }
171
172 Error expect(Kind Expected, StringRef Msg) {
173 read();
174 if (Tok.K != Expected)
175 return createError(Err: Msg);
176 return Error::success();
177 }
178
179 void unget() { Stack.push_back(x: Tok); }
180
181 Error parseOne() {
182 read();
183 switch (Tok.K) {
184 case Eof:
185 return Error::success();
186 case KwExports:
187 for (;;) {
188 read();
189 if (Tok.K != Identifier) {
190 unget();
191 return Error::success();
192 }
193 if (Error Err = parseExport())
194 return Err;
195 }
196 case KwHeapsize:
197 return parseNumbers(Reserve: &Info.HeapReserve, Commit: &Info.HeapCommit);
198 case KwStacksize:
199 return parseNumbers(Reserve: &Info.StackReserve, Commit: &Info.StackCommit);
200 case KwLibrary:
201 case KwName: {
202 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203 std::string Name;
204 if (Error Err = parseName(Out: &Name, Baseaddr: &Info.ImageBase))
205 return Err;
206
207 Info.ImportName = Name;
208
209 // Set the output file, but don't override /out if it was already passed.
210 if (Info.OutputFile.empty()) {
211 Info.OutputFile = Name;
212 // Append the appropriate file extension if not already present.
213 if (!sys::path::has_extension(path: Name))
214 Info.OutputFile += IsDll ? ".dll" : ".exe";
215 }
216
217 return Error::success();
218 }
219 case KwVersion:
220 return parseVersion(Major: &Info.MajorImageVersion, Minor: &Info.MinorImageVersion);
221 default:
222 return createError(Err: "unknown directive: " + Tok.Value);
223 }
224 }
225
226 Error parseExport() {
227 COFFShortExport E;
228 E.Name = std::string(Tok.Value);
229 read();
230 if (Tok.K == Equal) {
231 read();
232 if (Tok.K != Identifier)
233 return createError(Err: "identifier expected, but got " + Tok.Value);
234 E.ExtName = E.Name;
235 E.Name = std::string(Tok.Value);
236 } else {
237 unget();
238 }
239
240 if (AddUnderscores) {
241 // Don't add underscore if the name is already mangled or if it's a
242 // forward target.
243 if (!isDecorated(Sym: E.Name, MingwDef) &&
244 (E.ExtName.empty() || !StringRef(E.Name).contains(Other: ".")))
245 E.Name = (std::string("_").append(str: E.Name));
246 if (!E.ExtName.empty() && !isDecorated(Sym: E.ExtName, MingwDef))
247 E.ExtName = (std::string("_").append(str: E.ExtName));
248 }
249
250 for (;;) {
251 read();
252 if (Tok.K == Identifier && Tok.Value[0] == '@') {
253 if (Tok.Value == "@") {
254 // "foo @ 10"
255 read();
256 Tok.Value.getAsInteger(Radix: 10, Result&: E.Ordinal);
257 } else if (Tok.Value.drop_front().getAsInteger(Radix: 10, Result&: E.Ordinal)) {
258 // "foo \n @bar" - Not an ordinal modifier at all, but the next
259 // export (fastcall decorated) - complete the current one.
260 unget();
261 Info.Exports.push_back(x: E);
262 return Error::success();
263 }
264 // "foo @10"
265 read();
266 if (Tok.K == KwNoname) {
267 E.Noname = true;
268 } else {
269 unget();
270 }
271 continue;
272 }
273 if (Tok.K == KwData) {
274 E.Data = true;
275 continue;
276 }
277 if (Tok.K == KwConstant) {
278 E.Constant = true;
279 continue;
280 }
281 if (Tok.K == KwPrivate) {
282 E.Private = true;
283 continue;
284 }
285 if (Tok.K == EqualEqual) {
286 read();
287 E.ImportName = std::string(Tok.Value);
288 continue;
289 }
290 // EXPORTAS must be at the end of export definition
291 if (Tok.K == KwExportAs) {
292 read();
293 if (Tok.K == Eof)
294 return createError(
295 Err: "unexpected end of file, EXPORTAS identifier expected");
296 E.ExportAs = std::string(Tok.Value);
297 } else {
298 unget();
299 }
300 Info.Exports.push_back(x: E);
301 return Error::success();
302 }
303 }
304
305 // HEAPSIZE/STACKSIZE reserve[,commit]
306 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
307 if (Error Err = readAsInt(I: Reserve))
308 return Err;
309 read();
310 if (Tok.K != Comma) {
311 unget();
312 Commit = nullptr;
313 return Error::success();
314 }
315 if (Error Err = readAsInt(I: Commit))
316 return Err;
317 return Error::success();
318 }
319
320 // NAME outputPath [BASE=address]
321 Error parseName(std::string *Out, uint64_t *Baseaddr) {
322 read();
323 if (Tok.K == Identifier) {
324 *Out = std::string(Tok.Value);
325 } else {
326 *Out = "";
327 unget();
328 return Error::success();
329 }
330 read();
331 if (Tok.K == KwBase) {
332 if (Error Err = expect(Expected: Equal, Msg: "'=' expected"))
333 return Err;
334 if (Error Err = readAsInt(I: Baseaddr))
335 return Err;
336 } else {
337 unget();
338 *Baseaddr = 0;
339 }
340 return Error::success();
341 }
342
343 // VERSION major[.minor]
344 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
345 read();
346 if (Tok.K != Identifier)
347 return createError(Err: "identifier expected, but got " + Tok.Value);
348 StringRef V1, V2;
349 std::tie(args&: V1, args&: V2) = Tok.Value.split(Separator: '.');
350 if (V1.getAsInteger(Radix: 10, Result&: *Major))
351 return createError(Err: "integer expected, but got " + Tok.Value);
352 if (V2.empty())
353 *Minor = 0;
354 else if (V2.getAsInteger(Radix: 10, Result&: *Minor))
355 return createError(Err: "integer expected, but got " + Tok.Value);
356 return Error::success();
357 }
358
359 Lexer Lex;
360 Token Tok;
361 std::vector<Token> Stack;
362 MachineTypes Machine;
363 COFFModuleDefinition Info;
364 bool MingwDef;
365 bool AddUnderscores;
366};
367
368Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
369 MachineTypes Machine,
370 bool MingwDef,
371 bool AddUnderscores) {
372 return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
373}
374
375} // namespace object
376} // namespace llvm
377