1 | //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Windows-specific. |
10 | // A parser for the module-definition file (.def file). |
11 | // |
12 | // The format of module-definition files is described in this document: |
13 | // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/Object/COFFModuleDefinition.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/ADT/StringSwitch.h" |
20 | #include "llvm/Object/COFFImportFile.h" |
21 | #include "llvm/Object/Error.h" |
22 | #include "llvm/Support/Error.h" |
23 | #include "llvm/Support/Path.h" |
24 | |
25 | using namespace llvm::COFF; |
26 | using namespace llvm; |
27 | |
28 | namespace llvm { |
29 | namespace object { |
30 | |
/// Token categories produced by the .def file lexer.
enum Kind {
  // Default kind of a default-constructed Token.
  Unknown,
  // Returned by the lexer once the input is exhausted.
  Eof,

  // Bare words and quoted strings that are not keywords.
  Identifier,

  // Punctuation: ",", "=" and "==".
  Comma,
  Equal,
  EqualEqual,

  // Keywords, matched case-sensitively in upper case by the lexer.
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwExportAs,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
51 | |
52 | struct Token { |
53 | explicit Token(Kind T = Unknown, StringRef S = "" ) : K(T), Value(S) {} |
54 | Kind K; |
55 | StringRef Value; |
56 | }; |
57 | |
58 | static bool isDecorated(StringRef Sym, bool MingwDef) { |
59 | // In def files, the symbols can either be listed decorated or undecorated. |
60 | // |
61 | // - For cdecl symbols, only the undecorated form is allowed. |
62 | // - For fastcall and vectorcall symbols, both fully decorated or |
63 | // undecorated forms can be present. |
64 | // - For stdcall symbols in non-MinGW environments, the decorated form is |
65 | // fully decorated with leading underscore and trailing stack argument |
66 | // size - like "_Func@0". |
67 | // - In MinGW def files, a decorated stdcall symbol does not include the |
68 | // leading underscore though, like "Func@0". |
69 | |
70 | // This function controls whether a leading underscore should be added to |
71 | // the given symbol name or not. For MinGW, treat a stdcall symbol name such |
72 | // as "Func@0" as undecorated, i.e. a leading underscore must be added. |
73 | // For non-MinGW, look for '@' in the whole string and consider "_Func@0" |
74 | // as decorated, i.e. don't add any more leading underscores. |
75 | // We can't check for a leading underscore here, since function names |
76 | // themselves can start with an underscore, while a second one still needs |
77 | // to be added. |
78 | return Sym.starts_with(Prefix: "@" ) || Sym.contains(Other: "@@" ) || Sym.starts_with(Prefix: "?" ) || |
79 | (!MingwDef && Sym.contains(C: '@')); |
80 | } |
81 | |
82 | class Lexer { |
83 | public: |
84 | Lexer(StringRef S) : Buf(S) {} |
85 | |
86 | Token lex() { |
87 | Buf = Buf.trim(); |
88 | if (Buf.empty()) |
89 | return Token(Eof); |
90 | |
91 | switch (Buf[0]) { |
92 | case '\0': |
93 | return Token(Eof); |
94 | case ';': { |
95 | size_t End = Buf.find(C: '\n'); |
96 | Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End); |
97 | return lex(); |
98 | } |
99 | case '=': |
100 | Buf = Buf.drop_front(); |
101 | if (Buf.consume_front(Prefix: "=" )) |
102 | return Token(EqualEqual, "==" ); |
103 | return Token(Equal, "=" ); |
104 | case ',': |
105 | Buf = Buf.drop_front(); |
106 | return Token(Comma, "," ); |
107 | case '"': { |
108 | StringRef S; |
109 | std::tie(args&: S, args&: Buf) = Buf.substr(Start: 1).split(Separator: '"'); |
110 | return Token(Identifier, S); |
111 | } |
112 | default: { |
113 | size_t End = Buf.find_first_of(Chars: "=,;\r\n \t\v" ); |
114 | StringRef Word = Buf.substr(Start: 0, N: End); |
115 | Kind K = llvm::StringSwitch<Kind>(Word) |
116 | .Case(S: "BASE" , Value: KwBase) |
117 | .Case(S: "CONSTANT" , Value: KwConstant) |
118 | .Case(S: "DATA" , Value: KwData) |
119 | .Case(S: "EXPORTS" , Value: KwExports) |
120 | .Case(S: "EXPORTAS" , Value: KwExportAs) |
121 | .Case(S: "HEAPSIZE" , Value: KwHeapsize) |
122 | .Case(S: "LIBRARY" , Value: KwLibrary) |
123 | .Case(S: "NAME" , Value: KwName) |
124 | .Case(S: "NONAME" , Value: KwNoname) |
125 | .Case(S: "PRIVATE" , Value: KwPrivate) |
126 | .Case(S: "STACKSIZE" , Value: KwStacksize) |
127 | .Case(S: "VERSION" , Value: KwVersion) |
128 | .Default(Value: Identifier); |
129 | Buf = (End == Buf.npos) ? "" : Buf.drop_front(N: End); |
130 | return Token(K, Word); |
131 | } |
132 | } |
133 | } |
134 | |
135 | private: |
136 | StringRef Buf; |
137 | }; |
138 | |
139 | class Parser { |
140 | public: |
141 | explicit Parser(StringRef S, MachineTypes M, bool B, bool AU) |
142 | : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) { |
143 | if (Machine != IMAGE_FILE_MACHINE_I386) |
144 | AddUnderscores = false; |
145 | } |
146 | |
147 | Expected<COFFModuleDefinition> parse() { |
148 | do { |
149 | if (Error Err = parseOne()) |
150 | return std::move(Err); |
151 | } while (Tok.K != Eof); |
152 | return Info; |
153 | } |
154 | |
155 | private: |
156 | void read() { |
157 | if (Stack.empty()) { |
158 | Tok = Lex.lex(); |
159 | return; |
160 | } |
161 | Tok = Stack.back(); |
162 | Stack.pop_back(); |
163 | } |
164 | |
165 | Error readAsInt(uint64_t *I) { |
166 | read(); |
167 | if (Tok.K != Identifier || Tok.Value.getAsInteger(Radix: 10, Result&: *I)) |
168 | return createError(Err: "integer expected" ); |
169 | return Error::success(); |
170 | } |
171 | |
172 | Error expect(Kind Expected, StringRef Msg) { |
173 | read(); |
174 | if (Tok.K != Expected) |
175 | return createError(Err: Msg); |
176 | return Error::success(); |
177 | } |
178 | |
179 | void unget() { Stack.push_back(x: Tok); } |
180 | |
181 | Error parseOne() { |
182 | read(); |
183 | switch (Tok.K) { |
184 | case Eof: |
185 | return Error::success(); |
186 | case KwExports: |
187 | for (;;) { |
188 | read(); |
189 | if (Tok.K != Identifier) { |
190 | unget(); |
191 | return Error::success(); |
192 | } |
193 | if (Error Err = parseExport()) |
194 | return Err; |
195 | } |
196 | case KwHeapsize: |
197 | return parseNumbers(Reserve: &Info.HeapReserve, Commit: &Info.HeapCommit); |
198 | case KwStacksize: |
199 | return parseNumbers(Reserve: &Info.StackReserve, Commit: &Info.StackCommit); |
200 | case KwLibrary: |
201 | case KwName: { |
202 | bool IsDll = Tok.K == KwLibrary; // Check before parseName. |
203 | std::string Name; |
204 | if (Error Err = parseName(Out: &Name, Baseaddr: &Info.ImageBase)) |
205 | return Err; |
206 | |
207 | Info.ImportName = Name; |
208 | |
209 | // Set the output file, but don't override /out if it was already passed. |
210 | if (Info.OutputFile.empty()) { |
211 | Info.OutputFile = Name; |
212 | // Append the appropriate file extension if not already present. |
213 | if (!sys::path::has_extension(path: Name)) |
214 | Info.OutputFile += IsDll ? ".dll" : ".exe" ; |
215 | } |
216 | |
217 | return Error::success(); |
218 | } |
219 | case KwVersion: |
220 | return parseVersion(Major: &Info.MajorImageVersion, Minor: &Info.MinorImageVersion); |
221 | default: |
222 | return createError(Err: "unknown directive: " + Tok.Value); |
223 | } |
224 | } |
225 | |
226 | Error parseExport() { |
227 | COFFShortExport E; |
228 | E.Name = std::string(Tok.Value); |
229 | read(); |
230 | if (Tok.K == Equal) { |
231 | read(); |
232 | if (Tok.K != Identifier) |
233 | return createError(Err: "identifier expected, but got " + Tok.Value); |
234 | E.ExtName = E.Name; |
235 | E.Name = std::string(Tok.Value); |
236 | } else { |
237 | unget(); |
238 | } |
239 | |
240 | if (AddUnderscores) { |
241 | if (!isDecorated(Sym: E.Name, MingwDef)) |
242 | E.Name = (std::string("_" ).append(str: E.Name)); |
243 | if (!E.ExtName.empty() && !isDecorated(Sym: E.ExtName, MingwDef)) |
244 | E.ExtName = (std::string("_" ).append(str: E.ExtName)); |
245 | } |
246 | |
247 | for (;;) { |
248 | read(); |
249 | if (Tok.K == Identifier && Tok.Value[0] == '@') { |
250 | if (Tok.Value == "@" ) { |
251 | // "foo @ 10" |
252 | read(); |
253 | Tok.Value.getAsInteger(Radix: 10, Result&: E.Ordinal); |
254 | } else if (Tok.Value.drop_front().getAsInteger(Radix: 10, Result&: E.Ordinal)) { |
255 | // "foo \n @bar" - Not an ordinal modifier at all, but the next |
256 | // export (fastcall decorated) - complete the current one. |
257 | unget(); |
258 | Info.Exports.push_back(x: E); |
259 | return Error::success(); |
260 | } |
261 | // "foo @10" |
262 | read(); |
263 | if (Tok.K == KwNoname) { |
264 | E.Noname = true; |
265 | } else { |
266 | unget(); |
267 | } |
268 | continue; |
269 | } |
270 | if (Tok.K == KwData) { |
271 | E.Data = true; |
272 | continue; |
273 | } |
274 | if (Tok.K == KwConstant) { |
275 | E.Constant = true; |
276 | continue; |
277 | } |
278 | if (Tok.K == KwPrivate) { |
279 | E.Private = true; |
280 | continue; |
281 | } |
282 | if (Tok.K == EqualEqual) { |
283 | read(); |
284 | E.ImportName = std::string(Tok.Value); |
285 | continue; |
286 | } |
287 | // EXPORTAS must be at the end of export definition |
288 | if (Tok.K == KwExportAs) { |
289 | read(); |
290 | if (Tok.K == Eof) |
291 | return createError( |
292 | Err: "unexpected end of file, EXPORTAS identifier expected" ); |
293 | E.ExportAs = std::string(Tok.Value); |
294 | } else { |
295 | unget(); |
296 | } |
297 | Info.Exports.push_back(x: E); |
298 | return Error::success(); |
299 | } |
300 | } |
301 | |
302 | // HEAPSIZE/STACKSIZE reserve[,commit] |
303 | Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { |
304 | if (Error Err = readAsInt(I: Reserve)) |
305 | return Err; |
306 | read(); |
307 | if (Tok.K != Comma) { |
308 | unget(); |
309 | Commit = nullptr; |
310 | return Error::success(); |
311 | } |
312 | if (Error Err = readAsInt(I: Commit)) |
313 | return Err; |
314 | return Error::success(); |
315 | } |
316 | |
317 | // NAME outputPath [BASE=address] |
318 | Error parseName(std::string *Out, uint64_t *Baseaddr) { |
319 | read(); |
320 | if (Tok.K == Identifier) { |
321 | *Out = std::string(Tok.Value); |
322 | } else { |
323 | *Out = "" ; |
324 | unget(); |
325 | return Error::success(); |
326 | } |
327 | read(); |
328 | if (Tok.K == KwBase) { |
329 | if (Error Err = expect(Expected: Equal, Msg: "'=' expected" )) |
330 | return Err; |
331 | if (Error Err = readAsInt(I: Baseaddr)) |
332 | return Err; |
333 | } else { |
334 | unget(); |
335 | *Baseaddr = 0; |
336 | } |
337 | return Error::success(); |
338 | } |
339 | |
340 | // VERSION major[.minor] |
341 | Error parseVersion(uint32_t *Major, uint32_t *Minor) { |
342 | read(); |
343 | if (Tok.K != Identifier) |
344 | return createError(Err: "identifier expected, but got " + Tok.Value); |
345 | StringRef V1, V2; |
346 | std::tie(args&: V1, args&: V2) = Tok.Value.split(Separator: '.'); |
347 | if (V1.getAsInteger(Radix: 10, Result&: *Major)) |
348 | return createError(Err: "integer expected, but got " + Tok.Value); |
349 | if (V2.empty()) |
350 | *Minor = 0; |
351 | else if (V2.getAsInteger(Radix: 10, Result&: *Minor)) |
352 | return createError(Err: "integer expected, but got " + Tok.Value); |
353 | return Error::success(); |
354 | } |
355 | |
356 | Lexer Lex; |
357 | Token Tok; |
358 | std::vector<Token> Stack; |
359 | MachineTypes Machine; |
360 | COFFModuleDefinition Info; |
361 | bool MingwDef; |
362 | bool AddUnderscores; |
363 | }; |
364 | |
365 | Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, |
366 | MachineTypes Machine, |
367 | bool MingwDef, |
368 | bool AddUnderscores) { |
369 | return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse(); |
370 | } |
371 | |
372 | } // namespace object |
373 | } // namespace llvm |
374 | |