1 | //===--- DLangDemangle.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file defines a demangler for the D programming language as specified |
11 | /// in the ABI specification, available at: |
12 | /// https://dlang.org/spec/abi.html#name_mangling |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/Demangle/Demangle.h" |
17 | #include "llvm/Demangle/StringViewExtras.h" |
18 | #include "llvm/Demangle/Utility.h" |
19 | |
20 | #include <cctype> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <string_view> |
24 | |
25 | using namespace llvm; |
26 | using llvm::itanium_demangle::OutputBuffer; |
27 | using llvm::itanium_demangle::starts_with; |
28 | |
29 | namespace { |
30 | |
31 | /// Demangle information structure. |
32 | struct Demangler { |
33 | /// Initialize the information structure we use to pass around information. |
34 | /// |
35 | /// \param Mangled String to demangle. |
36 | Demangler(std::string_view Mangled); |
37 | |
38 | /// Extract and demangle the mangled symbol and append it to the output |
39 | /// string. |
40 | /// |
41 | /// \param Demangled Output buffer to write the demangled name. |
42 | /// |
43 | /// \return The remaining string on success or nullptr on failure. |
44 | /// |
45 | /// \see https://dlang.org/spec/abi.html#name_mangling . |
46 | /// \see https://dlang.org/spec/abi.html#MangledName . |
47 | const char *parseMangle(OutputBuffer *Demangled); |
48 | |
49 | private: |
50 | /// Extract and demangle a given mangled symbol and append it to the output |
51 | /// string. |
52 | /// |
53 | /// \param Demangled output buffer to write the demangled name. |
54 | /// \param Mangled mangled symbol to be demangled. |
55 | /// |
56 | /// \see https://dlang.org/spec/abi.html#name_mangling . |
57 | /// \see https://dlang.org/spec/abi.html#MangledName . |
58 | void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled); |
59 | |
60 | /// Extract the number from a given string. |
61 | /// |
62 | /// \param Mangled string to extract the number. |
63 | /// \param Ret assigned result value. |
64 | /// |
65 | /// \note Ret larger than UINT_MAX is considered a failure. |
66 | /// |
67 | /// \see https://dlang.org/spec/abi.html#Number . |
68 | void decodeNumber(std::string_view &Mangled, unsigned long &Ret); |
69 | |
70 | /// Extract the back reference position from a given string. |
71 | /// |
72 | /// \param Mangled string to extract the back reference position. |
73 | /// \param Ret assigned result value. |
74 | /// |
75 | /// \return true on success, false on error. |
76 | /// |
77 | /// \note Ret is always >= 0 on success, and unspecified on failure |
78 | /// |
79 | /// \see https://dlang.org/spec/abi.html#back_ref . |
80 | /// \see https://dlang.org/spec/abi.html#NumberBackRef . |
81 | bool decodeBackrefPos(std::string_view &Mangled, long &Ret); |
82 | |
83 | /// Extract the symbol pointed by the back reference form a given string. |
84 | /// |
85 | /// \param Mangled string to extract the back reference position. |
86 | /// \param Ret assigned result value. |
87 | /// |
88 | /// \return true on success, false on error. |
89 | /// |
90 | /// \see https://dlang.org/spec/abi.html#back_ref . |
91 | bool decodeBackref(std::string_view &Mangled, std::string_view &Ret); |
92 | |
93 | /// Extract and demangle backreferenced symbol from a given mangled symbol |
94 | /// and append it to the output string. |
95 | /// |
96 | /// \param Demangled output buffer to write the demangled name. |
97 | /// \param Mangled mangled symbol to be demangled. |
98 | /// |
99 | /// \see https://dlang.org/spec/abi.html#back_ref . |
100 | /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . |
101 | void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled); |
102 | |
103 | /// Extract and demangle backreferenced type from a given mangled symbol |
104 | /// and append it to the output string. |
105 | /// |
106 | /// \param Mangled mangled symbol to be demangled. |
107 | /// |
108 | /// \see https://dlang.org/spec/abi.html#back_ref . |
109 | /// \see https://dlang.org/spec/abi.html#TypeBackRef . |
110 | void parseTypeBackref(std::string_view &Mangled); |
111 | |
112 | /// Check whether it is the beginning of a symbol name. |
113 | /// |
114 | /// \param Mangled string to extract the symbol name. |
115 | /// |
116 | /// \return true on success, false otherwise. |
117 | /// |
118 | /// \see https://dlang.org/spec/abi.html#SymbolName . |
119 | bool isSymbolName(std::string_view Mangled); |
120 | |
121 | /// Extract and demangle an identifier from a given mangled symbol append it |
122 | /// to the output string. |
123 | /// |
124 | /// \param Demangled Output buffer to write the demangled name. |
125 | /// \param Mangled Mangled symbol to be demangled. |
126 | /// |
127 | /// \see https://dlang.org/spec/abi.html#SymbolName . |
128 | void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled); |
129 | |
130 | /// Extract and demangle the plain identifier from a given mangled symbol and |
131 | /// prepend/append it to the output string, with a special treatment for some |
132 | /// magic compiler generated symbols. |
133 | /// |
134 | /// \param Demangled Output buffer to write the demangled name. |
135 | /// \param Mangled Mangled symbol to be demangled. |
136 | /// \param Len Length of the mangled symbol name. |
137 | /// |
138 | /// \see https://dlang.org/spec/abi.html#LName . |
139 | void parseLName(OutputBuffer *Demangled, std::string_view &Mangled, |
140 | unsigned long Len); |
141 | |
142 | /// Extract and demangle the qualified symbol from a given mangled symbol |
143 | /// append it to the output string. |
144 | /// |
145 | /// \param Demangled Output buffer to write the demangled name. |
146 | /// \param Mangled Mangled symbol to be demangled. |
147 | /// |
148 | /// \see https://dlang.org/spec/abi.html#QualifiedName . |
149 | void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled); |
150 | |
151 | /// Extract and demangle a type from a given mangled symbol append it to |
152 | /// the output string. |
153 | /// |
154 | /// \param Mangled mangled symbol to be demangled. |
155 | /// |
156 | /// \return true on success, false on error. |
157 | /// |
158 | /// \see https://dlang.org/spec/abi.html#Type . |
159 | bool parseType(std::string_view &Mangled); |
160 | |
161 | /// An immutable view of the string we are demangling. |
162 | const std::string_view Str; |
163 | /// The index of the last back reference. |
164 | int LastBackref; |
165 | }; |
166 | |
167 | } // namespace |
168 | |
169 | void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) { |
170 | // Clear Mangled if trying to extract something that isn't a digit. |
171 | if (Mangled.empty()) { |
172 | Mangled = {}; |
173 | return; |
174 | } |
175 | |
176 | if (!std::isdigit(Mangled.front())) { |
177 | Mangled = {}; |
178 | return; |
179 | } |
180 | |
181 | unsigned long Val = 0; |
182 | |
183 | do { |
184 | unsigned long Digit = Mangled[0] - '0'; |
185 | |
186 | // Check for overflow. |
187 | if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) { |
188 | Mangled = {}; |
189 | return; |
190 | } |
191 | |
192 | Val = Val * 10 + Digit; |
193 | Mangled.remove_prefix(n: 1); |
194 | } while (!Mangled.empty() && std::isdigit(Mangled.front())); |
195 | |
196 | if (Mangled.empty()) { |
197 | Mangled = {}; |
198 | return; |
199 | } |
200 | |
201 | Ret = Val; |
202 | } |
203 | |
204 | bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) { |
205 | // Return nullptr if trying to extract something that isn't a digit |
206 | if (Mangled.empty()) { |
207 | Mangled = {}; |
208 | return false; |
209 | } |
210 | // Any identifier or non-basic type that has been emitted to the mangled |
211 | // symbol before will not be emitted again, but is referenced by a special |
212 | // sequence encoding the relative position of the original occurrence in the |
213 | // mangled symbol name. |
214 | // Numbers in back references are encoded with base 26 by upper case letters |
215 | // A-Z for higher digits but lower case letters a-z for the last digit. |
216 | // NumberBackRef: |
217 | // [a-z] |
218 | // [A-Z] NumberBackRef |
219 | // ^ |
220 | unsigned long Val = 0; |
221 | |
222 | while (!Mangled.empty() && std::isalpha(Mangled.front())) { |
223 | // Check for overflow |
224 | if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26) |
225 | break; |
226 | |
227 | Val *= 26; |
228 | |
229 | if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { |
230 | Val += Mangled[0] - 'a'; |
231 | if ((long)Val <= 0) |
232 | break; |
233 | Ret = Val; |
234 | Mangled.remove_prefix(n: 1); |
235 | return true; |
236 | } |
237 | |
238 | Val += Mangled[0] - 'A'; |
239 | Mangled.remove_prefix(n: 1); |
240 | } |
241 | |
242 | Mangled = {}; |
243 | return false; |
244 | } |
245 | |
246 | bool Demangler::decodeBackref(std::string_view &Mangled, |
247 | std::string_view &Ret) { |
248 | assert(!Mangled.empty() && Mangled.front() == 'Q' && |
249 | "Invalid back reference!" ); |
250 | Ret = {}; |
251 | |
252 | // Position of 'Q' |
253 | const char *Qpos = Mangled.data(); |
254 | long RefPos; |
255 | Mangled.remove_prefix(n: 1); |
256 | |
257 | if (!decodeBackrefPos(Mangled, Ret&: RefPos)) { |
258 | Mangled = {}; |
259 | return false; |
260 | } |
261 | |
262 | if (RefPos > Qpos - Str.data()) { |
263 | Mangled = {}; |
264 | return false; |
265 | } |
266 | |
267 | // Set the position of the back reference. |
268 | Ret = Qpos - RefPos; |
269 | |
270 | return true; |
271 | } |
272 | |
273 | void Demangler::parseSymbolBackref(OutputBuffer *Demangled, |
274 | std::string_view &Mangled) { |
275 | // An identifier back reference always points to a digit 0 to 9. |
276 | // IdentifierBackRef: |
277 | // Q NumberBackRef |
278 | // ^ |
279 | unsigned long Len; |
280 | |
281 | // Get position of the back reference |
282 | std::string_view Backref; |
283 | if (!decodeBackref(Mangled, Ret&: Backref)) { |
284 | Mangled = {}; |
285 | return; |
286 | } |
287 | |
288 | // Must point to a simple identifier |
289 | decodeNumber(Mangled&: Backref, Ret&: Len); |
290 | if (Backref.empty() || Backref.length() < Len) { |
291 | Mangled = {}; |
292 | return; |
293 | } |
294 | |
295 | parseLName(Demangled, Mangled&: Backref, Len); |
296 | if (Backref.empty()) |
297 | Mangled = {}; |
298 | } |
299 | |
300 | void Demangler::parseTypeBackref(std::string_view &Mangled) { |
301 | // A type back reference always points to a letter. |
302 | // TypeBackRef: |
303 | // Q NumberBackRef |
304 | // ^ |
305 | |
306 | // If we appear to be moving backwards through the mangle string, then |
307 | // bail as this may be a recursive back reference. |
308 | if (Mangled.data() - Str.data() >= LastBackref) { |
309 | Mangled = {}; |
310 | return; |
311 | } |
312 | |
313 | int SaveRefPos = LastBackref; |
314 | LastBackref = Mangled.data() - Str.data(); |
315 | |
316 | // Get position of the back reference. |
317 | std::string_view Backref; |
318 | if (!decodeBackref(Mangled, Ret&: Backref)) { |
319 | Mangled = {}; |
320 | return; |
321 | } |
322 | |
323 | // Can't decode back reference. |
324 | if (Backref.empty()) { |
325 | Mangled = {}; |
326 | return; |
327 | } |
328 | |
329 | // TODO: Add support for function type back references. |
330 | if (!parseType(Mangled&: Backref)) |
331 | Mangled = {}; |
332 | |
333 | LastBackref = SaveRefPos; |
334 | |
335 | if (Backref.empty()) |
336 | Mangled = {}; |
337 | } |
338 | |
339 | bool Demangler::isSymbolName(std::string_view Mangled) { |
340 | long Ret; |
341 | const char *Qref = Mangled.data(); |
342 | |
343 | if (std::isdigit(Mangled.front())) |
344 | return true; |
345 | |
346 | // TODO: Handle template instances. |
347 | |
348 | if (Mangled.front() != 'Q') |
349 | return false; |
350 | |
351 | Mangled.remove_prefix(n: 1); |
352 | bool Valid = decodeBackrefPos(Mangled, Ret); |
353 | if (!Valid || Ret > Qref - Str.data()) |
354 | return false; |
355 | |
356 | return std::isdigit(Qref[-Ret]); |
357 | } |
358 | |
359 | void Demangler::parseMangle(OutputBuffer *Demangled, |
360 | std::string_view &Mangled) { |
361 | // A D mangled symbol is comprised of both scope and type information. |
362 | // MangleName: |
363 | // _D QualifiedName Type |
364 | // _D QualifiedName Z |
365 | // ^ |
366 | // The caller should have guaranteed that the start pointer is at the |
367 | // above location. |
368 | // Note that type is never a function type, but only the return type of |
369 | // a function or the type of a variable. |
370 | Mangled.remove_prefix(n: 2); |
371 | |
372 | parseQualified(Demangled, Mangled); |
373 | |
374 | if (Mangled.empty()) { |
375 | Mangled = {}; |
376 | return; |
377 | } |
378 | |
379 | // Artificial symbols end with 'Z' and have no type. |
380 | if (Mangled.front() == 'Z') { |
381 | Mangled.remove_prefix(n: 1); |
382 | } else if (!parseType(Mangled)) |
383 | Mangled = {}; |
384 | } |
385 | |
386 | void Demangler::parseQualified(OutputBuffer *Demangled, |
387 | std::string_view &Mangled) { |
388 | // Qualified names are identifiers separated by their encoded length. |
389 | // Nested functions also encode their argument types without specifying |
390 | // what they return. |
391 | // QualifiedName: |
392 | // SymbolFunctionName |
393 | // SymbolFunctionName QualifiedName |
394 | // ^ |
395 | // SymbolFunctionName: |
396 | // SymbolName |
397 | // SymbolName TypeFunctionNoReturn |
398 | // SymbolName M TypeFunctionNoReturn |
399 | // SymbolName M TypeModifiers TypeFunctionNoReturn |
400 | // The start pointer should be at the above location. |
401 | |
402 | // Whether it has more than one symbol |
403 | size_t NotFirst = false; |
404 | do { |
405 | // Skip over anonymous symbols. |
406 | if (!Mangled.empty() && Mangled.front() == '0') { |
407 | do |
408 | Mangled.remove_prefix(n: 1); |
409 | while (!Mangled.empty() && Mangled.front() == '0'); |
410 | |
411 | continue; |
412 | } |
413 | |
414 | if (NotFirst) |
415 | *Demangled << '.'; |
416 | NotFirst = true; |
417 | |
418 | parseIdentifier(Demangled, Mangled); |
419 | } while (!Mangled.empty() && isSymbolName(Mangled)); |
420 | } |
421 | |
422 | void Demangler::parseIdentifier(OutputBuffer *Demangled, |
423 | std::string_view &Mangled) { |
424 | if (Mangled.empty()) { |
425 | Mangled = {}; |
426 | return; |
427 | } |
428 | |
429 | if (Mangled.front() == 'Q') |
430 | return parseSymbolBackref(Demangled, Mangled); |
431 | |
432 | // TODO: Parse lengthless template instances. |
433 | |
434 | unsigned long Len; |
435 | decodeNumber(Mangled, Ret&: Len); |
436 | |
437 | if (Mangled.empty()) { |
438 | Mangled = {}; |
439 | return; |
440 | } |
441 | if (!Len || Mangled.length() < Len) { |
442 | Mangled = {}; |
443 | return; |
444 | } |
445 | |
446 | // TODO: Parse template instances with a length prefix. |
447 | |
448 | // There can be multiple different declarations in the same function that |
449 | // have the same mangled name. To make the mangled names unique, a fake |
450 | // parent in the form `__Sddd' is added to the symbol. |
451 | if (Len >= 4 && starts_with(haystack: Mangled, needle: "__S" )) { |
452 | const size_t SuffixLen = Mangled.length() - Len; |
453 | std::string_view P = Mangled.substr(pos: 3); |
454 | while (P.length() > SuffixLen && std::isdigit(P.front())) |
455 | P.remove_prefix(n: 1); |
456 | if (P.length() == SuffixLen) { |
457 | // Skip over the fake parent. |
458 | Mangled.remove_prefix(n: Len); |
459 | return parseIdentifier(Demangled, Mangled); |
460 | } |
461 | |
462 | // Else demangle it as a plain identifier. |
463 | } |
464 | |
465 | parseLName(Demangled, Mangled, Len); |
466 | } |
467 | |
468 | bool Demangler::parseType(std::string_view &Mangled) { |
469 | if (Mangled.empty()) { |
470 | Mangled = {}; |
471 | return false; |
472 | } |
473 | |
474 | switch (Mangled.front()) { |
475 | // TODO: Parse type qualifiers. |
476 | // TODO: Parse function types. |
477 | // TODO: Parse compound types. |
478 | // TODO: Parse delegate types. |
479 | // TODO: Parse tuple types. |
480 | |
481 | // Basic types. |
482 | case 'i': |
483 | Mangled.remove_prefix(n: 1); |
484 | // TODO: Add type name dumping |
485 | return true; |
486 | |
487 | // TODO: Add support for the rest of the basic types. |
488 | |
489 | // Back referenced type. |
490 | case 'Q': { |
491 | parseTypeBackref(Mangled); |
492 | return true; |
493 | } |
494 | |
495 | default: // unhandled. |
496 | Mangled = {}; |
497 | return false; |
498 | } |
499 | } |
500 | |
501 | void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled, |
502 | unsigned long Len) { |
503 | switch (Len) { |
504 | case 6: |
505 | if (starts_with(haystack: Mangled, needle: "__initZ" )) { |
506 | // The static initializer for a given symbol. |
507 | Demangled->prepend(R: "initializer for " ); |
508 | Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
509 | Mangled.remove_prefix(n: Len); |
510 | return; |
511 | } |
512 | if (starts_with(haystack: Mangled, needle: "__vtblZ" )) { |
513 | // The vtable symbol for a given class. |
514 | Demangled->prepend(R: "vtable for " ); |
515 | Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
516 | Mangled.remove_prefix(n: Len); |
517 | return; |
518 | } |
519 | break; |
520 | |
521 | case 7: |
522 | if (starts_with(haystack: Mangled, needle: "__ClassZ" )) { |
523 | // The classinfo symbol for a given class. |
524 | Demangled->prepend(R: "ClassInfo for " ); |
525 | Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
526 | Mangled.remove_prefix(n: Len); |
527 | return; |
528 | } |
529 | break; |
530 | |
531 | case 11: |
532 | if (starts_with(haystack: Mangled, needle: "__InterfaceZ" )) { |
533 | // The interface symbol for a given class. |
534 | Demangled->prepend(R: "Interface for " ); |
535 | Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
536 | Mangled.remove_prefix(n: Len); |
537 | return; |
538 | } |
539 | break; |
540 | |
541 | case 12: |
542 | if (starts_with(haystack: Mangled, needle: "__ModuleInfoZ" )) { |
543 | // The ModuleInfo symbol for a given module. |
544 | Demangled->prepend(R: "ModuleInfo for " ); |
545 | Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
546 | Mangled.remove_prefix(n: Len); |
547 | return; |
548 | } |
549 | break; |
550 | } |
551 | |
552 | *Demangled << Mangled.substr(pos: 0, n: Len); |
553 | Mangled.remove_prefix(n: Len); |
554 | } |
555 | |
556 | Demangler::Demangler(std::string_view Mangled) |
557 | : Str(Mangled), LastBackref(Mangled.length()) {} |
558 | |
559 | const char *Demangler::parseMangle(OutputBuffer *Demangled) { |
560 | std::string_view M(this->Str); |
561 | parseMangle(Demangled, Mangled&: M); |
562 | return M.data(); |
563 | } |
564 | |
565 | char *llvm::dlangDemangle(std::string_view MangledName) { |
566 | if (MangledName.empty() || !starts_with(haystack: MangledName, needle: "_D" )) |
567 | return nullptr; |
568 | |
569 | OutputBuffer Demangled; |
570 | if (MangledName == "_Dmain" ) { |
571 | Demangled << "D main" ; |
572 | } else { |
573 | |
574 | Demangler D(MangledName); |
575 | const char *M = D.parseMangle(Demangled: &Demangled); |
576 | |
577 | // Check that the entire symbol was successfully demangled. |
578 | if (M == nullptr || *M != '\0') { |
579 | std::free(ptr: Demangled.getBuffer()); |
580 | return nullptr; |
581 | } |
582 | } |
583 | |
584 | // OutputBuffer's internal buffer is not null terminated and therefore we need |
585 | // to add it to comply with C null terminated strings. |
586 | if (Demangled.getCurrentPosition() > 0) { |
587 | Demangled << '\0'; |
588 | Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); |
589 | return Demangled.getBuffer(); |
590 | } |
591 | |
592 | std::free(ptr: Demangled.getBuffer()); |
593 | return nullptr; |
594 | } |
595 | |