//===--- DLangDemangle.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines a demangler for the D programming language as specified
/// in the ABI specification, available at:
/// https://dlang.org/spec/abi.html#name_mangling
///
//===----------------------------------------------------------------------===//
15
16#include "llvm/Demangle/Demangle.h"
17#include "llvm/Demangle/StringViewExtras.h"
18#include "llvm/Demangle/Utility.h"
19
20#include <cctype>
21#include <cstring>
22#include <limits>
23#include <string_view>
24
25using namespace llvm;
26using llvm::itanium_demangle::OutputBuffer;
27using llvm::itanium_demangle::starts_with;
28
29namespace {
30
31/// Demangle information structure.
32struct Demangler {
33 /// Initialize the information structure we use to pass around information.
34 ///
35 /// \param Mangled String to demangle.
36 Demangler(std::string_view Mangled);
37
38 /// Extract and demangle the mangled symbol and append it to the output
39 /// string.
40 ///
41 /// \param Demangled Output buffer to write the demangled name.
42 ///
43 /// \return The remaining string on success or nullptr on failure.
44 ///
45 /// \see https://dlang.org/spec/abi.html#name_mangling .
46 /// \see https://dlang.org/spec/abi.html#MangledName .
47 const char *parseMangle(OutputBuffer *Demangled);
48
49private:
50 /// Extract and demangle a given mangled symbol and append it to the output
51 /// string.
52 ///
53 /// \param Demangled output buffer to write the demangled name.
54 /// \param Mangled mangled symbol to be demangled.
55 ///
56 /// \see https://dlang.org/spec/abi.html#name_mangling .
57 /// \see https://dlang.org/spec/abi.html#MangledName .
58 void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);
59
60 /// Extract the number from a given string.
61 ///
62 /// \param Mangled string to extract the number.
63 /// \param Ret assigned result value.
64 ///
65 /// \note Ret larger than UINT_MAX is considered a failure.
66 ///
67 /// \see https://dlang.org/spec/abi.html#Number .
68 void decodeNumber(std::string_view &Mangled, unsigned long &Ret);
69
70 /// Extract the back reference position from a given string.
71 ///
72 /// \param Mangled string to extract the back reference position.
73 /// \param Ret assigned result value.
74 ///
75 /// \return true on success, false on error.
76 ///
77 /// \note Ret is always >= 0 on success, and unspecified on failure
78 ///
79 /// \see https://dlang.org/spec/abi.html#back_ref .
80 /// \see https://dlang.org/spec/abi.html#NumberBackRef .
81 bool decodeBackrefPos(std::string_view &Mangled, long &Ret);
82
83 /// Extract the symbol pointed by the back reference form a given string.
84 ///
85 /// \param Mangled string to extract the back reference position.
86 /// \param Ret assigned result value.
87 ///
88 /// \return true on success, false on error.
89 ///
90 /// \see https://dlang.org/spec/abi.html#back_ref .
91 bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);
92
93 /// Extract and demangle backreferenced symbol from a given mangled symbol
94 /// and append it to the output string.
95 ///
96 /// \param Demangled output buffer to write the demangled name.
97 /// \param Mangled mangled symbol to be demangled.
98 ///
99 /// \see https://dlang.org/spec/abi.html#back_ref .
100 /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
101 void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);
102
103 /// Extract and demangle backreferenced type from a given mangled symbol
104 /// and append it to the output string.
105 ///
106 /// \param Mangled mangled symbol to be demangled.
107 ///
108 /// \see https://dlang.org/spec/abi.html#back_ref .
109 /// \see https://dlang.org/spec/abi.html#TypeBackRef .
110 void parseTypeBackref(std::string_view &Mangled);
111
112 /// Check whether it is the beginning of a symbol name.
113 ///
114 /// \param Mangled string to extract the symbol name.
115 ///
116 /// \return true on success, false otherwise.
117 ///
118 /// \see https://dlang.org/spec/abi.html#SymbolName .
119 bool isSymbolName(std::string_view Mangled);
120
121 /// Extract and demangle an identifier from a given mangled symbol append it
122 /// to the output string.
123 ///
124 /// \param Demangled Output buffer to write the demangled name.
125 /// \param Mangled Mangled symbol to be demangled.
126 ///
127 /// \see https://dlang.org/spec/abi.html#SymbolName .
128 void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);
129
130 /// Extract and demangle the plain identifier from a given mangled symbol and
131 /// prepend/append it to the output string, with a special treatment for some
132 /// magic compiler generated symbols.
133 ///
134 /// \param Demangled Output buffer to write the demangled name.
135 /// \param Mangled Mangled symbol to be demangled.
136 /// \param Len Length of the mangled symbol name.
137 ///
138 /// \see https://dlang.org/spec/abi.html#LName .
139 void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
140 unsigned long Len);
141
142 /// Extract and demangle the qualified symbol from a given mangled symbol
143 /// append it to the output string.
144 ///
145 /// \param Demangled Output buffer to write the demangled name.
146 /// \param Mangled Mangled symbol to be demangled.
147 ///
148 /// \see https://dlang.org/spec/abi.html#QualifiedName .
149 void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);
150
151 /// Extract and demangle a type from a given mangled symbol append it to
152 /// the output string.
153 ///
154 /// \param Mangled mangled symbol to be demangled.
155 ///
156 /// \return true on success, false on error.
157 ///
158 /// \see https://dlang.org/spec/abi.html#Type .
159 bool parseType(std::string_view &Mangled);
160
161 /// An immutable view of the string we are demangling.
162 const std::string_view Str;
163 /// The index of the last back reference.
164 int LastBackref;
165};
166
167} // namespace
168
169void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
170 // Clear Mangled if trying to extract something that isn't a digit.
171 if (Mangled.empty()) {
172 Mangled = {};
173 return;
174 }
175
176 if (!std::isdigit(Mangled.front())) {
177 Mangled = {};
178 return;
179 }
180
181 unsigned long Val = 0;
182
183 do {
184 unsigned long Digit = Mangled[0] - '0';
185
186 // Check for overflow.
187 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {
188 Mangled = {};
189 return;
190 }
191
192 Val = Val * 10 + Digit;
193 Mangled.remove_prefix(n: 1);
194 } while (!Mangled.empty() && std::isdigit(Mangled.front()));
195
196 if (Mangled.empty()) {
197 Mangled = {};
198 return;
199 }
200
201 Ret = Val;
202}
203
204bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
205 // Return nullptr if trying to extract something that isn't a digit
206 if (Mangled.empty()) {
207 Mangled = {};
208 return false;
209 }
210 // Any identifier or non-basic type that has been emitted to the mangled
211 // symbol before will not be emitted again, but is referenced by a special
212 // sequence encoding the relative position of the original occurrence in the
213 // mangled symbol name.
214 // Numbers in back references are encoded with base 26 by upper case letters
215 // A-Z for higher digits but lower case letters a-z for the last digit.
216 // NumberBackRef:
217 // [a-z]
218 // [A-Z] NumberBackRef
219 // ^
220 unsigned long Val = 0;
221
222 while (!Mangled.empty() && std::isalpha(Mangled.front())) {
223 // Check for overflow
224 if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
225 break;
226
227 Val *= 26;
228
229 if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
230 Val += Mangled[0] - 'a';
231 if ((long)Val <= 0)
232 break;
233 Ret = Val;
234 Mangled.remove_prefix(n: 1);
235 return true;
236 }
237
238 Val += Mangled[0] - 'A';
239 Mangled.remove_prefix(n: 1);
240 }
241
242 Mangled = {};
243 return false;
244}
245
246bool Demangler::decodeBackref(std::string_view &Mangled,
247 std::string_view &Ret) {
248 assert(!Mangled.empty() && Mangled.front() == 'Q' &&
249 "Invalid back reference!");
250 Ret = {};
251
252 // Position of 'Q'
253 const char *Qpos = Mangled.data();
254 long RefPos;
255 Mangled.remove_prefix(n: 1);
256
257 if (!decodeBackrefPos(Mangled, Ret&: RefPos)) {
258 Mangled = {};
259 return false;
260 }
261
262 if (RefPos > Qpos - Str.data()) {
263 Mangled = {};
264 return false;
265 }
266
267 // Set the position of the back reference.
268 Ret = Qpos - RefPos;
269
270 return true;
271}
272
273void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
274 std::string_view &Mangled) {
275 // An identifier back reference always points to a digit 0 to 9.
276 // IdentifierBackRef:
277 // Q NumberBackRef
278 // ^
279 unsigned long Len;
280
281 // Get position of the back reference
282 std::string_view Backref;
283 if (!decodeBackref(Mangled, Ret&: Backref)) {
284 Mangled = {};
285 return;
286 }
287
288 // Must point to a simple identifier
289 decodeNumber(Mangled&: Backref, Ret&: Len);
290 if (Backref.empty() || Backref.length() < Len) {
291 Mangled = {};
292 return;
293 }
294
295 parseLName(Demangled, Mangled&: Backref, Len);
296 if (Backref.empty())
297 Mangled = {};
298}
299
300void Demangler::parseTypeBackref(std::string_view &Mangled) {
301 // A type back reference always points to a letter.
302 // TypeBackRef:
303 // Q NumberBackRef
304 // ^
305
306 // If we appear to be moving backwards through the mangle string, then
307 // bail as this may be a recursive back reference.
308 if (Mangled.data() - Str.data() >= LastBackref) {
309 Mangled = {};
310 return;
311 }
312
313 int SaveRefPos = LastBackref;
314 LastBackref = Mangled.data() - Str.data();
315
316 // Get position of the back reference.
317 std::string_view Backref;
318 if (!decodeBackref(Mangled, Ret&: Backref)) {
319 Mangled = {};
320 return;
321 }
322
323 // Can't decode back reference.
324 if (Backref.empty()) {
325 Mangled = {};
326 return;
327 }
328
329 // TODO: Add support for function type back references.
330 if (!parseType(Mangled&: Backref))
331 Mangled = {};
332
333 LastBackref = SaveRefPos;
334
335 if (Backref.empty())
336 Mangled = {};
337}
338
339bool Demangler::isSymbolName(std::string_view Mangled) {
340 long Ret;
341 const char *Qref = Mangled.data();
342
343 if (std::isdigit(Mangled.front()))
344 return true;
345
346 // TODO: Handle template instances.
347
348 if (Mangled.front() != 'Q')
349 return false;
350
351 Mangled.remove_prefix(n: 1);
352 bool Valid = decodeBackrefPos(Mangled, Ret);
353 if (!Valid || Ret > Qref - Str.data())
354 return false;
355
356 return std::isdigit(Qref[-Ret]);
357}
358
359void Demangler::parseMangle(OutputBuffer *Demangled,
360 std::string_view &Mangled) {
361 // A D mangled symbol is comprised of both scope and type information.
362 // MangleName:
363 // _D QualifiedName Type
364 // _D QualifiedName Z
365 // ^
366 // The caller should have guaranteed that the start pointer is at the
367 // above location.
368 // Note that type is never a function type, but only the return type of
369 // a function or the type of a variable.
370 Mangled.remove_prefix(n: 2);
371
372 parseQualified(Demangled, Mangled);
373
374 if (Mangled.empty()) {
375 Mangled = {};
376 return;
377 }
378
379 // Artificial symbols end with 'Z' and have no type.
380 if (Mangled.front() == 'Z') {
381 Mangled.remove_prefix(n: 1);
382 } else if (!parseType(Mangled))
383 Mangled = {};
384}
385
386void Demangler::parseQualified(OutputBuffer *Demangled,
387 std::string_view &Mangled) {
388 // Qualified names are identifiers separated by their encoded length.
389 // Nested functions also encode their argument types without specifying
390 // what they return.
391 // QualifiedName:
392 // SymbolFunctionName
393 // SymbolFunctionName QualifiedName
394 // ^
395 // SymbolFunctionName:
396 // SymbolName
397 // SymbolName TypeFunctionNoReturn
398 // SymbolName M TypeFunctionNoReturn
399 // SymbolName M TypeModifiers TypeFunctionNoReturn
400 // The start pointer should be at the above location.
401
402 // Whether it has more than one symbol
403 size_t NotFirst = false;
404 do {
405 // Skip over anonymous symbols.
406 if (!Mangled.empty() && Mangled.front() == '0') {
407 do
408 Mangled.remove_prefix(n: 1);
409 while (!Mangled.empty() && Mangled.front() == '0');
410
411 continue;
412 }
413
414 if (NotFirst)
415 *Demangled << '.';
416 NotFirst = true;
417
418 parseIdentifier(Demangled, Mangled);
419 } while (!Mangled.empty() && isSymbolName(Mangled));
420}
421
422void Demangler::parseIdentifier(OutputBuffer *Demangled,
423 std::string_view &Mangled) {
424 if (Mangled.empty()) {
425 Mangled = {};
426 return;
427 }
428
429 if (Mangled.front() == 'Q')
430 return parseSymbolBackref(Demangled, Mangled);
431
432 // TODO: Parse lengthless template instances.
433
434 unsigned long Len;
435 decodeNumber(Mangled, Ret&: Len);
436
437 if (Mangled.empty()) {
438 Mangled = {};
439 return;
440 }
441 if (!Len || Mangled.length() < Len) {
442 Mangled = {};
443 return;
444 }
445
446 // TODO: Parse template instances with a length prefix.
447
448 // There can be multiple different declarations in the same function that
449 // have the same mangled name. To make the mangled names unique, a fake
450 // parent in the form `__Sddd' is added to the symbol.
451 if (Len >= 4 && starts_with(haystack: Mangled, needle: "__S")) {
452 const size_t SuffixLen = Mangled.length() - Len;
453 std::string_view P = Mangled.substr(pos: 3);
454 while (P.length() > SuffixLen && std::isdigit(P.front()))
455 P.remove_prefix(n: 1);
456 if (P.length() == SuffixLen) {
457 // Skip over the fake parent.
458 Mangled.remove_prefix(n: Len);
459 return parseIdentifier(Demangled, Mangled);
460 }
461
462 // Else demangle it as a plain identifier.
463 }
464
465 parseLName(Demangled, Mangled, Len);
466}
467
468bool Demangler::parseType(std::string_view &Mangled) {
469 if (Mangled.empty()) {
470 Mangled = {};
471 return false;
472 }
473
474 switch (Mangled.front()) {
475 // TODO: Parse type qualifiers.
476 // TODO: Parse function types.
477 // TODO: Parse compound types.
478 // TODO: Parse delegate types.
479 // TODO: Parse tuple types.
480
481 // Basic types.
482 case 'i':
483 Mangled.remove_prefix(n: 1);
484 // TODO: Add type name dumping
485 return true;
486
487 // TODO: Add support for the rest of the basic types.
488
489 // Back referenced type.
490 case 'Q': {
491 parseTypeBackref(Mangled);
492 return true;
493 }
494
495 default: // unhandled.
496 Mangled = {};
497 return false;
498 }
499}
500
501void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
502 unsigned long Len) {
503 switch (Len) {
504 case 6:
505 if (starts_with(haystack: Mangled, needle: "__initZ")) {
506 // The static initializer for a given symbol.
507 Demangled->prepend(R: "initializer for ");
508 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
509 Mangled.remove_prefix(n: Len);
510 return;
511 }
512 if (starts_with(haystack: Mangled, needle: "__vtblZ")) {
513 // The vtable symbol for a given class.
514 Demangled->prepend(R: "vtable for ");
515 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
516 Mangled.remove_prefix(n: Len);
517 return;
518 }
519 break;
520
521 case 7:
522 if (starts_with(haystack: Mangled, needle: "__ClassZ")) {
523 // The classinfo symbol for a given class.
524 Demangled->prepend(R: "ClassInfo for ");
525 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
526 Mangled.remove_prefix(n: Len);
527 return;
528 }
529 break;
530
531 case 11:
532 if (starts_with(haystack: Mangled, needle: "__InterfaceZ")) {
533 // The interface symbol for a given class.
534 Demangled->prepend(R: "Interface for ");
535 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
536 Mangled.remove_prefix(n: Len);
537 return;
538 }
539 break;
540
541 case 12:
542 if (starts_with(haystack: Mangled, needle: "__ModuleInfoZ")) {
543 // The ModuleInfo symbol for a given module.
544 Demangled->prepend(R: "ModuleInfo for ");
545 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
546 Mangled.remove_prefix(n: Len);
547 return;
548 }
549 break;
550 }
551
552 *Demangled << Mangled.substr(pos: 0, n: Len);
553 Mangled.remove_prefix(n: Len);
554}
555
556Demangler::Demangler(std::string_view Mangled)
557 : Str(Mangled), LastBackref(Mangled.length()) {}
558
559const char *Demangler::parseMangle(OutputBuffer *Demangled) {
560 std::string_view M(this->Str);
561 parseMangle(Demangled, Mangled&: M);
562 return M.data();
563}
564
565char *llvm::dlangDemangle(std::string_view MangledName) {
566 if (MangledName.empty() || !starts_with(haystack: MangledName, needle: "_D"))
567 return nullptr;
568
569 OutputBuffer Demangled;
570 if (MangledName == "_Dmain") {
571 Demangled << "D main";
572 } else {
573
574 Demangler D(MangledName);
575 const char *M = D.parseMangle(Demangled: &Demangled);
576
577 // Check that the entire symbol was successfully demangled.
578 if (M == nullptr || *M != '\0') {
579 std::free(ptr: Demangled.getBuffer());
580 return nullptr;
581 }
582 }
583
584 // OutputBuffer's internal buffer is not null terminated and therefore we need
585 // to add it to comply with C null terminated strings.
586 if (Demangled.getCurrentPosition() > 0) {
587 Demangled << '\0';
588 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
589 return Demangled.getBuffer();
590 }
591
592 std::free(ptr: Demangled.getBuffer());
593 return nullptr;
594}
595