1 | //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a demangler for Rust v0 mangled symbols as specified in |
10 | // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Demangle/Demangle.h" |
15 | #include "llvm/Demangle/StringViewExtras.h" |
16 | #include "llvm/Demangle/Utility.h" |
17 | |
18 | #include <algorithm> |
19 | #include <cassert> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <string_view> |
24 | |
25 | using namespace llvm; |
26 | |
27 | using llvm::itanium_demangle::OutputBuffer; |
28 | using llvm::itanium_demangle::ScopedOverride; |
29 | using llvm::itanium_demangle::starts_with; |
30 | |
31 | namespace { |
32 | |
33 | struct Identifier { |
34 | std::string_view Name; |
35 | bool Punycode; |
36 | |
37 | bool empty() const { return Name.empty(); } |
38 | }; |
39 | |
40 | enum class BasicType { |
41 | Bool, |
42 | Char, |
43 | I8, |
44 | I16, |
45 | I32, |
46 | I64, |
47 | I128, |
48 | ISize, |
49 | U8, |
50 | U16, |
51 | U32, |
52 | U64, |
53 | U128, |
54 | USize, |
55 | F32, |
56 | F64, |
57 | Str, |
58 | Placeholder, |
59 | Unit, |
60 | Variadic, |
61 | Never, |
62 | }; |
63 | |
64 | enum class IsInType { |
65 | No, |
66 | Yes, |
67 | }; |
68 | |
69 | enum class LeaveGenericsOpen { |
70 | No, |
71 | Yes, |
72 | }; |
73 | |
74 | class Demangler { |
75 | // Maximum recursion level. Used to avoid stack overflow. |
76 | size_t MaxRecursionLevel; |
77 | // Current recursion level. |
78 | size_t RecursionLevel; |
79 | size_t BoundLifetimes; |
80 | // Input string that is being demangled with "_R" prefix removed. |
81 | std::string_view Input; |
82 | // Position in the input string. |
83 | size_t Position; |
84 | // When true, print methods append the output to the stream. |
85 | // When false, the output is suppressed. |
86 | bool Print; |
87 | // True if an error occurred. |
88 | bool Error; |
89 | |
90 | public: |
91 | // Demangled output. |
92 | OutputBuffer Output; |
93 | |
94 | Demangler(size_t MaxRecursionLevel = 500); |
95 | |
96 | bool demangle(std::string_view MangledName); |
97 | |
98 | private: |
99 | bool demanglePath(IsInType Type, |
100 | LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); |
101 | void demangleImplPath(IsInType InType); |
102 | void demangleGenericArg(); |
103 | void demangleType(); |
104 | void demangleFnSig(); |
105 | void demangleDynBounds(); |
106 | void demangleDynTrait(); |
107 | void demangleOptionalBinder(); |
108 | void demangleConst(); |
109 | void demangleConstInt(); |
110 | void demangleConstBool(); |
111 | void demangleConstChar(); |
112 | |
113 | template <typename Callable> void demangleBackref(Callable Demangler) { |
114 | uint64_t Backref = parseBase62Number(); |
115 | if (Error || Backref >= Position) { |
116 | Error = true; |
117 | return; |
118 | } |
119 | |
120 | if (!Print) |
121 | return; |
122 | |
123 | ScopedOverride<size_t> SavePosition(Position, Position); |
124 | Position = Backref; |
125 | Demangler(); |
126 | } |
127 | |
128 | Identifier parseIdentifier(); |
129 | uint64_t parseOptionalBase62Number(char Tag); |
130 | uint64_t parseBase62Number(); |
131 | uint64_t parseDecimalNumber(); |
132 | uint64_t parseHexNumber(std::string_view &HexDigits); |
133 | |
134 | void print(char C); |
135 | void print(std::string_view S); |
136 | void printDecimalNumber(uint64_t N); |
137 | void printBasicType(BasicType); |
138 | void printLifetime(uint64_t Index); |
139 | void printIdentifier(Identifier Ident); |
140 | |
141 | char look() const; |
142 | char consume(); |
143 | bool consumeIf(char Prefix); |
144 | |
145 | bool addAssign(uint64_t &A, uint64_t B); |
146 | bool mulAssign(uint64_t &A, uint64_t B); |
147 | }; |
148 | |
149 | } // namespace |
150 | |
151 | char *llvm::rustDemangle(std::string_view MangledName) { |
152 | // Return early if mangled name doesn't look like a Rust symbol. |
153 | if (MangledName.empty() || !starts_with(haystack: MangledName, needle: "_R" )) |
154 | return nullptr; |
155 | |
156 | Demangler D; |
157 | if (!D.demangle(MangledName)) { |
158 | std::free(ptr: D.Output.getBuffer()); |
159 | return nullptr; |
160 | } |
161 | |
162 | D.Output += '\0'; |
163 | |
164 | return D.Output.getBuffer(); |
165 | } |
166 | |
167 | Demangler::Demangler(size_t MaxRecursionLevel) |
168 | : MaxRecursionLevel(MaxRecursionLevel) {} |
169 | |
170 | static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } |
171 | |
172 | static inline bool isHexDigit(const char C) { |
173 | return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); |
174 | } |
175 | |
176 | static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } |
177 | |
178 | static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } |
179 | |
180 | /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. |
181 | static inline bool isValid(const char C) { |
182 | return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; |
183 | } |
184 | |
185 | // Demangles Rust v0 mangled symbol. Returns true when successful, and false |
186 | // otherwise. The demangled symbol is stored in Output field. It is |
187 | // responsibility of the caller to free the memory behind the output stream. |
188 | // |
189 | // <symbol-name> = "_R" <path> [<instantiating-crate>] |
190 | bool Demangler::demangle(std::string_view Mangled) { |
191 | Position = 0; |
192 | Error = false; |
193 | Print = true; |
194 | RecursionLevel = 0; |
195 | BoundLifetimes = 0; |
196 | |
197 | if (!starts_with(haystack: Mangled, needle: "_R" )) { |
198 | Error = true; |
199 | return false; |
200 | } |
201 | Mangled.remove_prefix(n: 2); |
202 | size_t Dot = Mangled.find(c: '.'); |
203 | Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(pos: 0, n: Dot); |
204 | |
205 | demanglePath(Type: IsInType::No); |
206 | |
207 | if (Position != Input.size()) { |
208 | ScopedOverride<bool> SavePrint(Print, false); |
209 | demanglePath(Type: IsInType::No); |
210 | } |
211 | |
212 | if (Position != Input.size()) |
213 | Error = true; |
214 | |
215 | if (Dot != std::string_view::npos) { |
216 | print(S: " (" ); |
217 | print(S: Mangled.substr(pos: Dot)); |
218 | print(S: ")" ); |
219 | } |
220 | |
221 | return !Error; |
222 | } |
223 | |
224 | // Demangles a path. InType indicates whether a path is inside a type. When |
225 | // LeaveOpen is true, a closing `>` after generic arguments is omitted from the |
226 | // output. Return value indicates whether generics arguments have been left |
227 | // open. |
228 | // |
229 | // <path> = "C" <identifier> // crate root |
230 | // | "M" <impl-path> <type> // <T> (inherent impl) |
231 | // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) |
232 | // | "Y" <type> <path> // <T as Trait> (trait definition) |
233 | // | "N" <ns> <path> <identifier> // ...::ident (nested path) |
234 | // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) |
235 | // | <backref> |
236 | // <identifier> = [<disambiguator>] <undisambiguated-identifier> |
237 | // <ns> = "C" // closure |
238 | // | "S" // shim |
239 | // | <A-Z> // other special namespaces |
240 | // | <a-z> // internal namespaces |
241 | bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { |
242 | if (Error || RecursionLevel >= MaxRecursionLevel) { |
243 | Error = true; |
244 | return false; |
245 | } |
246 | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
247 | |
248 | switch (consume()) { |
249 | case 'C': { |
250 | parseOptionalBase62Number(Tag: 's'); |
251 | printIdentifier(Ident: parseIdentifier()); |
252 | break; |
253 | } |
254 | case 'M': { |
255 | demangleImplPath(InType); |
256 | print(S: "<" ); |
257 | demangleType(); |
258 | print(S: ">" ); |
259 | break; |
260 | } |
261 | case 'X': { |
262 | demangleImplPath(InType); |
263 | print(S: "<" ); |
264 | demangleType(); |
265 | print(S: " as " ); |
266 | demanglePath(InType: IsInType::Yes); |
267 | print(S: ">" ); |
268 | break; |
269 | } |
270 | case 'Y': { |
271 | print(S: "<" ); |
272 | demangleType(); |
273 | print(S: " as " ); |
274 | demanglePath(InType: IsInType::Yes); |
275 | print(S: ">" ); |
276 | break; |
277 | } |
278 | case 'N': { |
279 | char NS = consume(); |
280 | if (!isLower(C: NS) && !isUpper(C: NS)) { |
281 | Error = true; |
282 | break; |
283 | } |
284 | demanglePath(InType); |
285 | |
286 | uint64_t Disambiguator = parseOptionalBase62Number(Tag: 's'); |
287 | Identifier Ident = parseIdentifier(); |
288 | |
289 | if (isUpper(C: NS)) { |
290 | // Special namespaces |
291 | print(S: "::{" ); |
292 | if (NS == 'C') |
293 | print(S: "closure" ); |
294 | else if (NS == 'S') |
295 | print(S: "shim" ); |
296 | else |
297 | print(C: NS); |
298 | if (!Ident.empty()) { |
299 | print(S: ":" ); |
300 | printIdentifier(Ident); |
301 | } |
302 | print(C: '#'); |
303 | printDecimalNumber(N: Disambiguator); |
304 | print(C: '}'); |
305 | } else { |
306 | // Implementation internal namespaces. |
307 | if (!Ident.empty()) { |
308 | print(S: "::" ); |
309 | printIdentifier(Ident); |
310 | } |
311 | } |
312 | break; |
313 | } |
314 | case 'I': { |
315 | demanglePath(InType); |
316 | // Omit "::" when in a type, where it is optional. |
317 | if (InType == IsInType::No) |
318 | print(S: "::" ); |
319 | print(S: "<" ); |
320 | for (size_t I = 0; !Error && !consumeIf(Prefix: 'E'); ++I) { |
321 | if (I > 0) |
322 | print(S: ", " ); |
323 | demangleGenericArg(); |
324 | } |
325 | if (LeaveOpen == LeaveGenericsOpen::Yes) |
326 | return true; |
327 | else |
328 | print(S: ">" ); |
329 | break; |
330 | } |
331 | case 'B': { |
332 | bool IsOpen = false; |
333 | demangleBackref(Demangler: [&] { IsOpen = demanglePath(InType, LeaveOpen); }); |
334 | return IsOpen; |
335 | } |
336 | default: |
337 | Error = true; |
338 | break; |
339 | } |
340 | |
341 | return false; |
342 | } |
343 | |
344 | // <impl-path> = [<disambiguator>] <path> |
345 | // <disambiguator> = "s" <base-62-number> |
346 | void Demangler::demangleImplPath(IsInType InType) { |
347 | ScopedOverride<bool> SavePrint(Print, false); |
348 | parseOptionalBase62Number(Tag: 's'); |
349 | demanglePath(InType); |
350 | } |
351 | |
352 | // <generic-arg> = <lifetime> |
353 | // | <type> |
354 | // | "K" <const> |
355 | // <lifetime> = "L" <base-62-number> |
356 | void Demangler::demangleGenericArg() { |
357 | if (consumeIf(Prefix: 'L')) |
358 | printLifetime(Index: parseBase62Number()); |
359 | else if (consumeIf(Prefix: 'K')) |
360 | demangleConst(); |
361 | else |
362 | demangleType(); |
363 | } |
364 | |
365 | // <basic-type> = "a" // i8 |
366 | // | "b" // bool |
367 | // | "c" // char |
368 | // | "d" // f64 |
369 | // | "e" // str |
370 | // | "f" // f32 |
371 | // | "h" // u8 |
372 | // | "i" // isize |
373 | // | "j" // usize |
374 | // | "l" // i32 |
375 | // | "m" // u32 |
376 | // | "n" // i128 |
377 | // | "o" // u128 |
378 | // | "s" // i16 |
379 | // | "t" // u16 |
380 | // | "u" // () |
381 | // | "v" // ... |
382 | // | "x" // i64 |
383 | // | "y" // u64 |
384 | // | "z" // ! |
385 | // | "p" // placeholder (e.g. for generic params), shown as _ |
386 | static bool parseBasicType(char C, BasicType &Type) { |
387 | switch (C) { |
388 | case 'a': |
389 | Type = BasicType::I8; |
390 | return true; |
391 | case 'b': |
392 | Type = BasicType::Bool; |
393 | return true; |
394 | case 'c': |
395 | Type = BasicType::Char; |
396 | return true; |
397 | case 'd': |
398 | Type = BasicType::F64; |
399 | return true; |
400 | case 'e': |
401 | Type = BasicType::Str; |
402 | return true; |
403 | case 'f': |
404 | Type = BasicType::F32; |
405 | return true; |
406 | case 'h': |
407 | Type = BasicType::U8; |
408 | return true; |
409 | case 'i': |
410 | Type = BasicType::ISize; |
411 | return true; |
412 | case 'j': |
413 | Type = BasicType::USize; |
414 | return true; |
415 | case 'l': |
416 | Type = BasicType::I32; |
417 | return true; |
418 | case 'm': |
419 | Type = BasicType::U32; |
420 | return true; |
421 | case 'n': |
422 | Type = BasicType::I128; |
423 | return true; |
424 | case 'o': |
425 | Type = BasicType::U128; |
426 | return true; |
427 | case 'p': |
428 | Type = BasicType::Placeholder; |
429 | return true; |
430 | case 's': |
431 | Type = BasicType::I16; |
432 | return true; |
433 | case 't': |
434 | Type = BasicType::U16; |
435 | return true; |
436 | case 'u': |
437 | Type = BasicType::Unit; |
438 | return true; |
439 | case 'v': |
440 | Type = BasicType::Variadic; |
441 | return true; |
442 | case 'x': |
443 | Type = BasicType::I64; |
444 | return true; |
445 | case 'y': |
446 | Type = BasicType::U64; |
447 | return true; |
448 | case 'z': |
449 | Type = BasicType::Never; |
450 | return true; |
451 | default: |
452 | return false; |
453 | } |
454 | } |
455 | |
456 | void Demangler::printBasicType(BasicType Type) { |
457 | switch (Type) { |
458 | case BasicType::Bool: |
459 | print(S: "bool" ); |
460 | break; |
461 | case BasicType::Char: |
462 | print(S: "char" ); |
463 | break; |
464 | case BasicType::I8: |
465 | print(S: "i8" ); |
466 | break; |
467 | case BasicType::I16: |
468 | print(S: "i16" ); |
469 | break; |
470 | case BasicType::I32: |
471 | print(S: "i32" ); |
472 | break; |
473 | case BasicType::I64: |
474 | print(S: "i64" ); |
475 | break; |
476 | case BasicType::I128: |
477 | print(S: "i128" ); |
478 | break; |
479 | case BasicType::ISize: |
480 | print(S: "isize" ); |
481 | break; |
482 | case BasicType::U8: |
483 | print(S: "u8" ); |
484 | break; |
485 | case BasicType::U16: |
486 | print(S: "u16" ); |
487 | break; |
488 | case BasicType::U32: |
489 | print(S: "u32" ); |
490 | break; |
491 | case BasicType::U64: |
492 | print(S: "u64" ); |
493 | break; |
494 | case BasicType::U128: |
495 | print(S: "u128" ); |
496 | break; |
497 | case BasicType::USize: |
498 | print(S: "usize" ); |
499 | break; |
500 | case BasicType::F32: |
501 | print(S: "f32" ); |
502 | break; |
503 | case BasicType::F64: |
504 | print(S: "f64" ); |
505 | break; |
506 | case BasicType::Str: |
507 | print(S: "str" ); |
508 | break; |
509 | case BasicType::Placeholder: |
510 | print(S: "_" ); |
511 | break; |
512 | case BasicType::Unit: |
513 | print(S: "()" ); |
514 | break; |
515 | case BasicType::Variadic: |
516 | print(S: "..." ); |
517 | break; |
518 | case BasicType::Never: |
519 | print(S: "!" ); |
520 | break; |
521 | } |
522 | } |
523 | |
524 | // <type> = | <basic-type> |
525 | // | <path> // named type |
526 | // | "A" <type> <const> // [T; N] |
527 | // | "S" <type> // [T] |
528 | // | "T" {<type>} "E" // (T1, T2, T3, ...) |
529 | // | "R" [<lifetime>] <type> // &T |
530 | // | "Q" [<lifetime>] <type> // &mut T |
531 | // | "P" <type> // *const T |
532 | // | "O" <type> // *mut T |
533 | // | "F" <fn-sig> // fn(...) -> ... |
534 | // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a |
535 | // | <backref> // backref |
536 | void Demangler::demangleType() { |
537 | if (Error || RecursionLevel >= MaxRecursionLevel) { |
538 | Error = true; |
539 | return; |
540 | } |
541 | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
542 | |
543 | size_t Start = Position; |
544 | char C = consume(); |
545 | BasicType Type; |
546 | if (parseBasicType(C, Type)) |
547 | return printBasicType(Type); |
548 | |
549 | switch (C) { |
550 | case 'A': |
551 | print(S: "[" ); |
552 | demangleType(); |
553 | print(S: "; " ); |
554 | demangleConst(); |
555 | print(S: "]" ); |
556 | break; |
557 | case 'S': |
558 | print(S: "[" ); |
559 | demangleType(); |
560 | print(S: "]" ); |
561 | break; |
562 | case 'T': { |
563 | print(S: "(" ); |
564 | size_t I = 0; |
565 | for (; !Error && !consumeIf(Prefix: 'E'); ++I) { |
566 | if (I > 0) |
567 | print(S: ", " ); |
568 | demangleType(); |
569 | } |
570 | if (I == 1) |
571 | print(S: "," ); |
572 | print(S: ")" ); |
573 | break; |
574 | } |
575 | case 'R': |
576 | case 'Q': |
577 | print(C: '&'); |
578 | if (consumeIf(Prefix: 'L')) { |
579 | if (auto Lifetime = parseBase62Number()) { |
580 | printLifetime(Index: Lifetime); |
581 | print(C: ' '); |
582 | } |
583 | } |
584 | if (C == 'Q') |
585 | print(S: "mut " ); |
586 | demangleType(); |
587 | break; |
588 | case 'P': |
589 | print(S: "*const " ); |
590 | demangleType(); |
591 | break; |
592 | case 'O': |
593 | print(S: "*mut " ); |
594 | demangleType(); |
595 | break; |
596 | case 'F': |
597 | demangleFnSig(); |
598 | break; |
599 | case 'D': |
600 | demangleDynBounds(); |
601 | if (consumeIf(Prefix: 'L')) { |
602 | if (auto Lifetime = parseBase62Number()) { |
603 | print(S: " + " ); |
604 | printLifetime(Index: Lifetime); |
605 | } |
606 | } else { |
607 | Error = true; |
608 | } |
609 | break; |
610 | case 'B': |
611 | demangleBackref(Demangler: [&] { demangleType(); }); |
612 | break; |
613 | default: |
614 | Position = Start; |
615 | demanglePath(InType: IsInType::Yes); |
616 | break; |
617 | } |
618 | } |
619 | |
620 | // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> |
621 | // <abi> = "C" |
622 | // | <undisambiguated-identifier> |
623 | void Demangler::demangleFnSig() { |
624 | ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
625 | demangleOptionalBinder(); |
626 | |
627 | if (consumeIf(Prefix: 'U')) |
628 | print(S: "unsafe " ); |
629 | |
630 | if (consumeIf(Prefix: 'K')) { |
631 | print(S: "extern \"" ); |
632 | if (consumeIf(Prefix: 'C')) { |
633 | print(S: "C" ); |
634 | } else { |
635 | Identifier Ident = parseIdentifier(); |
636 | if (Ident.Punycode) |
637 | Error = true; |
638 | for (char C : Ident.Name) { |
639 | // When mangling ABI string, the "-" is replaced with "_". |
640 | if (C == '_') |
641 | C = '-'; |
642 | print(C); |
643 | } |
644 | } |
645 | print(S: "\" " ); |
646 | } |
647 | |
648 | print(S: "fn(" ); |
649 | for (size_t I = 0; !Error && !consumeIf(Prefix: 'E'); ++I) { |
650 | if (I > 0) |
651 | print(S: ", " ); |
652 | demangleType(); |
653 | } |
654 | print(S: ")" ); |
655 | |
656 | if (consumeIf(Prefix: 'u')) { |
657 | // Skip the unit type from the output. |
658 | } else { |
659 | print(S: " -> " ); |
660 | demangleType(); |
661 | } |
662 | } |
663 | |
664 | // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" |
665 | void Demangler::demangleDynBounds() { |
666 | ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
667 | print(S: "dyn " ); |
668 | demangleOptionalBinder(); |
669 | for (size_t I = 0; !Error && !consumeIf(Prefix: 'E'); ++I) { |
670 | if (I > 0) |
671 | print(S: " + " ); |
672 | demangleDynTrait(); |
673 | } |
674 | } |
675 | |
676 | // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} |
677 | // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> |
678 | void Demangler::demangleDynTrait() { |
679 | bool IsOpen = demanglePath(InType: IsInType::Yes, LeaveOpen: LeaveGenericsOpen::Yes); |
680 | while (!Error && consumeIf(Prefix: 'p')) { |
681 | if (!IsOpen) { |
682 | IsOpen = true; |
683 | print(C: '<'); |
684 | } else { |
685 | print(S: ", " ); |
686 | } |
687 | print(S: parseIdentifier().Name); |
688 | print(S: " = " ); |
689 | demangleType(); |
690 | } |
691 | if (IsOpen) |
692 | print(S: ">" ); |
693 | } |
694 | |
695 | // Demangles optional binder and updates the number of bound lifetimes. |
696 | // |
697 | // <binder> = "G" <base-62-number> |
698 | void Demangler::demangleOptionalBinder() { |
699 | uint64_t Binder = parseOptionalBase62Number(Tag: 'G'); |
700 | if (Error || Binder == 0) |
701 | return; |
702 | |
703 | // In valid inputs each bound lifetime is referenced later. Referencing a |
704 | // lifetime requires at least one byte of input. Reject inputs that are too |
705 | // short to reference all bound lifetimes. Otherwise demangling of invalid |
706 | // binders could generate excessive amounts of output. |
707 | if (Binder >= Input.size() - BoundLifetimes) { |
708 | Error = true; |
709 | return; |
710 | } |
711 | |
712 | print(S: "for<" ); |
713 | for (size_t I = 0; I != Binder; ++I) { |
714 | BoundLifetimes += 1; |
715 | if (I > 0) |
716 | print(S: ", " ); |
717 | printLifetime(Index: 1); |
718 | } |
719 | print(S: "> " ); |
720 | } |
721 | |
722 | // <const> = <basic-type> <const-data> |
723 | // | "p" // placeholder |
724 | // | <backref> |
725 | void Demangler::demangleConst() { |
726 | if (Error || RecursionLevel >= MaxRecursionLevel) { |
727 | Error = true; |
728 | return; |
729 | } |
730 | ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
731 | |
732 | char C = consume(); |
733 | BasicType Type; |
734 | if (parseBasicType(C, Type)) { |
735 | switch (Type) { |
736 | case BasicType::I8: |
737 | case BasicType::I16: |
738 | case BasicType::I32: |
739 | case BasicType::I64: |
740 | case BasicType::I128: |
741 | case BasicType::ISize: |
742 | case BasicType::U8: |
743 | case BasicType::U16: |
744 | case BasicType::U32: |
745 | case BasicType::U64: |
746 | case BasicType::U128: |
747 | case BasicType::USize: |
748 | demangleConstInt(); |
749 | break; |
750 | case BasicType::Bool: |
751 | demangleConstBool(); |
752 | break; |
753 | case BasicType::Char: |
754 | demangleConstChar(); |
755 | break; |
756 | case BasicType::Placeholder: |
757 | print(C: '_'); |
758 | break; |
759 | default: |
760 | Error = true; |
761 | break; |
762 | } |
763 | } else if (C == 'B') { |
764 | demangleBackref(Demangler: [&] { demangleConst(); }); |
765 | } else { |
766 | Error = true; |
767 | } |
768 | } |
769 | |
770 | // <const-data> = ["n"] <hex-number> |
771 | void Demangler::demangleConstInt() { |
772 | if (consumeIf(Prefix: 'n')) |
773 | print(C: '-'); |
774 | |
775 | std::string_view HexDigits; |
776 | uint64_t Value = parseHexNumber(HexDigits); |
777 | if (HexDigits.size() <= 16) { |
778 | printDecimalNumber(N: Value); |
779 | } else { |
780 | print(S: "0x" ); |
781 | print(S: HexDigits); |
782 | } |
783 | } |
784 | |
785 | // <const-data> = "0_" // false |
786 | // | "1_" // true |
787 | void Demangler::demangleConstBool() { |
788 | std::string_view HexDigits; |
789 | parseHexNumber(HexDigits); |
790 | if (HexDigits == "0" ) |
791 | print(S: "false" ); |
792 | else if (HexDigits == "1" ) |
793 | print(S: "true" ); |
794 | else |
795 | Error = true; |
796 | } |
797 | |
798 | /// Returns true if CodePoint represents a printable ASCII character. |
799 | static bool isAsciiPrintable(uint64_t CodePoint) { |
800 | return 0x20 <= CodePoint && CodePoint <= 0x7e; |
801 | } |
802 | |
803 | // <const-data> = <hex-number> |
804 | void Demangler::demangleConstChar() { |
805 | std::string_view HexDigits; |
806 | uint64_t CodePoint = parseHexNumber(HexDigits); |
807 | if (Error || HexDigits.size() > 6) { |
808 | Error = true; |
809 | return; |
810 | } |
811 | |
812 | print(S: "'" ); |
813 | switch (CodePoint) { |
814 | case '\t': |
815 | print(S: R"(\t)" ); |
816 | break; |
817 | case '\r': |
818 | print(S: R"(\r)" ); |
819 | break; |
820 | case '\n': |
821 | print(S: R"(\n)" ); |
822 | break; |
823 | case '\\': |
824 | print(S: R"(\\)" ); |
825 | break; |
826 | case '"': |
827 | print(S: R"(")" ); |
828 | break; |
829 | case '\'': |
830 | print(S: R"(\')" ); |
831 | break; |
832 | default: |
833 | if (isAsciiPrintable(CodePoint)) { |
834 | char C = CodePoint; |
835 | print(C); |
836 | } else { |
837 | print(S: R"(\u{)" ); |
838 | print(S: HexDigits); |
839 | print(C: '}'); |
840 | } |
841 | break; |
842 | } |
843 | print(C: '\''); |
844 | } |
845 | |
846 | // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> |
847 | Identifier Demangler::parseIdentifier() { |
848 | bool Punycode = consumeIf(Prefix: 'u'); |
849 | uint64_t Bytes = parseDecimalNumber(); |
850 | |
851 | // Underscore resolves the ambiguity when identifier starts with a decimal |
852 | // digit or another underscore. |
853 | consumeIf(Prefix: '_'); |
854 | |
855 | if (Error || Bytes > Input.size() - Position) { |
856 | Error = true; |
857 | return {}; |
858 | } |
859 | std::string_view S = Input.substr(pos: Position, n: Bytes); |
860 | Position += Bytes; |
861 | |
862 | if (!std::all_of(first: S.begin(), last: S.end(), pred: isValid)) { |
863 | Error = true; |
864 | return {}; |
865 | } |
866 | |
867 | return {.Name: S, .Punycode: Punycode}; |
868 | } |
869 | |
870 | // Parses optional base 62 number. The presence of a number is determined using |
871 | // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise |
872 | // |
873 | // This function is indended for parsing disambiguators and binders which when |
874 | // not present have their value interpreted as 0, and otherwise as decoded |
875 | // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is |
876 | // 2. When "G" is absent value is 0. |
877 | uint64_t Demangler::parseOptionalBase62Number(char Tag) { |
878 | if (!consumeIf(Prefix: Tag)) |
879 | return 0; |
880 | |
881 | uint64_t N = parseBase62Number(); |
882 | if (Error || !addAssign(A&: N, B: 1)) |
883 | return 0; |
884 | |
885 | return N; |
886 | } |
887 | |
888 | // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by |
889 | // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, |
890 | // "1_" encodes 2, etc. |
891 | // |
892 | // <base-62-number> = {<0-9a-zA-Z>} "_" |
893 | uint64_t Demangler::parseBase62Number() { |
894 | if (consumeIf(Prefix: '_')) |
895 | return 0; |
896 | |
897 | uint64_t Value = 0; |
898 | |
899 | while (true) { |
900 | uint64_t Digit; |
901 | char C = consume(); |
902 | |
903 | if (C == '_') { |
904 | break; |
905 | } else if (isDigit(C)) { |
906 | Digit = C - '0'; |
907 | } else if (isLower(C)) { |
908 | Digit = 10 + (C - 'a'); |
909 | } else if (isUpper(C)) { |
910 | Digit = 10 + 26 + (C - 'A'); |
911 | } else { |
912 | Error = true; |
913 | return 0; |
914 | } |
915 | |
916 | if (!mulAssign(A&: Value, B: 62)) |
917 | return 0; |
918 | |
919 | if (!addAssign(A&: Value, B: Digit)) |
920 | return 0; |
921 | } |
922 | |
923 | if (!addAssign(A&: Value, B: 1)) |
924 | return 0; |
925 | |
926 | return Value; |
927 | } |
928 | |
929 | // Parses a decimal number that had been encoded without any leading zeros. |
930 | // |
931 | // <decimal-number> = "0" |
932 | // | <1-9> {<0-9>} |
933 | uint64_t Demangler::parseDecimalNumber() { |
934 | char C = look(); |
935 | if (!isDigit(C)) { |
936 | Error = true; |
937 | return 0; |
938 | } |
939 | |
940 | if (C == '0') { |
941 | consume(); |
942 | return 0; |
943 | } |
944 | |
945 | uint64_t Value = 0; |
946 | |
947 | while (isDigit(C: look())) { |
948 | if (!mulAssign(A&: Value, B: 10)) { |
949 | Error = true; |
950 | return 0; |
951 | } |
952 | |
953 | uint64_t D = consume() - '0'; |
954 | if (!addAssign(A&: Value, B: D)) |
955 | return 0; |
956 | } |
957 | |
958 | return Value; |
959 | } |
960 | |
961 | // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed |
962 | // value and stores hex digits in HexDigits. The return value is unspecified if |
963 | // HexDigits.size() > 16. |
964 | // |
965 | // <hex-number> = "0_" |
966 | // | <1-9a-f> {<0-9a-f>} "_" |
967 | uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) { |
968 | size_t Start = Position; |
969 | uint64_t Value = 0; |
970 | |
971 | if (!isHexDigit(C: look())) |
972 | Error = true; |
973 | |
974 | if (consumeIf(Prefix: '0')) { |
975 | if (!consumeIf(Prefix: '_')) |
976 | Error = true; |
977 | } else { |
978 | while (!Error && !consumeIf(Prefix: '_')) { |
979 | char C = consume(); |
980 | Value *= 16; |
981 | if (isDigit(C)) |
982 | Value += C - '0'; |
983 | else if ('a' <= C && C <= 'f') |
984 | Value += 10 + (C - 'a'); |
985 | else |
986 | Error = true; |
987 | } |
988 | } |
989 | |
990 | if (Error) { |
991 | HexDigits = std::string_view(); |
992 | return 0; |
993 | } |
994 | |
995 | size_t End = Position - 1; |
996 | assert(Start < End); |
997 | HexDigits = Input.substr(pos: Start, n: End - Start); |
998 | return Value; |
999 | } |
1000 | |
1001 | void Demangler::print(char C) { |
1002 | if (Error || !Print) |
1003 | return; |
1004 | |
1005 | Output += C; |
1006 | } |
1007 | |
1008 | void Demangler::print(std::string_view S) { |
1009 | if (Error || !Print) |
1010 | return; |
1011 | |
1012 | Output += S; |
1013 | } |
1014 | |
1015 | void Demangler::printDecimalNumber(uint64_t N) { |
1016 | if (Error || !Print) |
1017 | return; |
1018 | |
1019 | Output << N; |
1020 | } |
1021 | |
1022 | // Prints a lifetime. An index 0 always represents an erased lifetime. Indices |
1023 | // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes |
1024 | // bound by one of the enclosing binders. |
1025 | void Demangler::printLifetime(uint64_t Index) { |
1026 | if (Index == 0) { |
1027 | print(S: "'_" ); |
1028 | return; |
1029 | } |
1030 | |
1031 | if (Index - 1 >= BoundLifetimes) { |
1032 | Error = true; |
1033 | return; |
1034 | } |
1035 | |
1036 | uint64_t Depth = BoundLifetimes - Index; |
1037 | print(C: '\''); |
1038 | if (Depth < 26) { |
1039 | char C = 'a' + Depth; |
1040 | print(C); |
1041 | } else { |
1042 | print(C: 'z'); |
1043 | printDecimalNumber(N: Depth - 26 + 1); |
1044 | } |
1045 | } |
1046 | |
1047 | static inline bool decodePunycodeDigit(char C, size_t &Value) { |
1048 | if (isLower(C)) { |
1049 | Value = C - 'a'; |
1050 | return true; |
1051 | } |
1052 | |
1053 | if (isDigit(C)) { |
1054 | Value = 26 + (C - '0'); |
1055 | return true; |
1056 | } |
1057 | |
1058 | return false; |
1059 | } |
1060 | |
1061 | static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { |
1062 | char *Buffer = Output.getBuffer(); |
1063 | char *Start = Buffer + StartIdx; |
1064 | char *End = Buffer + Output.getCurrentPosition(); |
1065 | Output.setCurrentPosition(std::remove(first: Start, last: End, value: '\0') - Buffer); |
1066 | } |
1067 | |
1068 | // Encodes code point as UTF-8 and stores results in Output. Returns false if |
1069 | // CodePoint is not a valid unicode scalar value. |
1070 | static inline bool encodeUTF8(size_t CodePoint, char *Output) { |
1071 | if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) |
1072 | return false; |
1073 | |
1074 | if (CodePoint <= 0x7F) { |
1075 | Output[0] = CodePoint; |
1076 | return true; |
1077 | } |
1078 | |
1079 | if (CodePoint <= 0x7FF) { |
1080 | Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); |
1081 | Output[1] = 0x80 | (CodePoint & 0x3F); |
1082 | return true; |
1083 | } |
1084 | |
1085 | if (CodePoint <= 0xFFFF) { |
1086 | Output[0] = 0xE0 | (CodePoint >> 12); |
1087 | Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); |
1088 | Output[2] = 0x80 | (CodePoint & 0x3F); |
1089 | return true; |
1090 | } |
1091 | |
1092 | if (CodePoint <= 0x10FFFF) { |
1093 | Output[0] = 0xF0 | (CodePoint >> 18); |
1094 | Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); |
1095 | Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); |
1096 | Output[3] = 0x80 | (CodePoint & 0x3F); |
1097 | return true; |
1098 | } |
1099 | |
1100 | return false; |
1101 | } |
1102 | |
1103 | // Decodes string encoded using punycode and appends results to Output. |
1104 | // Returns true if decoding was successful. |
1105 | static bool decodePunycode(std::string_view Input, OutputBuffer &Output) { |
1106 | size_t OutputSize = Output.getCurrentPosition(); |
1107 | size_t InputIdx = 0; |
1108 | |
1109 | // Rust uses an underscore as a delimiter. |
1110 | size_t DelimiterPos = std::string_view::npos; |
1111 | for (size_t I = 0; I != Input.size(); ++I) |
1112 | if (Input[I] == '_') |
1113 | DelimiterPos = I; |
1114 | |
1115 | if (DelimiterPos != std::string_view::npos) { |
1116 | // Copy basic code points before the last delimiter to the output. |
1117 | for (; InputIdx != DelimiterPos; ++InputIdx) { |
1118 | char C = Input[InputIdx]; |
1119 | if (!isValid(C)) |
1120 | return false; |
1121 | // Code points are padded with zeros while decoding is in progress. |
1122 | char UTF8[4] = {C}; |
1123 | Output += std::string_view(UTF8, 4); |
1124 | } |
1125 | // Skip over the delimiter. |
1126 | ++InputIdx; |
1127 | } |
1128 | |
1129 | size_t Base = 36; |
1130 | size_t Skew = 38; |
1131 | size_t Bias = 72; |
1132 | size_t N = 0x80; |
1133 | size_t TMin = 1; |
1134 | size_t TMax = 26; |
1135 | size_t Damp = 700; |
1136 | |
1137 | auto Adapt = [&](size_t Delta, size_t NumPoints) { |
1138 | Delta /= Damp; |
1139 | Delta += Delta / NumPoints; |
1140 | Damp = 2; |
1141 | |
1142 | size_t K = 0; |
1143 | while (Delta > (Base - TMin) * TMax / 2) { |
1144 | Delta /= Base - TMin; |
1145 | K += Base; |
1146 | } |
1147 | return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); |
1148 | }; |
1149 | |
1150 | // Main decoding loop. |
1151 | for (size_t I = 0; InputIdx != Input.size(); I += 1) { |
1152 | size_t OldI = I; |
1153 | size_t W = 1; |
1154 | size_t Max = std::numeric_limits<size_t>::max(); |
1155 | for (size_t K = Base; true; K += Base) { |
1156 | if (InputIdx == Input.size()) |
1157 | return false; |
1158 | char C = Input[InputIdx++]; |
1159 | size_t Digit = 0; |
1160 | if (!decodePunycodeDigit(C, Value&: Digit)) |
1161 | return false; |
1162 | |
1163 | if (Digit > (Max - I) / W) |
1164 | return false; |
1165 | I += Digit * W; |
1166 | |
1167 | size_t T; |
1168 | if (K <= Bias) |
1169 | T = TMin; |
1170 | else if (K >= Bias + TMax) |
1171 | T = TMax; |
1172 | else |
1173 | T = K - Bias; |
1174 | |
1175 | if (Digit < T) |
1176 | break; |
1177 | |
1178 | if (W > Max / (Base - T)) |
1179 | return false; |
1180 | W *= (Base - T); |
1181 | } |
1182 | size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; |
1183 | Bias = Adapt(I - OldI, NumPoints); |
1184 | |
1185 | if (I / NumPoints > Max - N) |
1186 | return false; |
1187 | N += I / NumPoints; |
1188 | I = I % NumPoints; |
1189 | |
1190 | // Insert N at position I in the output. |
1191 | char UTF8[4] = {}; |
1192 | if (!encodeUTF8(CodePoint: N, Output: UTF8)) |
1193 | return false; |
1194 | Output.insert(Pos: OutputSize + I * 4, S: UTF8, N: 4); |
1195 | } |
1196 | |
1197 | removeNullBytes(Output, StartIdx: OutputSize); |
1198 | return true; |
1199 | } |
1200 | |
1201 | void Demangler::printIdentifier(Identifier Ident) { |
1202 | if (Error || !Print) |
1203 | return; |
1204 | |
1205 | if (Ident.Punycode) { |
1206 | if (!decodePunycode(Input: Ident.Name, Output)) |
1207 | Error = true; |
1208 | } else { |
1209 | print(S: Ident.Name); |
1210 | } |
1211 | } |
1212 | |
1213 | char Demangler::look() const { |
1214 | if (Error || Position >= Input.size()) |
1215 | return 0; |
1216 | |
1217 | return Input[Position]; |
1218 | } |
1219 | |
1220 | char Demangler::consume() { |
1221 | if (Error || Position >= Input.size()) { |
1222 | Error = true; |
1223 | return 0; |
1224 | } |
1225 | |
1226 | return Input[Position++]; |
1227 | } |
1228 | |
1229 | bool Demangler::consumeIf(char Prefix) { |
1230 | if (Error || Position >= Input.size() || Input[Position] != Prefix) |
1231 | return false; |
1232 | |
1233 | Position += 1; |
1234 | return true; |
1235 | } |
1236 | |
1237 | /// Computes A + B. When computation wraps around sets the error and returns |
1238 | /// false. Otherwise assigns the result to A and returns true. |
1239 | bool Demangler::addAssign(uint64_t &A, uint64_t B) { |
1240 | if (A > std::numeric_limits<uint64_t>::max() - B) { |
1241 | Error = true; |
1242 | return false; |
1243 | } |
1244 | |
1245 | A += B; |
1246 | return true; |
1247 | } |
1248 | |
1249 | /// Computes A * B. When computation wraps around sets the error and returns |
1250 | /// false. Otherwise assigns the result to A and returns true. |
1251 | bool Demangler::mulAssign(uint64_t &A, uint64_t B) { |
1252 | if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { |
1253 | Error = true; |
1254 | return false; |
1255 | } |
1256 | |
1257 | A *= B; |
1258 | return true; |
1259 | } |
1260 | |