1// Copyright 2021 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package demangle 6 7import ( 8 "fmt" 9 "math" 10 "math/bits" 11 "strings" 12 "unicode/utf8" 13) 14 15// rustToString demangles a Rust symbol. 16func rustToString(name string, options []Option) (ret string, err error) { 17 if !strings.HasPrefix(name, "_R") { 18 return "", ErrNotMangledName 19 } 20 21 // When the demangling routines encounter an error, they panic 22 // with a value of type demangleErr. 23 defer func() { 24 if r := recover(); r != nil { 25 if de, ok := r.(demangleErr); ok { 26 ret = "" 27 err = de 28 return 29 } 30 panic(r) 31 } 32 }() 33 34 suffix := "" 35 dot := strings.Index(name, ".") 36 if dot >= 0 { 37 suffix = name[dot:] 38 name = name[:dot] 39 } 40 41 name = name[2:] 42 rst := &rustState{orig: name, str: name} 43 44 for _, o := range options { 45 if o == NoTemplateParams { 46 rst.noGenericArgs = true 47 } else if isMaxLength(o) { 48 rst.max = maxLength(o) 49 } 50 } 51 52 rst.symbolName() 53 54 if len(rst.str) > 0 { 55 rst.fail("unparsed characters at end of mangled name") 56 } 57 58 if suffix != "" { 59 llvmStyle := false 60 for _, o := range options { 61 if o == LLVMStyle { 62 llvmStyle = true 63 break 64 } 65 } 66 if llvmStyle { 67 rst.skip = false 68 rst.writeString(" (") 69 rst.writeString(suffix) 70 rst.writeByte(')') 71 } 72 } 73 74 s := rst.buf.String() 75 if rst.max > 0 && len(s) > rst.max { 76 s = s[:rst.max] 77 } 78 return s, nil 79} 80 81// A rustState holds the current state of demangling a Rust string. 
82type rustState struct { 83 orig string // the original string being demangled 84 str string // remainder of string to demangle 85 off int // offset of str within original string 86 buf strings.Builder // demangled string being built 87 skip bool // don't print, just skip 88 lifetimes int64 // number of bound lifetimes 89 last byte // last byte written to buffer 90 noGenericArgs bool // don't demangle generic arguments 91 max int // maximum output length 92} 93 94// fail panics with demangleErr, to be caught in rustToString. 95func (rst *rustState) fail(err string) { 96 panic(demangleErr{err: err, off: rst.off}) 97} 98 99// advance advances the current string offset. 100func (rst *rustState) advance(add int) { 101 if len(rst.str) < add { 102 panic("internal error") 103 } 104 rst.str = rst.str[add:] 105 rst.off += add 106} 107 108// checkChar requires that the next character in the string be c, 109// and advances past it. 110func (rst *rustState) checkChar(c byte) { 111 if len(rst.str) == 0 || rst.str[0] != c { 112 rst.fail("expected " + string(c)) 113 } 114 rst.advance(1) 115} 116 117// writeByte writes a byte to the buffer. 118func (rst *rustState) writeByte(c byte) { 119 if rst.skip { 120 return 121 } 122 if rst.max > 0 && rst.buf.Len() > rst.max { 123 rst.skip = true 124 return 125 } 126 rst.last = c 127 rst.buf.WriteByte(c) 128} 129 130// writeString writes a string to the buffer. 131func (rst *rustState) writeString(s string) { 132 if rst.skip { 133 return 134 } 135 if rst.max > 0 && rst.buf.Len() > rst.max { 136 rst.skip = true 137 return 138 } 139 if len(s) > 0 { 140 rst.last = s[len(s)-1] 141 rst.buf.WriteString(s) 142 } 143} 144 145// symbolName parses: 146// 147// <symbol-name> = "_R" [<decimal-number>] <path> [<instantiating-crate>] 148// <instantiating-crate> = <path> 149// 150// We've already skipped the "_R". 
151func (rst *rustState) symbolName() { 152 if len(rst.str) < 1 { 153 rst.fail("expected symbol-name") 154 } 155 156 if isDigit(rst.str[0]) { 157 rst.fail("unsupported Rust encoding version") 158 } 159 160 rst.path(true) 161 162 if len(rst.str) > 0 { 163 rst.skip = true 164 rst.path(false) 165 } 166} 167 168// path parses: 169// 170// <path> = "C" <identifier> // crate root 171// | "M" <impl-path> <type> // <T> (inherent impl) 172// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 173// | "Y" <type> <path> // <T as Trait> (trait definition) 174// | "N" <namespace> <path> <identifier> // ...::ident (nested path) 175// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 176// | <backref> 177// <namespace> = "C" // closure 178// | "S" // shim 179// | <A-Z> // other special namespaces 180// | <a-z> // internal namespaces 181// 182// needsSeparator is true if we need to write out :: for a generic; 183// it is passed as false if we are in the middle of a type. 184func (rst *rustState) path(needsSeparator bool) { 185 if len(rst.str) < 1 { 186 rst.fail("expected path") 187 } 188 switch c := rst.str[0]; c { 189 case 'C': 190 rst.advance(1) 191 _, ident := rst.identifier() 192 rst.writeString(ident) 193 case 'M', 'X': 194 rst.advance(1) 195 rst.implPath() 196 rst.writeByte('<') 197 rst.demangleType() 198 if c == 'X' { 199 rst.writeString(" as ") 200 rst.path(false) 201 } 202 rst.writeByte('>') 203 case 'Y': 204 rst.advance(1) 205 rst.writeByte('<') 206 rst.demangleType() 207 rst.writeString(" as ") 208 rst.path(false) 209 rst.writeByte('>') 210 case 'N': 211 rst.advance(1) 212 213 if len(rst.str) < 1 { 214 rst.fail("expected namespace") 215 } 216 ns := rst.str[0] 217 switch { 218 case ns >= 'a' && ns <= 'z': 219 case ns >= 'A' && ns <= 'Z': 220 default: 221 rst.fail("invalid namespace character") 222 } 223 rst.advance(1) 224 225 rst.path(needsSeparator) 226 227 dis, ident := rst.identifier() 228 229 if ns >= 'A' && ns <= 'Z' { 230 rst.writeString("::{") 
231 switch ns { 232 case 'C': 233 rst.writeString("closure") 234 case 'S': 235 rst.writeString("shim") 236 default: 237 rst.writeByte(ns) 238 } 239 if len(ident) > 0 { 240 rst.writeByte(':') 241 rst.writeString(ident) 242 } 243 if !rst.skip { 244 fmt.Fprintf(&rst.buf, "#%d}", dis) 245 rst.last = '}' 246 } 247 } else { 248 rst.writeString("::") 249 rst.writeString(ident) 250 } 251 case 'I': 252 rst.advance(1) 253 rst.path(needsSeparator) 254 if needsSeparator { 255 rst.writeString("::") 256 } 257 rst.writeByte('<') 258 rst.genericArgs() 259 rst.writeByte('>') 260 rst.checkChar('E') 261 case 'B': 262 rst.backref(func() { rst.path(needsSeparator) }) 263 default: 264 rst.fail("unrecognized letter in path") 265 } 266} 267 268// implPath parses: 269// 270// <impl-path> = [<disambiguator>] <path> 271func (rst *rustState) implPath() { 272 // This path is not part of the demangled string. 273 hold := rst.skip 274 rst.skip = true 275 defer func() { 276 rst.skip = hold 277 }() 278 279 rst.disambiguator() 280 rst.path(false) 281} 282 283// identifier parses: 284// 285// <identifier> = [<disambiguator>] <undisambiguated-identifier> 286// 287// It returns the disambiguator and the identifier. 
288func (rst *rustState) identifier() (int64, string) { 289 dis := rst.disambiguator() 290 ident, _ := rst.undisambiguatedIdentifier() 291 return dis, ident 292} 293 294// disambiguator parses an optional: 295// 296// <disambiguator> = "s" <base-62-number> 297func (rst *rustState) disambiguator() int64 { 298 if len(rst.str) == 0 || rst.str[0] != 's' { 299 return 0 300 } 301 rst.advance(1) 302 return rst.base62Number() + 1 303} 304 305// undisambiguatedIdentifier parses: 306// 307// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 308func (rst *rustState) undisambiguatedIdentifier() (id string, isPunycode bool) { 309 isPunycode = false 310 if len(rst.str) > 0 && rst.str[0] == 'u' { 311 rst.advance(1) 312 isPunycode = true 313 } 314 315 val := rst.decimalNumber() 316 317 if len(rst.str) > 0 && rst.str[0] == '_' { 318 rst.advance(1) 319 } 320 321 if len(rst.str) < val { 322 rst.fail("not enough characters for identifier") 323 } 324 id = rst.str[:val] 325 rst.advance(val) 326 327 for i := 0; i < len(id); i++ { 328 c := id[i] 329 switch { 330 case c >= '0' && c <= '9': 331 case c >= 'A' && c <= 'Z': 332 case c >= 'a' && c <= 'z': 333 case c == '_': 334 default: 335 rst.fail("invalid character in identifier") 336 } 337 } 338 339 if isPunycode { 340 id = rst.expandPunycode(id) 341 } 342 343 return id, isPunycode 344} 345 346// expandPunycode decodes the Rust version of punycode. 347// This algorithm is taken from RFC 3492 section 6.2. 
//
// The part of s before the last underscore, if any, is taken as the
// literal characters; the part after it is the encoded insertions.
// Each insertion is a variable-length number that determines both the
// code point to insert and the position at which to insert it.
// Malformed input is reported via rst.fail.
func (rst *rustState) expandPunycode(s string) string {
	const (
		base        = 36
		tmin        = 1
		tmax        = 26
		skew        = 38
		damp        = 700
		initialBias = 72
		initialN    = 128
	)

	var (
		output   []rune
		encoding string
	)
	// Split at the last underscore: literal prefix, then encoding.
	// With no underscore the whole string is the encoding.
	idx := strings.LastIndex(s, "_")
	if idx >= 0 {
		output = []rune(s[:idx])
		encoding = s[idx+1:]
	} else {
		encoding = s
	}

	i := 0
	n := initialN
	bias := initialBias

	pos := 0
	for pos < len(encoding) {
		// Decode one variable-length number, adding it to i.
		oldI := i
		w := 1
		for k := base; ; k += base {
			if pos == len(encoding) {
				rst.fail("unterminated punycode")
			}

			// Letters (either case) are digits 0-25,
			// decimal digits are 26-35.
			var digit byte
			d := encoding[pos]
			pos++
			switch {
			case '0' <= d && d <= '9':
				digit = d - '0' + 26
			case 'A' <= d && d <= 'Z':
				digit = d - 'A'
			case 'a' <= d && d <= 'z':
				digit = d - 'a'
			default:
				rst.fail("invalid punycode digit")
			}

			i += int(digit) * w
			if i < 0 {
				rst.fail("punycode number overflow")
			}

			// t is the threshold for this digit position,
			// k - bias clamped to the range [tmin, tmax].
			var t int
			if k <= bias {
				t = tmin
			} else if k > bias+tmax {
				t = tmax
			} else {
				t = k - bias
			}

			// A digit below the threshold ends the number.
			if int(digit) < t {
				break
			}

			if w >= math.MaxInt32/base {
				rst.fail("punycode number overflow")
			}
			w *= base - t
		}

		// Adapt the bias for the next number based on the size of
		// this one.
		delta := i - oldI
		numPoints := len(output) + 1
		firstTime := oldI == 0
		if firstTime {
			delta /= damp
		} else {
			delta /= 2
		}
		delta += delta / numPoints
		k := 0
		for delta > ((base-tmin)*tmax)/2 {
			delta /= base - tmin
			k += base
		}
		bias = k + ((base-tmin+1)*delta)/(delta+skew)

		// i encodes both values: the quotient advances the code
		// point n, the remainder is the insertion position.
		n += i / (len(output) + 1)
		if n > utf8.MaxRune {
			rst.fail("punycode rune overflow")
		} else if !utf8.ValidRune(rune(n)) {
			rst.fail("punycode invalid code point")
		}
		i %= len(output) + 1
		// Grow by one and shift the tail right to make room at i.
		output = append(output, 0)
		copy(output[i+1:], output[i:])
		output[i] = rune(n)
		i++
	}

	return string(output)
}

// genericArgs prints
a list of generic arguments, without angle brackets. 455func (rst *rustState) genericArgs() { 456 if rst.noGenericArgs { 457 hold := rst.skip 458 rst.skip = true 459 defer func() { 460 rst.skip = hold 461 }() 462 } 463 464 first := true 465 for len(rst.str) > 0 && rst.str[0] != 'E' { 466 if first { 467 first = false 468 } else { 469 rst.writeString(", ") 470 } 471 rst.genericArg() 472 } 473} 474 475// genericArg parses: 476// 477// <generic-arg> = <lifetime> 478// | <type> 479// | "K" <const> // forward-compat for const generics 480// <lifetime> = "L" <base-62-number> 481func (rst *rustState) genericArg() { 482 if len(rst.str) < 1 { 483 rst.fail("expected generic-arg") 484 } 485 if rst.str[0] == 'L' { 486 rst.advance(1) 487 rst.writeLifetime(rst.base62Number()) 488 } else if rst.str[0] == 'K' { 489 rst.advance(1) 490 rst.demangleConst() 491 } else { 492 rst.demangleType() 493 } 494} 495 496// binder parses an optional: 497// 498// <binder> = "G" <base-62-number> 499func (rst *rustState) binder() { 500 if len(rst.str) < 1 || rst.str[0] != 'G' { 501 return 502 } 503 rst.advance(1) 504 505 binderLifetimes := rst.base62Number() + 1 506 507 // Every bound lifetime should be referenced later. 508 if binderLifetimes >= int64(len(rst.str))-rst.lifetimes { 509 rst.fail("binder lifetimes overflow") 510 } 511 512 rst.writeString("for<") 513 for i := int64(0); i < binderLifetimes; i++ { 514 if i > 0 { 515 rst.writeString(", ") 516 } 517 rst.lifetimes++ 518 rst.writeLifetime(1) 519 } 520 rst.writeString("> ") 521} 522 523// demangleType parses: 524// 525// <type> = <basic-type> 526// | <path> // named type 527// | "A" <type> <const> // [T; N] 528// | "S" <type> // [T] 529// | "T" {<type>} "E" // (T1, T2, T3, ...) 530// | "R" [<lifetime>] <type> // &T 531// | "Q" [<lifetime>] <type> // &mut T 532// | "P" <type> // *const T 533// | "O" <type> // *mut T 534// | "F" <fn-sig> // fn(...) -> ... 
535// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 536// | <backref> 537func (rst *rustState) demangleType() { 538 if len(rst.str) < 1 { 539 rst.fail("expected type") 540 } 541 c := rst.str[0] 542 if c >= 'a' && c <= 'z' { 543 rst.basicType() 544 return 545 } 546 switch c { 547 case 'C', 'M', 'X', 'Y', 'N', 'I': 548 rst.path(false) 549 case 'A', 'S': 550 rst.advance(1) 551 rst.writeByte('[') 552 rst.demangleType() 553 if c == 'A' { 554 rst.writeString("; ") 555 rst.demangleConst() 556 } 557 rst.writeByte(']') 558 case 'T': 559 rst.advance(1) 560 rst.writeByte('(') 561 c := 0 562 for len(rst.str) > 0 && rst.str[0] != 'E' { 563 if c > 0 { 564 rst.writeString(", ") 565 } 566 c++ 567 rst.demangleType() 568 } 569 if c == 1 { 570 rst.writeByte(',') 571 } 572 rst.writeByte(')') 573 rst.checkChar('E') 574 case 'R', 'Q': 575 rst.advance(1) 576 rst.writeByte('&') 577 if len(rst.str) > 0 && rst.str[0] == 'L' { 578 rst.advance(1) 579 if lifetime := rst.base62Number(); lifetime > 0 { 580 rst.writeLifetime(lifetime) 581 rst.writeByte(' ') 582 } 583 } 584 if c == 'Q' { 585 rst.writeString("mut ") 586 } 587 rst.demangleType() 588 case 'P': 589 rst.advance(1) 590 rst.writeString("*const ") 591 rst.demangleType() 592 case 'O': 593 rst.advance(1) 594 rst.writeString("*mut ") 595 rst.demangleType() 596 case 'F': 597 rst.advance(1) 598 hold := rst.lifetimes 599 rst.fnSig() 600 rst.lifetimes = hold 601 case 'D': 602 rst.advance(1) 603 hold := rst.lifetimes 604 rst.dynBounds() 605 rst.lifetimes = hold 606 if len(rst.str) == 0 || rst.str[0] != 'L' { 607 rst.fail("expected L") 608 } 609 rst.advance(1) 610 if lifetime := rst.base62Number(); lifetime > 0 { 611 if rst.last != ' ' { 612 rst.writeByte(' ') 613 } 614 rst.writeString("+ ") 615 rst.writeLifetime(lifetime) 616 } 617 case 'B': 618 rst.backref(rst.demangleType) 619 default: 620 rst.fail("unrecognized character in type") 621 } 622} 623 624var rustBasicTypes = map[byte]string{ 625 'a': "i8", 626 'b': "bool", 627 
'c': "char", 628 'd': "f64", 629 'e': "str", 630 'f': "f32", 631 'h': "u8", 632 'i': "isize", 633 'j': "usize", 634 'l': "i32", 635 'm': "u32", 636 'n': "i128", 637 'o': "u128", 638 'p': "_", 639 's': "i16", 640 't': "u16", 641 'u': "()", 642 'v': "...", 643 'x': "i64", 644 'y': "u64", 645 'z': "!", 646} 647 648// basicType parses: 649// 650// <basic-type> 651func (rst *rustState) basicType() { 652 if len(rst.str) < 1 { 653 rst.fail("expected basic type") 654 } 655 str, ok := rustBasicTypes[rst.str[0]] 656 if !ok { 657 rst.fail("unrecognized basic type character") 658 } 659 rst.advance(1) 660 rst.writeString(str) 661} 662 663// fnSig parses: 664// 665// <fn-sig> = [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 666// <abi> = "C" 667// | <undisambiguated-identifier> 668func (rst *rustState) fnSig() { 669 rst.binder() 670 if len(rst.str) > 0 && rst.str[0] == 'U' { 671 rst.advance(1) 672 rst.writeString("unsafe ") 673 } 674 if len(rst.str) > 0 && rst.str[0] == 'K' { 675 rst.advance(1) 676 if len(rst.str) > 0 && rst.str[0] == 'C' { 677 rst.advance(1) 678 rst.writeString(`extern "C" `) 679 } else { 680 rst.writeString(`extern "`) 681 id, isPunycode := rst.undisambiguatedIdentifier() 682 if isPunycode { 683 rst.fail("punycode used in ABI string") 684 } 685 id = strings.ReplaceAll(id, "_", "-") 686 rst.writeString(id) 687 rst.writeString(`" `) 688 } 689 } 690 rst.writeString("fn(") 691 first := true 692 for len(rst.str) > 0 && rst.str[0] != 'E' { 693 if first { 694 first = false 695 } else { 696 rst.writeString(", ") 697 } 698 rst.demangleType() 699 } 700 rst.checkChar('E') 701 rst.writeByte(')') 702 if len(rst.str) > 0 && rst.str[0] == 'u' { 703 rst.advance(1) 704 } else { 705 rst.writeString(" -> ") 706 rst.demangleType() 707 } 708} 709 710// dynBounds parses: 711// 712// <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 713func (rst *rustState) dynBounds() { 714 rst.writeString("dyn ") 715 rst.binder() 716 first := true 717 for len(rst.str) > 0 && rst.str[0] != 'E' { 718 if 
first { 719 first = false 720 } else { 721 rst.writeString(" + ") 722 } 723 rst.dynTrait() 724 } 725 rst.checkChar('E') 726} 727 728// dynTrait parses: 729// 730// <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 731// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 732func (rst *rustState) dynTrait() { 733 started := rst.pathStartGenerics() 734 for len(rst.str) > 0 && rst.str[0] == 'p' { 735 rst.advance(1) 736 if started { 737 rst.writeString(", ") 738 } else { 739 rst.writeByte('<') 740 started = true 741 } 742 id, _ := rst.undisambiguatedIdentifier() 743 rst.writeString(id) 744 rst.writeString(" = ") 745 rst.demangleType() 746 } 747 if started { 748 rst.writeByte('>') 749 } 750} 751 752// pathStartGenerics is like path but if it sees an I to start generic 753// arguments it won't close them. It reports whether it started generics. 754func (rst *rustState) pathStartGenerics() bool { 755 if len(rst.str) < 1 { 756 rst.fail("expected path") 757 } 758 switch rst.str[0] { 759 case 'I': 760 rst.advance(1) 761 rst.path(false) 762 rst.writeByte('<') 763 rst.genericArgs() 764 rst.checkChar('E') 765 return true 766 case 'B': 767 var started bool 768 rst.backref(func() { started = rst.pathStartGenerics() }) 769 return started 770 default: 771 rst.path(false) 772 return false 773 } 774} 775 776// writeLifetime writes out a lifetime binding. 
777func (rst *rustState) writeLifetime(lifetime int64) { 778 rst.writeByte('\'') 779 if lifetime == 0 { 780 rst.writeByte('_') 781 return 782 } 783 depth := rst.lifetimes - lifetime 784 if depth < 0 { 785 rst.fail("invalid lifetime") 786 } else if depth < 26 { 787 rst.writeByte('a' + byte(depth)) 788 } else { 789 rst.writeByte('z') 790 if !rst.skip { 791 fmt.Fprintf(&rst.buf, "%d", depth-26+1) 792 rst.last = '0' 793 } 794 } 795} 796 797// demangleConst parses: 798// 799// <const> = <type> <const-data> 800// | "p" // placeholder, shown as _ 801// | <backref> 802// <const-data> = ["n"] {<hex-digit>} "_" 803func (rst *rustState) demangleConst() { 804 if len(rst.str) < 1 { 805 rst.fail("expected constant") 806 } 807 808 if rst.str[0] == 'B' { 809 rst.backref(rst.demangleConst) 810 return 811 } 812 813 if rst.str[0] == 'p' { 814 rst.advance(1) 815 rst.writeByte('_') 816 return 817 } 818 819 typ := rst.str[0] 820 821 const ( 822 invalid = iota 823 signedInt 824 unsignedInt 825 boolean 826 character 827 ) 828 829 var kind int 830 switch typ { 831 case 'a', 's', 'l', 'x', 'n', 'i': 832 kind = signedInt 833 case 'h', 't', 'm', 'y', 'o', 'j': 834 kind = unsignedInt 835 case 'b': 836 kind = boolean 837 case 'c': 838 kind = character 839 default: 840 rst.fail("unrecognized constant type") 841 } 842 843 rst.advance(1) 844 845 if kind == signedInt && len(rst.str) > 0 && rst.str[0] == 'n' { 846 rst.advance(1) 847 rst.writeByte('-') 848 } 849 850 start := rst.str 851 digits := 0 852 val := uint64(0) 853digitLoop: 854 for len(rst.str) > 0 { 855 c := rst.str[0] 856 var digit uint64 857 switch { 858 case c >= '0' && c <= '9': 859 digit = uint64(c - '0') 860 case c >= 'a' && c <= 'f': 861 digit = uint64(c - 'a' + 10) 862 case c == '_': 863 rst.advance(1) 864 break digitLoop 865 default: 866 rst.fail("expected hex digit or _") 867 } 868 rst.advance(1) 869 if val == 0 && digit == 0 && (len(rst.str) == 0 || rst.str[0] != '_') { 870 rst.fail("invalid leading 0 in constant") 871 } 872 val 
*= 16 873 val += digit 874 digits++ 875 } 876 877 if digits == 0 { 878 rst.fail("expected constant") 879 } 880 881 switch kind { 882 case signedInt, unsignedInt: 883 if digits > 16 { 884 // Value too big, just write out the string. 885 rst.writeString("0x") 886 rst.writeString(start[:digits]) 887 } else { 888 if !rst.skip { 889 fmt.Fprintf(&rst.buf, "%d", val) 890 rst.last = '0' 891 } 892 } 893 case boolean: 894 if digits > 1 { 895 rst.fail("boolean value too large") 896 } else if val == 0 { 897 rst.writeString("false") 898 } else if val == 1 { 899 rst.writeString("true") 900 } else { 901 rst.fail("invalid boolean value") 902 } 903 case character: 904 if digits > 6 { 905 rst.fail("character value too large") 906 } 907 rst.writeByte('\'') 908 if val == '\t' { 909 rst.writeString(`\t`) 910 } else if val == '\r' { 911 rst.writeString(`\r`) 912 } else if val == '\n' { 913 rst.writeString(`\n`) 914 } else if val == '\\' { 915 rst.writeString(`\\`) 916 } else if val == '\'' { 917 rst.writeString(`\'`) 918 } else if val >= ' ' && val <= '~' { 919 // printable ASCII character 920 rst.writeByte(byte(val)) 921 } else { 922 if !rst.skip { 923 fmt.Fprintf(&rst.buf, `\u{%x}`, val) 924 rst.last = '}' 925 } 926 } 927 rst.writeByte('\'') 928 default: 929 panic("internal error") 930 } 931} 932 933// base62Number parses: 934// 935// <base-62-number> = {<0-9a-zA-Z>} "_" 936func (rst *rustState) base62Number() int64 { 937 if len(rst.str) > 0 && rst.str[0] == '_' { 938 rst.advance(1) 939 return 0 940 } 941 val := int64(0) 942 for len(rst.str) > 0 { 943 c := rst.str[0] 944 rst.advance(1) 945 if c == '_' { 946 return val + 1 947 } 948 val *= 62 949 if c >= '0' && c <= '9' { 950 val += int64(c - '0') 951 } else if c >= 'a' && c <= 'z' { 952 val += int64(c - 'a' + 10) 953 } else if c >= 'A' && c <= 'Z' { 954 val += int64(c - 'A' + 36) 955 } else { 956 rst.fail("invalid digit in base 62 number") 957 } 958 } 959 rst.fail("expected _ after base 62 number") 960 return 0 961} 962 963// backref 
parses: 964// 965// <backref> = "B" <base-62-number> 966func (rst *rustState) backref(demangle func()) { 967 backoff := rst.off 968 969 rst.checkChar('B') 970 idx64 := rst.base62Number() 971 972 if rst.skip { 973 return 974 } 975 if rst.max > 0 && rst.buf.Len() > rst.max { 976 return 977 } 978 979 idx := int(idx64) 980 if int64(idx) != idx64 { 981 rst.fail("backref index overflow") 982 } 983 if idx < 0 || idx >= backoff { 984 rst.fail("invalid backref index") 985 } 986 987 holdStr := rst.str 988 holdOff := rst.off 989 rst.str = rst.orig[idx:backoff] 990 rst.off = idx 991 defer func() { 992 rst.str = holdStr 993 rst.off = holdOff 994 }() 995 996 demangle() 997} 998 999func (rst *rustState) decimalNumber() int { 1000 if len(rst.str) == 0 { 1001 rst.fail("expected number") 1002 } 1003 1004 val := 0 1005 for len(rst.str) > 0 && isDigit(rst.str[0]) { 1006 add := int(rst.str[0] - '0') 1007 if val >= math.MaxInt32/10-add { 1008 rst.fail("decimal number overflow") 1009 } 1010 val *= 10 1011 val += add 1012 rst.advance(1) 1013 } 1014 return val 1015} 1016 1017// oldRustToString demangles a Rust symbol using the old demangling. 1018// The second result reports whether this is a valid Rust mangled name. 1019func oldRustToString(name string, options []Option) (string, bool) { 1020 max := 0 1021 for _, o := range options { 1022 if isMaxLength(o) { 1023 max = maxLength(o) 1024 } 1025 } 1026 1027 // We know that the string starts with _ZN. 1028 name = name[3:] 1029 1030 hexDigit := func(c byte) (byte, bool) { 1031 switch { 1032 case c >= '0' && c <= '9': 1033 return c - '0', true 1034 case c >= 'a' && c <= 'f': 1035 return c - 'a' + 10, true 1036 default: 1037 return 0, false 1038 } 1039 } 1040 1041 // We know that the strings end with "17h" followed by 16 characters 1042 // followed by "E". We check that the 16 characters are all hex digits. 1043 // Also the hex digits must contain at least 5 distinct digits. 
1044 seen := uint16(0) 1045 for i := len(name) - 17; i < len(name)-1; i++ { 1046 digit, ok := hexDigit(name[i]) 1047 if !ok { 1048 return "", false 1049 } 1050 seen |= 1 << digit 1051 } 1052 if bits.OnesCount16(seen) < 5 { 1053 return "", false 1054 } 1055 name = name[:len(name)-20] 1056 1057 // The name is a sequence of length-preceded identifiers. 1058 var sb strings.Builder 1059 for len(name) > 0 { 1060 if max > 0 && sb.Len() > max { 1061 break 1062 } 1063 1064 if !isDigit(name[0]) { 1065 return "", false 1066 } 1067 1068 val := 0 1069 for len(name) > 0 && isDigit(name[0]) { 1070 add := int(name[0] - '0') 1071 if val >= math.MaxInt32/10-add { 1072 return "", false 1073 } 1074 val *= 10 1075 val += add 1076 name = name[1:] 1077 } 1078 1079 // An optional trailing underscore can separate the 1080 // length from the identifier. 1081 if len(name) > 0 && name[0] == '_' { 1082 name = name[1:] 1083 val-- 1084 } 1085 1086 if len(name) < val { 1087 return "", false 1088 } 1089 1090 id := name[:val] 1091 name = name[val:] 1092 1093 if sb.Len() > 0 { 1094 sb.WriteString("::") 1095 } 1096 1097 // Ignore leading underscores preceding escape sequences. 1098 if strings.HasPrefix(id, "_$") { 1099 id = id[1:] 1100 } 1101 1102 // The identifier can have escape sequences. 
1103 escape: 1104 for len(id) > 0 { 1105 switch c := id[0]; c { 1106 case '$': 1107 codes := map[string]byte{ 1108 "SP": '@', 1109 "BP": '*', 1110 "RF": '&', 1111 "LT": '<', 1112 "GT": '>', 1113 "LP": '(', 1114 "RP": ')', 1115 } 1116 1117 valid := true 1118 if len(id) > 2 && id[1] == 'C' && id[2] == '$' { 1119 sb.WriteByte(',') 1120 id = id[3:] 1121 } else if len(id) > 4 && id[1] == 'u' && id[4] == '$' { 1122 dig1, ok1 := hexDigit(id[2]) 1123 dig2, ok2 := hexDigit(id[3]) 1124 val := (dig1 << 4) | dig2 1125 if !ok1 || !ok2 || dig1 > 7 || val < ' ' { 1126 valid = false 1127 } else { 1128 sb.WriteByte(val) 1129 id = id[5:] 1130 } 1131 } else if len(id) > 3 && id[3] == '$' { 1132 if code, ok := codes[id[1:3]]; !ok { 1133 valid = false 1134 } else { 1135 sb.WriteByte(code) 1136 id = id[4:] 1137 } 1138 } else { 1139 valid = false 1140 } 1141 if !valid { 1142 sb.WriteString(id) 1143 break escape 1144 } 1145 case '.': 1146 if strings.HasPrefix(id, "..") { 1147 sb.WriteString("::") 1148 id = id[2:] 1149 } else { 1150 sb.WriteByte(c) 1151 id = id[1:] 1152 } 1153 default: 1154 sb.WriteByte(c) 1155 id = id[1:] 1156 } 1157 } 1158 } 1159 1160 s := sb.String() 1161 if max > 0 && len(s) > max { 1162 s = s[:max] 1163 } 1164 return s, true 1165} 1166