1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5/* 6Package mail implements parsing of mail messages. 7 8For the most part, this package follows the syntax as specified by RFC 5322 and 9extended by RFC 6532. 10Notable divergences: 11 - Obsolete address formats are not parsed, including addresses with 12 embedded route information. 13 - The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 - No unicode normalization is performed. 16 - A leading From line is permitted, as in mbox format (RFC 4155). 17*/ 18package mail 19 20import ( 21 "bufio" 22 "errors" 23 "fmt" 24 "io" 25 "log" 26 "mime" 27 "net" 28 "net/textproto" 29 "strings" 30 "sync" 31 "time" 32 "unicode/utf8" 33) 34 35var debug = debugT(false) 36 37type debugT bool 38 39func (d debugT) Printf(format string, args ...any) { 40 if d { 41 log.Printf(format, args...) 42 } 43} 44 45// A Message represents a parsed mail message. 46type Message struct { 47 Header Header 48 Body io.Reader 49} 50 51// ReadMessage reads a message from r. 52// The headers are parsed, and the body of the message will be available 53// for reading from msg.Body. 54func ReadMessage(r io.Reader) (msg *Message, err error) { 55 tp := textproto.NewReader(bufio.NewReader(r)) 56 57 hdr, err := readHeader(tp) 58 if err != nil && (err != io.EOF || len(hdr) == 0) { 59 return nil, err 60 } 61 62 return &Message{ 63 Header: Header(hdr), 64 Body: tp.R, 65 }, nil 66} 67 68// readHeader reads the message headers from r. 69// This is like textproto.ReadMIMEHeader, but doesn't validate. 70// The fix for issue #53188 tightened up net/textproto to enforce 71// restrictions of RFC 7230. 72// This package implements RFC 5322, which does not have those restrictions. 73// This function copies the relevant code from net/textproto, 74// simplified for RFC 5322. 
//
// The returned map is never nil; on error it holds whatever headers were
// parsed before the failure.
func readHeader(r *textproto.Reader) (map[string][]string, error) {
	m := make(map[string][]string)

	// The first line cannot start with a leading space.
	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
		line, err := r.ReadLine()
		if err != nil {
			return m, err
		}
		return m, errors.New("malformed initial line: " + line)
	}

	for {
		kv, err := r.ReadContinuedLine()
		if kv == "" {
			// Blank line (end of headers) or a read error with no data.
			return m, err
		}

		// Key ends at first colon.
		k, v, ok := strings.Cut(kv, ":")
		if !ok {
			return m, errors.New("malformed header line: " + kv)
		}
		key := textproto.CanonicalMIMEHeaderKey(k)

		// Permit empty key, because that is what we did in the past.
		if key == "" {
			continue
		}

		// Skip initial spaces in value.
		value := strings.TrimLeft(v, " \t")

		m[key] = append(m[key], value)

		// A line may be returned together with an error; record the line
		// first, then report the error.
		if err != nil {
			return m, err
		}
	}
}

// Layouts suitable for passing to time.Parse.
// These are tried in order.
var (
	dateLayoutsBuildOnce sync.Once
	dateLayouts          []string
)

// buildDateLayouts fills dateLayouts with every combination of the optional
// date components below. The list is built in a presized local slice and
// assigned once, so calling it again rebuilds the same list instead of
// appending a second copy.
func buildDateLayouts() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...

	layouts := make([]string, 0, len(dows)*len(days)*len(years)*len(seconds)*len(zones))
	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						layouts = append(layouts, s)
					}
				}
			}
		}
	}
	dateLayouts = layouts
}

// ParseDate parses an RFC 5322 date string.
148func ParseDate(date string) (time.Time, error) { 149 dateLayoutsBuildOnce.Do(buildDateLayouts) 150 // CR and LF must match and are tolerated anywhere in the date field. 151 date = strings.ReplaceAll(date, "\r\n", "") 152 if strings.Contains(date, "\r") { 153 return time.Time{}, errors.New("mail: header has a CR without LF") 154 } 155 // Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII 156 p := addrParser{date, nil} 157 p.skipSpace() 158 159 // RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone 160 // zone length is always 5 chars unless obsolete (obs-zone) 161 if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 { 162 date = p.s[:ind+5] 163 p.s = p.s[ind+5:] 164 } else { 165 ind := strings.Index(p.s, "T") 166 if ind == 0 { 167 // In this case we have the following date formats: 168 // * Thu, 20 Nov 1997 09:55:06 MDT 169 // * Thu, 20 Nov 1997 09:55:06 MDT (MDT) 170 // * Thu, 20 Nov 1997 09:55:06 MDT (This comment) 171 ind = strings.Index(p.s[1:], "T") 172 if ind != -1 { 173 ind++ 174 } 175 } 176 177 if ind != -1 && len(p.s) >= ind+5 { 178 // The last letter T of the obsolete time zone is checked when no standard time zone is found. 179 // If T is misplaced, the date to parse is garbage. 180 date = p.s[:ind+1] 181 p.s = p.s[ind+1:] 182 } 183 } 184 if !p.skipCFWS() { 185 return time.Time{}, errors.New("mail: misformatted parenthetical comment") 186 } 187 for _, layout := range dateLayouts { 188 t, err := time.Parse(layout, date) 189 if err == nil { 190 return t, nil 191 } 192 } 193 return time.Time{}, errors.New("mail: header could not be parsed") 194} 195 196// A Header represents the key-value pairs in a mail message header. 197type Header map[string][]string 198 199// Get gets the first value associated with the given key. 200// It is case insensitive; CanonicalMIMEHeaderKey is used 201// to canonicalize the provided key. 202// If there are no values associated with the key, Get returns "". 
// To access multiple values of a key, or to use non-canonical keys,
// access the map directly.
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}

// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get yields an empty string for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")

// Date parses the Date header field.
// It returns ErrHeaderNotPresent if the field is missing or empty;
// otherwise the value is handed to ParseDate.
func (h Header) Date() (time.Time, error) {
	hdr := h.Get("Date")
	if hdr == "" {
		return time.Time{}, ErrHeaderNotPresent
	}
	return ParseDate(hdr)
}

// AddressList parses the named header field as a list of addresses.
// It returns ErrHeaderNotPresent if the field is missing or empty;
// otherwise the value is handed to ParseAddressList.
func (h Header) AddressList(key string) ([]*Address, error) {
	hdr := h.Get(key)
	if hdr == "" {
		return nil, ErrHeaderNotPresent
	}
	return ParseAddressList(hdr)
}

// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
func ParseAddress(address string) (*Address, error) {
	return (&addrParser{s: address}).parseSingleAddress()
}

// ParseAddressList parses the given string as a list of addresses.
func ParseAddressList(list string) ([]*Address, error) {
	return (&addrParser{s: list}).parseAddressList()
}

// An AddressParser is an RFC 5322 address parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	WordDecoder *mime.WordDecoder
}

// Parse parses a single RFC 5322 address of the
// form "Gogh Fir <gf@example.com>" or "foo@example.com".
255func (p *AddressParser) Parse(address string) (*Address, error) { 256 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 257} 258 259// ParseList parses the given string as a list of comma-separated addresses 260// of the form "Gogh Fir <[email protected]>" or "[email protected]". 261func (p *AddressParser) ParseList(list string) ([]*Address, error) { 262 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 263} 264 265// String formats the address as a valid RFC 5322 address. 266// If the address's name contains non-ASCII characters 267// the name will be rendered according to RFC 2047. 268func (a *Address) String() string { 269 // Format address local@domain 270 at := strings.LastIndex(a.Address, "@") 271 var local, domain string 272 if at < 0 { 273 // This is a malformed address ("@" is required in addr-spec); 274 // treat the whole address as local-part. 275 local = a.Address 276 } else { 277 local, domain = a.Address[:at], a.Address[at+1:] 278 } 279 280 // Add quotes if needed 281 quoteLocal := false 282 for i, r := range local { 283 if isAtext(r, false) { 284 continue 285 } 286 if r == '.' { 287 // Dots are okay if they are surrounded by atext. 288 // We only need to check that the previous byte is 289 // not a dot, and this isn't the end of the string. 290 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 291 continue 292 } 293 } 294 quoteLocal = true 295 break 296 } 297 if quoteLocal { 298 local = quoteString(local) 299 300 } 301 302 s := "<" + local + "@" + domain + ">" 303 304 if a.Name == "" { 305 return s 306 } 307 308 // If every character is printable ASCII, quoting is simple. 309 allPrintable := true 310 for _, r := range a.Name { 311 // isWSP here should actually be isFWS, 312 // but we don't support folding yet. 
313 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 314 allPrintable = false 315 break 316 } 317 } 318 if allPrintable { 319 return quoteString(a.Name) + " " + s 320 } 321 322 // Text in an encoded-word in a display-name must not contain certain 323 // characters like quotes or parentheses (see RFC 2047 section 5.3). 324 // When this is the case encode the name using base64 encoding. 325 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 326 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 327 } 328 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 329} 330 331type addrParser struct { 332 s string 333 dec *mime.WordDecoder // may be nil 334} 335 336func (p *addrParser) parseAddressList() ([]*Address, error) { 337 var list []*Address 338 for { 339 p.skipSpace() 340 341 // allow skipping empty entries (RFC5322 obs-addr-list) 342 if p.consume(',') { 343 continue 344 } 345 346 addrs, err := p.parseAddress(true) 347 if err != nil { 348 return nil, err 349 } 350 list = append(list, addrs...) 351 352 if !p.skipCFWS() { 353 return nil, errors.New("mail: misformatted parenthetical comment") 354 } 355 if p.empty() { 356 break 357 } 358 if p.peek() != ',' { 359 return nil, errors.New("mail: expected comma") 360 } 361 362 // Skip empty entries for obs-addr-list. 
363 for p.consume(',') { 364 p.skipSpace() 365 } 366 if p.empty() { 367 break 368 } 369 } 370 return list, nil 371} 372 373func (p *addrParser) parseSingleAddress() (*Address, error) { 374 addrs, err := p.parseAddress(true) 375 if err != nil { 376 return nil, err 377 } 378 if !p.skipCFWS() { 379 return nil, errors.New("mail: misformatted parenthetical comment") 380 } 381 if !p.empty() { 382 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 383 } 384 if len(addrs) == 0 { 385 return nil, errors.New("mail: empty group") 386 } 387 if len(addrs) > 1 { 388 return nil, errors.New("mail: group with multiple addresses") 389 } 390 return addrs[0], nil 391} 392 393// parseAddress parses a single RFC 5322 address at the start of p. 394func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) { 395 debug.Printf("parseAddress: %q", p.s) 396 p.skipSpace() 397 if p.empty() { 398 return nil, errors.New("mail: no address") 399 } 400 401 // address = mailbox / group 402 // mailbox = name-addr / addr-spec 403 // group = display-name ":" [group-list] ";" [CFWS] 404 405 // addr-spec has a more restricted grammar than name-addr, 406 // so try parsing it first, and fallback to name-addr. 407 // TODO(dsymonds): Is this really correct? 
408 spec, err := p.consumeAddrSpec() 409 if err == nil { 410 var displayName string 411 p.skipSpace() 412 if !p.empty() && p.peek() == '(' { 413 displayName, err = p.consumeDisplayNameComment() 414 if err != nil { 415 return nil, err 416 } 417 } 418 419 return []*Address{{ 420 Name: displayName, 421 Address: spec, 422 }}, err 423 } 424 debug.Printf("parseAddress: not an addr-spec: %v", err) 425 debug.Printf("parseAddress: state is now %q", p.s) 426 427 // display-name 428 var displayName string 429 if p.peek() != '<' { 430 displayName, err = p.consumePhrase() 431 if err != nil { 432 return nil, err 433 } 434 } 435 debug.Printf("parseAddress: displayName=%q", displayName) 436 437 p.skipSpace() 438 if handleGroup { 439 if p.consume(':') { 440 return p.consumeGroupList() 441 } 442 } 443 // angle-addr = "<" addr-spec ">" 444 if !p.consume('<') { 445 atext := true 446 for _, r := range displayName { 447 if !isAtext(r, true) { 448 atext = false 449 break 450 } 451 } 452 if atext { 453 // The input is like "foo.bar"; it's possible the input 454 // meant to be "foo.bar@domain", or "foo.bar <...>". 455 return nil, errors.New("mail: missing '@' or angle-addr") 456 } 457 // The input is like "Full Name", which couldn't possibly be a 458 // valid email address if followed by "@domain"; the input 459 // likely meant to be "Full Name <...>". 460 return nil, errors.New("mail: no angle-addr") 461 } 462 spec, err = p.consumeAddrSpec() 463 if err != nil { 464 return nil, err 465 } 466 if !p.consume('>') { 467 return nil, errors.New("mail: unclosed angle-addr") 468 } 469 debug.Printf("parseAddress: spec=%q", spec) 470 471 return []*Address{{ 472 Name: displayName, 473 Address: spec, 474 }}, nil 475} 476 477func (p *addrParser) consumeGroupList() ([]*Address, error) { 478 var group []*Address 479 // handle empty group. 
480 p.skipSpace() 481 if p.consume(';') { 482 if !p.skipCFWS() { 483 return nil, errors.New("mail: misformatted parenthetical comment") 484 } 485 return group, nil 486 } 487 488 for { 489 p.skipSpace() 490 // embedded groups not allowed. 491 addrs, err := p.parseAddress(false) 492 if err != nil { 493 return nil, err 494 } 495 group = append(group, addrs...) 496 497 if !p.skipCFWS() { 498 return nil, errors.New("mail: misformatted parenthetical comment") 499 } 500 if p.consume(';') { 501 if !p.skipCFWS() { 502 return nil, errors.New("mail: misformatted parenthetical comment") 503 } 504 break 505 } 506 if !p.consume(',') { 507 return nil, errors.New("mail: expected comma") 508 } 509 } 510 return group, nil 511} 512 513// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 514func (p *addrParser) consumeAddrSpec() (spec string, err error) { 515 debug.Printf("consumeAddrSpec: %q", p.s) 516 517 orig := *p 518 defer func() { 519 if err != nil { 520 *p = orig 521 } 522 }() 523 524 // local-part = dot-atom / quoted-string 525 var localPart string 526 p.skipSpace() 527 if p.empty() { 528 return "", errors.New("mail: no addr-spec") 529 } 530 if p.peek() == '"' { 531 // quoted-string 532 debug.Printf("consumeAddrSpec: parsing quoted-string") 533 localPart, err = p.consumeQuotedString() 534 if localPart == "" { 535 err = errors.New("mail: empty quoted string in addr-spec") 536 } 537 } else { 538 // dot-atom 539 debug.Printf("consumeAddrSpec: parsing dot-atom") 540 localPart, err = p.consumeAtom(true, false) 541 } 542 if err != nil { 543 debug.Printf("consumeAddrSpec: failed: %v", err) 544 return "", err 545 } 546 547 if !p.consume('@') { 548 return "", errors.New("mail: missing @ in addr-spec") 549 } 550 551 // domain = dot-atom / domain-literal 552 var domain string 553 p.skipSpace() 554 if p.empty() { 555 return "", errors.New("mail: no domain in addr-spec") 556 } 557 558 if p.peek() == '[' { 559 // domain-literal 560 domain, err = p.consumeDomainLiteral() 
561 if err != nil { 562 return "", err 563 } 564 } else { 565 // dot-atom 566 domain, err = p.consumeAtom(true, false) 567 if err != nil { 568 return "", err 569 } 570 } 571 572 return localPart + "@" + domain, nil 573} 574 575// consumePhrase parses the RFC 5322 phrase at the start of p. 576func (p *addrParser) consumePhrase() (phrase string, err error) { 577 debug.Printf("consumePhrase: [%s]", p.s) 578 // phrase = 1*word 579 var words []string 580 var isPrevEncoded bool 581 for { 582 // obs-phrase allows CFWS after one word 583 if len(words) > 0 { 584 if !p.skipCFWS() { 585 return "", errors.New("mail: misformatted parenthetical comment") 586 } 587 } 588 // word = atom / quoted-string 589 var word string 590 p.skipSpace() 591 if p.empty() { 592 break 593 } 594 isEncoded := false 595 if p.peek() == '"' { 596 // quoted-string 597 word, err = p.consumeQuotedString() 598 } else { 599 // atom 600 // We actually parse dot-atom here to be more permissive 601 // than what RFC 5322 specifies. 602 word, err = p.consumeAtom(true, true) 603 if err == nil { 604 word, isEncoded, err = p.decodeRFC2047Word(word) 605 } 606 } 607 608 if err != nil { 609 break 610 } 611 debug.Printf("consumePhrase: consumed %q", word) 612 if isPrevEncoded && isEncoded { 613 words[len(words)-1] += word 614 } else { 615 words = append(words, word) 616 } 617 isPrevEncoded = isEncoded 618 } 619 // Ignore any error if we got at least one word. 620 if err != nil && len(words) == 0 { 621 debug.Printf("consumePhrase: hit err: %v", err) 622 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 623 } 624 phrase = strings.Join(words, " ") 625 return phrase, nil 626} 627 628// consumeQuotedString parses the quoted string at the start of p. 629func (p *addrParser) consumeQuotedString() (qs string, err error) { 630 // Assume first byte is '"'. 
631 i := 1 632 qsb := make([]rune, 0, 10) 633 634 escaped := false 635 636Loop: 637 for { 638 r, size := utf8.DecodeRuneInString(p.s[i:]) 639 640 switch { 641 case size == 0: 642 return "", errors.New("mail: unclosed quoted-string") 643 644 case size == 1 && r == utf8.RuneError: 645 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 646 647 case escaped: 648 // quoted-pair = ("\" (VCHAR / WSP)) 649 650 if !isVchar(r) && !isWSP(r) { 651 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 652 } 653 654 qsb = append(qsb, r) 655 escaped = false 656 657 case isQtext(r) || isWSP(r): 658 // qtext (printable US-ASCII excluding " and \), or 659 // FWS (almost; we're ignoring CRLF) 660 qsb = append(qsb, r) 661 662 case r == '"': 663 break Loop 664 665 case r == '\\': 666 escaped = true 667 668 default: 669 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 670 671 } 672 673 i += size 674 } 675 p.s = p.s[i+1:] 676 return string(qsb), nil 677} 678 679// consumeAtom parses an RFC 5322 atom at the start of p. 680// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
681// If permissive is true, consumeAtom will not fail on: 682// - leading/trailing/double dots in the atom (see golang.org/issue/4938) 683func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 684 i := 0 685 686Loop: 687 for { 688 r, size := utf8.DecodeRuneInString(p.s[i:]) 689 switch { 690 case size == 1 && r == utf8.RuneError: 691 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 692 693 case size == 0 || !isAtext(r, dot): 694 break Loop 695 696 default: 697 i += size 698 699 } 700 } 701 702 if i == 0 { 703 return "", errors.New("mail: invalid string") 704 } 705 atom, p.s = p.s[:i], p.s[i:] 706 if !permissive { 707 if strings.HasPrefix(atom, ".") { 708 return "", errors.New("mail: leading dot in atom") 709 } 710 if strings.Contains(atom, "..") { 711 return "", errors.New("mail: double dot in atom") 712 } 713 if strings.HasSuffix(atom, ".") { 714 return "", errors.New("mail: trailing dot in atom") 715 } 716 } 717 return atom, nil 718} 719 720// consumeDomainLiteral parses an RFC 5322 domain-literal at the start of p. 
721func (p *addrParser) consumeDomainLiteral() (string, error) { 722 // Skip the leading [ 723 if !p.consume('[') { 724 return "", errors.New(`mail: missing "[" in domain-literal`) 725 } 726 727 // Parse the dtext 728 var dtext string 729 for { 730 if p.empty() { 731 return "", errors.New("mail: unclosed domain-literal") 732 } 733 if p.peek() == ']' { 734 break 735 } 736 737 r, size := utf8.DecodeRuneInString(p.s) 738 if size == 1 && r == utf8.RuneError { 739 return "", fmt.Errorf("mail: invalid utf-8 in domain-literal: %q", p.s) 740 } 741 if !isDtext(r) { 742 return "", fmt.Errorf("mail: bad character in domain-literal: %q", r) 743 } 744 745 dtext += p.s[:size] 746 p.s = p.s[size:] 747 } 748 749 // Skip the trailing ] 750 if !p.consume(']') { 751 return "", errors.New("mail: unclosed domain-literal") 752 } 753 754 // Check if the domain literal is an IP address 755 if net.ParseIP(dtext) == nil { 756 return "", fmt.Errorf("mail: invalid IP address in domain-literal: %q", dtext) 757 } 758 759 return "[" + dtext + "]", nil 760} 761 762func (p *addrParser) consumeDisplayNameComment() (string, error) { 763 if !p.consume('(') { 764 return "", errors.New("mail: comment does not start with (") 765 } 766 comment, ok := p.consumeComment() 767 if !ok { 768 return "", errors.New("mail: misformatted parenthetical comment") 769 } 770 771 // TODO(stapelberg): parse quoted-string within comment 772 words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' }) 773 for idx, word := range words { 774 decoded, isEncoded, err := p.decodeRFC2047Word(word) 775 if err != nil { 776 return "", err 777 } 778 if isEncoded { 779 words[idx] = decoded 780 } 781 } 782 783 return strings.Join(words, " "), nil 784} 785 786func (p *addrParser) consume(c byte) bool { 787 if p.empty() || p.peek() != c { 788 return false 789 } 790 p.s = p.s[1:] 791 return true 792} 793 794// skipSpace skips the leading space and tab characters. 
795func (p *addrParser) skipSpace() { 796 p.s = strings.TrimLeft(p.s, " \t") 797} 798 799func (p *addrParser) peek() byte { 800 return p.s[0] 801} 802 803func (p *addrParser) empty() bool { 804 return p.len() == 0 805} 806 807func (p *addrParser) len() int { 808 return len(p.s) 809} 810 811// skipCFWS skips CFWS as defined in RFC5322. 812func (p *addrParser) skipCFWS() bool { 813 p.skipSpace() 814 815 for { 816 if !p.consume('(') { 817 break 818 } 819 820 if _, ok := p.consumeComment(); !ok { 821 return false 822 } 823 824 p.skipSpace() 825 } 826 827 return true 828} 829 830func (p *addrParser) consumeComment() (string, bool) { 831 // '(' already consumed. 832 depth := 1 833 834 var comment string 835 for { 836 if p.empty() || depth == 0 { 837 break 838 } 839 840 if p.peek() == '\\' && p.len() > 1 { 841 p.s = p.s[1:] 842 } else if p.peek() == '(' { 843 depth++ 844 } else if p.peek() == ')' { 845 depth-- 846 } 847 if depth > 0 { 848 comment += p.s[:1] 849 } 850 p.s = p.s[1:] 851 } 852 853 return comment, depth == 0 854} 855 856func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { 857 dec := p.dec 858 if dec == nil { 859 dec = &rfc2047Decoder 860 } 861 862 // Substitute our own CharsetReader function so that we can tell 863 // whether an error from the Decode method was due to the 864 // CharsetReader (meaning the charset is invalid). 865 // We used to look for the charsetError type in the error result, 866 // but that behaves badly with CharsetReaders other than the 867 // one in rfc2047Decoder. 
868 adec := *dec 869 charsetReaderError := false 870 adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 871 if dec.CharsetReader == nil { 872 charsetReaderError = true 873 return nil, charsetError(charset) 874 } 875 r, err := dec.CharsetReader(charset, input) 876 if err != nil { 877 charsetReaderError = true 878 } 879 return r, err 880 } 881 word, err = adec.Decode(s) 882 if err == nil { 883 return word, true, nil 884 } 885 886 // If the error came from the character set reader 887 // (meaning the character set itself is invalid 888 // but the decoding worked fine until then), 889 // return the original text and the error, 890 // with isEncoded=true. 891 if charsetReaderError { 892 return s, true, err 893 } 894 895 // Ignore invalid RFC 2047 encoded-word errors. 896 return s, false, nil 897} 898 899var rfc2047Decoder = mime.WordDecoder{ 900 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 901 return nil, charsetError(charset) 902 }, 903} 904 905type charsetError string 906 907func (e charsetError) Error() string { 908 return fmt.Sprintf("charset not supported: %q", string(e)) 909} 910 911// isAtext reports whether r is an RFC 5322 atext character. 912// If dot is true, period is included. 913func isAtext(r rune, dot bool) bool { 914 switch r { 915 case '.': 916 return dot 917 918 // RFC 5322 3.2.3. specials 919 case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials 920 return false 921 } 922 return isVchar(r) 923} 924 925// isQtext reports whether r is an RFC 5322 qtext character. 926func isQtext(r rune) bool { 927 // Printable US-ASCII, excluding backslash or quote. 928 if r == '\\' || r == '"' { 929 return false 930 } 931 return isVchar(r) 932} 933 934// quoteString renders a string as an RFC 5322 quoted-string. 
//
// Backslashes and double quotes are escaped with a backslash. Any other
// VCHAR or WSP character is copied as is, and everything else is omitted.
func quoteString(s string) string {
	var out strings.Builder
	out.WriteByte('"')
	for _, c := range s {
		switch {
		case c == '\\' || c == '"':
			// The only VCHARs that are not qtext: emit as a quoted-pair.
			out.WriteByte('\\')
			out.WriteRune(c)
		case isVchar(c) || isWSP(c):
			out.WriteRune(c)
		}
	}
	out.WriteByte('"')
	return out.String()
}

// isVchar reports whether r is an RFC 5322 VCHAR character.
func isVchar(r rune) bool {
	if isMultibyte(r) {
		return true
	}
	// Visible (printing) ASCII characters.
	return r >= '!' && r <= '~'
}

// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532.
func isMultibyte(r rune) bool {
	return utf8.RuneSelf <= r
}

// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}

// isDtext reports whether r is an RFC 5322 dtext character.
func isDtext(r rune) bool {
	// Printable US-ASCII, excluding "[", "]", or "\".
	switch r {
	case '[', ']', '\\':
		return false
	}
	return isVchar(r)
}