1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package mail implements parsing of mail messages.
7
8For the most part, this package follows the syntax as specified by RFC 5322 and
9extended by RFC 6532.
10Notable divergences:
11  - Obsolete address formats are not parsed, including addresses with
12    embedded route information.
13  - The full range of spacing (the CFWS syntax element) is not supported,
14    such as breaking addresses across lines.
15  - No unicode normalization is performed.
16  - A leading From line is permitted, as in mbox format (RFC 4155).
17*/
18package mail
19
20import (
21	"bufio"
22	"errors"
23	"fmt"
24	"io"
25	"log"
26	"mime"
27	"net"
28	"net/textproto"
29	"strings"
30	"sync"
31	"time"
32	"unicode/utf8"
33)
34
// debug controls whether the package's diagnostic Printf calls produce
// any output. It is false here, so those calls are no-ops.
var debug = debugT(false)

// debugT is a boolean switch that carries a conditional Printf method.
type debugT bool

// Printf forwards format and args to log.Printf when d is true and does
// nothing otherwise.
func (d debugT) Printf(format string, args ...any) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
44
// A Message represents a parsed mail message.
//
// ReadMessage fills Header from the parsed header section and sets Body
// to the reader from which the rest of the input can be read.
type Message struct {
	Header Header
	Body   io.Reader
}
50
51// ReadMessage reads a message from r.
52// The headers are parsed, and the body of the message will be available
53// for reading from msg.Body.
54func ReadMessage(r io.Reader) (msg *Message, err error) {
55	tp := textproto.NewReader(bufio.NewReader(r))
56
57	hdr, err := readHeader(tp)
58	if err != nil && (err != io.EOF || len(hdr) == 0) {
59		return nil, err
60	}
61
62	return &Message{
63		Header: Header(hdr),
64		Body:   tp.R,
65	}, nil
66}
67
// readHeader reads the message headers from r.
//
// It mirrors textproto.ReadMIMEHeader but performs no validation of the
// field names or values: the fix for issue #53188 made net/textproto
// enforce the restrictions of RFC 7230, which RFC 5322 (the format this
// package implements) does not share. The logic here is the relevant
// part of net/textproto, simplified for RFC 5322.
//
// The returned map is never nil and contains every field parsed before
// an error, if any, occurred. Keys are canonicalized with
// textproto.CanonicalMIMEHeaderKey. Reading stops at the first empty line.
func readHeader(r *textproto.Reader) (map[string][]string, error) {
	headers := map[string][]string{}

	// A header block must not open with a continuation line.
	if first, peekErr := r.R.Peek(1); peekErr == nil {
		switch first[0] {
		case ' ', '\t':
			line, err := r.ReadLine()
			if err != nil {
				return headers, err
			}
			return headers, errors.New("malformed initial line: " + line)
		}
	}

	for {
		line, readErr := r.ReadContinuedLine()
		if line == "" {
			return headers, readErr
		}

		// The field name is everything before the first colon.
		name, rest, found := strings.Cut(line, ":")
		if !found {
			return headers, errors.New("malformed header line: " + line)
		}

		// Lines with an empty field name are silently dropped, because
		// that is what this package has historically done.
		if canonical := textproto.CanonicalMIMEHeaderKey(name); canonical != "" {
			// Leading blanks are not part of the value.
			headers[canonical] = append(headers[canonical], strings.TrimLeft(rest, " \t"))

			if readErr != nil {
				return headers, readErr
			}
		}
	}
}
115
// dateLayouts holds layouts suitable for passing to time.Parse.
// ParseDate tries them in slice order. The slice is filled by
// buildDateLayouts, which ParseDate runs through dateLayoutsBuildOnce.
var (
	dateLayoutsBuildOnce sync.Once
	dateLayouts          []string
)

// buildDateLayouts appends to dateLayouts every combination of the
// optional and alternative parts of the RFC 5322 section 3.3 date-time
// syntax: with or without day-of-week, one- or two-digit day, four- or
// two-digit year, with or without seconds, and three zone spellings.
func buildDateLayouts() {
	var (
		weekdayForms = []string{"", "Mon, "}          // optional day-of-week
		dayForms     = []string{"2", "02"}            // day = 1*2DIGIT
		yearForms    = []string{"2006", "06"}         // year = 4*DIGIT / 2*DIGIT
		secondForms  = []string{":05", ""}            // optional seconds
		zoneForms    = []string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...
	)

	// The nesting order fixes the order in which layouts are tried.
	for _, weekday := range weekdayForms {
		for _, day := range dayForms {
			for _, year := range yearForms {
				datePart := weekday + day + " Jan " + year
				for _, second := range secondForms {
					timePart := " 15:04" + second
					for _, zone := range zoneForms {
						dateLayouts = append(dateLayouts, datePart+timePart+" "+zone)
					}
				}
			}
		}
	}
}
146
// ParseDate parses an RFC 5322 date string.
//
// Every CRLF pair in the input is removed first; a CR that remains after
// that is an error. The function then tries to cut the input right after
// the time zone, skips any parenthetical comments that follow the cut,
// and matches the text before the cut against each entry of dateLayouts
// in order, returning the first successful parse.
//
// It returns an error if a CR appears without LF, if a comment following
// the zone is not properly closed, or if no layout matches.
func ParseDate(date string) (time.Time, error) {
	dateLayoutsBuildOnce.Do(buildDateLayouts)
	// CR and LF must match and are tolerated anywhere in the date field.
	date = strings.ReplaceAll(date, "\r\n", "")
	if strings.Contains(date, "\r") {
		return time.Time{}, errors.New("mail: header has a CR without LF")
	}
	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
	p := addrParser{date, nil}
	// Only p.s is trimmed here; date is reassigned below if a zone is located.
	p.skipSpace()

	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
	// zone length is always 5 chars unless obsolete (obs-zone)
	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
		// Keep everything through the five bytes starting at the sign;
		// the remainder stays in p.s for comment skipping.
		date = p.s[:ind+5]
		p.s = p.s[ind+5:]
	} else {
		ind := strings.Index(p.s, "T")
		if ind == 0 {
			// The string starts with "T" (e.g. a "Thu" or "Tue"
			// day-of-week), so look for the next "T" instead.
			// In this case we have the following date formats:
			// * Thu, 20 Nov 1997 09:55:06 MDT
			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
			ind = strings.Index(p.s[1:], "T")
			if ind != -1 {
				// Convert the offset within p.s[1:] back to an index into p.s.
				ind++
			}
		}

		if ind != -1 && len(p.s) >= ind+5 {
			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
			// If T is misplaced, the date to parse is garbage.
			date = p.s[:ind+1]
			p.s = p.s[ind+1:]
		}
		// Otherwise date is left unchanged and p.s still holds the
		// whole (space-trimmed) input.
	}
	// Skip comments at the start of p.s. Whatever is left in p.s after
	// this call is not examined.
	if !p.skipCFWS() {
		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
	}
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}
195
// A Header represents the key-value pairs in a mail message header.
// Each key maps to the list of values recorded for that field.
type Header map[string][]string

// Get returns the first value associated with key, or "" when the key
// has no values.
// The lookup is case insensitive: the key is canonicalized with
// CanonicalMIMEHeaderKey before it is looked up.
// To read all values of a key, or to use non-canonical keys,
// index the map directly.
func (h Header) Get(key string) string {
	mimeHeader := textproto.MIMEHeader(h)
	return mimeHeader.Get(key)
}
208
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when the requested header field is absent or has an empty first value.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
210
211// Date parses the Date header field.
212func (h Header) Date() (time.Time, error) {
213	hdr := h.Get("Date")
214	if hdr == "" {
215		return time.Time{}, ErrHeaderNotPresent
216	}
217	return ParseDate(hdr)
218}
219
220// AddressList parses the named header field as a list of addresses.
221func (h Header) AddressList(key string) ([]*Address, error) {
222	hdr := h.Get(key)
223	if hdr == "" {
224		return nil, ErrHeaderNotPresent
225	}
226	return ParseAddressList(hdr)
227}
228
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
236
237// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <[email protected]>"
238func ParseAddress(address string) (*Address, error) {
239	return (&addrParser{s: address}).parseSingleAddress()
240}
241
242// ParseAddressList parses the given string as a list of addresses.
243func ParseAddressList(list string) ([]*Address, error) {
244	return (&addrParser{s: list}).parseAddressList()
245}
246
// An AddressParser is an RFC 5322 address parser.
// Its Parse and ParseList methods hand WordDecoder to the internal
// parser; when it is nil the package's default decoder is used.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	WordDecoder *mime.WordDecoder
}
252
253// Parse parses a single RFC 5322 address of the
254// form "Gogh Fir <[email protected]>" or "[email protected]".
255func (p *AddressParser) Parse(address string) (*Address, error) {
256	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
257}
258
259// ParseList parses the given string as a list of comma-separated addresses
260// of the form "Gogh Fir <[email protected]>" or "[email protected]".
261func (p *AddressParser) ParseList(list string) ([]*Address, error) {
262	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
263}
264
265// String formats the address as a valid RFC 5322 address.
266// If the address's name contains non-ASCII characters
267// the name will be rendered according to RFC 2047.
268func (a *Address) String() string {
269	// Format address local@domain
270	at := strings.LastIndex(a.Address, "@")
271	var local, domain string
272	if at < 0 {
273		// This is a malformed address ("@" is required in addr-spec);
274		// treat the whole address as local-part.
275		local = a.Address
276	} else {
277		local, domain = a.Address[:at], a.Address[at+1:]
278	}
279
280	// Add quotes if needed
281	quoteLocal := false
282	for i, r := range local {
283		if isAtext(r, false) {
284			continue
285		}
286		if r == '.' {
287			// Dots are okay if they are surrounded by atext.
288			// We only need to check that the previous byte is
289			// not a dot, and this isn't the end of the string.
290			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
291				continue
292			}
293		}
294		quoteLocal = true
295		break
296	}
297	if quoteLocal {
298		local = quoteString(local)
299
300	}
301
302	s := "<" + local + "@" + domain + ">"
303
304	if a.Name == "" {
305		return s
306	}
307
308	// If every character is printable ASCII, quoting is simple.
309	allPrintable := true
310	for _, r := range a.Name {
311		// isWSP here should actually be isFWS,
312		// but we don't support folding yet.
313		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
314			allPrintable = false
315			break
316		}
317	}
318	if allPrintable {
319		return quoteString(a.Name) + " " + s
320	}
321
322	// Text in an encoded-word in a display-name must not contain certain
323	// characters like quotes or parentheses (see RFC 2047 section 5.3).
324	// When this is the case encode the name using base64 encoding.
325	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
326		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
327	}
328	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
329}
330
// addrParser is the parsing state shared by the address and date parsers.
// The consume*/skip* methods advance the parser by re-slicing s.
type addrParser struct {
	s   string            // input that has not been consumed yet
	dec *mime.WordDecoder // may be nil
}
335
336func (p *addrParser) parseAddressList() ([]*Address, error) {
337	var list []*Address
338	for {
339		p.skipSpace()
340
341		// allow skipping empty entries (RFC5322 obs-addr-list)
342		if p.consume(',') {
343			continue
344		}
345
346		addrs, err := p.parseAddress(true)
347		if err != nil {
348			return nil, err
349		}
350		list = append(list, addrs...)
351
352		if !p.skipCFWS() {
353			return nil, errors.New("mail: misformatted parenthetical comment")
354		}
355		if p.empty() {
356			break
357		}
358		if p.peek() != ',' {
359			return nil, errors.New("mail: expected comma")
360		}
361
362		// Skip empty entries for obs-addr-list.
363		for p.consume(',') {
364			p.skipSpace()
365		}
366		if p.empty() {
367			break
368		}
369	}
370	return list, nil
371}
372
373func (p *addrParser) parseSingleAddress() (*Address, error) {
374	addrs, err := p.parseAddress(true)
375	if err != nil {
376		return nil, err
377	}
378	if !p.skipCFWS() {
379		return nil, errors.New("mail: misformatted parenthetical comment")
380	}
381	if !p.empty() {
382		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
383	}
384	if len(addrs) == 0 {
385		return nil, errors.New("mail: empty group")
386	}
387	if len(addrs) > 1 {
388		return nil, errors.New("mail: group with multiple addresses")
389	}
390	return addrs[0], nil
391}
392
// parseAddress parses a single RFC 5322 address at the start of p.
//
// A mailbox yields a one-element slice. If handleGroup is true and the
// display-name is followed by ':', the input is parsed as a group via
// consumeGroupList and the group's members (possibly none) are returned.
// If handleGroup is false the ':' is not consumed, so a group fails the
// angle-addr check below.
func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
	debug.Printf("parseAddress: %q", p.s)
	p.skipSpace()
	if p.empty() {
		return nil, errors.New("mail: no address")
	}

	// address = mailbox / group
	// mailbox = name-addr / addr-spec
	// group = display-name ":" [group-list] ";" [CFWS]

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	spec, err := p.consumeAddrSpec()
	if err == nil {
		// Bare addr-spec. A parenthetical comment right after it is
		// taken as the display name.
		var displayName string
		p.skipSpace()
		if !p.empty() && p.peek() == '(' {
			displayName, err = p.consumeDisplayNameComment()
			if err != nil {
				return nil, err
			}
		}

		// err is nil on every path that reaches this return.
		return []*Address{{
			Name:    displayName,
			Address: spec,
		}}, err
	}
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", p.s)

	// consumeAddrSpec restores p when it fails, and p was non-empty
	// above, so peeking here is safe.

	// display-name
	var displayName string
	if p.peek() != '<' {
		displayName, err = p.consumePhrase()
		if err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", displayName)

	p.skipSpace()
	if handleGroup {
		if p.consume(':') {
			return p.consumeGroupList()
		}
	}
	// angle-addr = "<" addr-spec ">"
	if !p.consume('<') {
		// No angle-addr follows. Pick the error message based on
		// whether the display name is made only of atext and dots.
		atext := true
		for _, r := range displayName {
			if !isAtext(r, true) {
				atext = false
				break
			}
		}
		if atext {
			// The input is like "foo.bar"; it's possible the input
			// meant to be "foo.bar@domain", or "foo.bar <...>".
			return nil, errors.New("mail: missing '@' or angle-addr")
		}
		// The input is like "Full Name", which couldn't possibly be a
		// valid email address if followed by "@domain"; the input
		// likely meant to be "Full Name <...>".
		return nil, errors.New("mail: no angle-addr")
	}
	spec, err = p.consumeAddrSpec()
	if err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, errors.New("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return []*Address{{
		Name:    displayName,
		Address: spec,
	}}, nil
}
476
477func (p *addrParser) consumeGroupList() ([]*Address, error) {
478	var group []*Address
479	// handle empty group.
480	p.skipSpace()
481	if p.consume(';') {
482		if !p.skipCFWS() {
483			return nil, errors.New("mail: misformatted parenthetical comment")
484		}
485		return group, nil
486	}
487
488	for {
489		p.skipSpace()
490		// embedded groups not allowed.
491		addrs, err := p.parseAddress(false)
492		if err != nil {
493			return nil, err
494		}
495		group = append(group, addrs...)
496
497		if !p.skipCFWS() {
498			return nil, errors.New("mail: misformatted parenthetical comment")
499		}
500		if p.consume(';') {
501			if !p.skipCFWS() {
502				return nil, errors.New("mail: misformatted parenthetical comment")
503			}
504			break
505		}
506		if !p.consume(',') {
507			return nil, errors.New("mail: expected comma")
508		}
509	}
510	return group, nil
511}
512
513// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
514func (p *addrParser) consumeAddrSpec() (spec string, err error) {
515	debug.Printf("consumeAddrSpec: %q", p.s)
516
517	orig := *p
518	defer func() {
519		if err != nil {
520			*p = orig
521		}
522	}()
523
524	// local-part = dot-atom / quoted-string
525	var localPart string
526	p.skipSpace()
527	if p.empty() {
528		return "", errors.New("mail: no addr-spec")
529	}
530	if p.peek() == '"' {
531		// quoted-string
532		debug.Printf("consumeAddrSpec: parsing quoted-string")
533		localPart, err = p.consumeQuotedString()
534		if localPart == "" {
535			err = errors.New("mail: empty quoted string in addr-spec")
536		}
537	} else {
538		// dot-atom
539		debug.Printf("consumeAddrSpec: parsing dot-atom")
540		localPart, err = p.consumeAtom(true, false)
541	}
542	if err != nil {
543		debug.Printf("consumeAddrSpec: failed: %v", err)
544		return "", err
545	}
546
547	if !p.consume('@') {
548		return "", errors.New("mail: missing @ in addr-spec")
549	}
550
551	// domain = dot-atom / domain-literal
552	var domain string
553	p.skipSpace()
554	if p.empty() {
555		return "", errors.New("mail: no domain in addr-spec")
556	}
557
558	if p.peek() == '[' {
559		// domain-literal
560		domain, err = p.consumeDomainLiteral()
561		if err != nil {
562			return "", err
563		}
564	} else {
565		// dot-atom
566		domain, err = p.consumeAtom(true, false)
567		if err != nil {
568			return "", err
569		}
570	}
571
572	return localPart + "@" + domain, nil
573}
574
// consumePhrase parses the RFC 5322 phrase at the start of p.
//
// Words are collected until the input is exhausted or a word fails to
// parse, and are returned joined by single spaces. Atoms are passed
// through decodeRFC2047Word; consecutive words that both decoded as
// encoded-words are concatenated without a separating space. An error is
// returned only if a comment between words is malformed or if not even
// one word could be parsed.
func (p *addrParser) consumePhrase() (phrase string, err error) {
	debug.Printf("consumePhrase: [%s]", p.s)
	// phrase = 1*word
	var words []string
	// isPrevEncoded records whether the previously collected word was
	// an encoded-word.
	var isPrevEncoded bool
	for {
		// obs-phrase allows CFWS after one word
		if len(words) > 0 {
			if !p.skipCFWS() {
				return "", errors.New("mail: misformatted parenthetical comment")
			}
		}
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			break
		}
		isEncoded := false
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			// We actually parse dot-atom here to be more permissive
			// than what RFC 5322 specifies.
			word, err = p.consumeAtom(true, true)
			if err == nil {
				word, isEncoded, err = p.decodeRFC2047Word(word)
			}
		}

		// Stop at the first word that does not parse; whether that is
		// an error is decided after the loop.
		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		if isPrevEncoded && isEncoded {
			// Adjacent encoded-words are glued together.
			words[len(words)-1] += word
		} else {
			words = append(words, word)
		}
		isPrevEncoded = isEncoded
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	}
	phrase = strings.Join(words, " ")
	return phrase, nil
}
627
628// consumeQuotedString parses the quoted string at the start of p.
629func (p *addrParser) consumeQuotedString() (qs string, err error) {
630	// Assume first byte is '"'.
631	i := 1
632	qsb := make([]rune, 0, 10)
633
634	escaped := false
635
636Loop:
637	for {
638		r, size := utf8.DecodeRuneInString(p.s[i:])
639
640		switch {
641		case size == 0:
642			return "", errors.New("mail: unclosed quoted-string")
643
644		case size == 1 && r == utf8.RuneError:
645			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
646
647		case escaped:
648			//  quoted-pair = ("\" (VCHAR / WSP))
649
650			if !isVchar(r) && !isWSP(r) {
651				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
652			}
653
654			qsb = append(qsb, r)
655			escaped = false
656
657		case isQtext(r) || isWSP(r):
658			// qtext (printable US-ASCII excluding " and \), or
659			// FWS (almost; we're ignoring CRLF)
660			qsb = append(qsb, r)
661
662		case r == '"':
663			break Loop
664
665		case r == '\\':
666			escaped = true
667
668		default:
669			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
670
671		}
672
673		i += size
674	}
675	p.s = p.s[i+1:]
676	return string(qsb), nil
677}
678
679// consumeAtom parses an RFC 5322 atom at the start of p.
680// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
681// If permissive is true, consumeAtom will not fail on:
682// - leading/trailing/double dots in the atom (see golang.org/issue/4938)
683func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
684	i := 0
685
686Loop:
687	for {
688		r, size := utf8.DecodeRuneInString(p.s[i:])
689		switch {
690		case size == 1 && r == utf8.RuneError:
691			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
692
693		case size == 0 || !isAtext(r, dot):
694			break Loop
695
696		default:
697			i += size
698
699		}
700	}
701
702	if i == 0 {
703		return "", errors.New("mail: invalid string")
704	}
705	atom, p.s = p.s[:i], p.s[i:]
706	if !permissive {
707		if strings.HasPrefix(atom, ".") {
708			return "", errors.New("mail: leading dot in atom")
709		}
710		if strings.Contains(atom, "..") {
711			return "", errors.New("mail: double dot in atom")
712		}
713		if strings.HasSuffix(atom, ".") {
714			return "", errors.New("mail: trailing dot in atom")
715		}
716	}
717	return atom, nil
718}
719
720// consumeDomainLiteral parses an RFC 5322 domain-literal at the start of p.
721func (p *addrParser) consumeDomainLiteral() (string, error) {
722	// Skip the leading [
723	if !p.consume('[') {
724		return "", errors.New(`mail: missing "[" in domain-literal`)
725	}
726
727	// Parse the dtext
728	var dtext string
729	for {
730		if p.empty() {
731			return "", errors.New("mail: unclosed domain-literal")
732		}
733		if p.peek() == ']' {
734			break
735		}
736
737		r, size := utf8.DecodeRuneInString(p.s)
738		if size == 1 && r == utf8.RuneError {
739			return "", fmt.Errorf("mail: invalid utf-8 in domain-literal: %q", p.s)
740		}
741		if !isDtext(r) {
742			return "", fmt.Errorf("mail: bad character in domain-literal: %q", r)
743		}
744
745		dtext += p.s[:size]
746		p.s = p.s[size:]
747	}
748
749	// Skip the trailing ]
750	if !p.consume(']') {
751		return "", errors.New("mail: unclosed domain-literal")
752	}
753
754	// Check if the domain literal is an IP address
755	if net.ParseIP(dtext) == nil {
756		return "", fmt.Errorf("mail: invalid IP address in domain-literal: %q", dtext)
757	}
758
759	return "[" + dtext + "]", nil
760}
761
762func (p *addrParser) consumeDisplayNameComment() (string, error) {
763	if !p.consume('(') {
764		return "", errors.New("mail: comment does not start with (")
765	}
766	comment, ok := p.consumeComment()
767	if !ok {
768		return "", errors.New("mail: misformatted parenthetical comment")
769	}
770
771	// TODO(stapelberg): parse quoted-string within comment
772	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
773	for idx, word := range words {
774		decoded, isEncoded, err := p.decodeRFC2047Word(word)
775		if err != nil {
776			return "", err
777		}
778		if isEncoded {
779			words[idx] = decoded
780		}
781	}
782
783	return strings.Join(words, " "), nil
784}
785
786func (p *addrParser) consume(c byte) bool {
787	if p.empty() || p.peek() != c {
788		return false
789	}
790	p.s = p.s[1:]
791	return true
792}
793
794// skipSpace skips the leading space and tab characters.
795func (p *addrParser) skipSpace() {
796	p.s = strings.TrimLeft(p.s, " \t")
797}
798
799func (p *addrParser) peek() byte {
800	return p.s[0]
801}
802
803func (p *addrParser) empty() bool {
804	return p.len() == 0
805}
806
807func (p *addrParser) len() int {
808	return len(p.s)
809}
810
811// skipCFWS skips CFWS as defined in RFC5322.
812func (p *addrParser) skipCFWS() bool {
813	p.skipSpace()
814
815	for {
816		if !p.consume('(') {
817			break
818		}
819
820		if _, ok := p.consumeComment(); !ok {
821			return false
822		}
823
824		p.skipSpace()
825	}
826
827	return true
828}
829
830func (p *addrParser) consumeComment() (string, bool) {
831	// '(' already consumed.
832	depth := 1
833
834	var comment string
835	for {
836		if p.empty() || depth == 0 {
837			break
838		}
839
840		if p.peek() == '\\' && p.len() > 1 {
841			p.s = p.s[1:]
842		} else if p.peek() == '(' {
843			depth++
844		} else if p.peek() == ')' {
845			depth--
846		}
847		if depth > 0 {
848			comment += p.s[:1]
849		}
850		p.s = p.s[1:]
851	}
852
853	return comment, depth == 0
854}
855
// decodeRFC2047Word tries to decode s as an RFC 2047 encoded-word using
// p.dec, or rfc2047Decoder when p.dec is nil.
//
// It returns:
//   - the decoded text, true, nil when decoding succeeds;
//   - s, true, and the error when the failure was reported by the
//     charset reader, i.e. the charset is not supported;
//   - s, false, nil for any other decoding failure; s is then treated
//     as plain text.
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
	dec := p.dec
	if dec == nil {
		dec = &rfc2047Decoder
	}

	// Substitute our own CharsetReader function so that we can tell
	// whether an error from the Decode method was due to the
	// CharsetReader (meaning the charset is invalid).
	// We used to look for the charsetError type in the error result,
	// but that behaves badly with CharsetReaders other than the
	// one in rfc2047Decoder.
	// adec is a copy, so the decoder that dec points to is not modified.
	adec := *dec
	charsetReaderError := false
	adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
		// Without a configured reader every charset passed to this
		// function is reported as unsupported.
		if dec.CharsetReader == nil {
			charsetReaderError = true
			return nil, charsetError(charset)
		}
		r, err := dec.CharsetReader(charset, input)
		if err != nil {
			charsetReaderError = true
		}
		return r, err
	}
	word, err = adec.Decode(s)
	if err == nil {
		return word, true, nil
	}

	// If the error came from the character set reader
	// (meaning the character set itself is invalid
	// but the decoding worked fine until then),
	// return the original text and the error,
	// with isEncoded=true.
	if charsetReaderError {
		return s, true, err
	}

	// Ignore invalid RFC 2047 encoded-word errors.
	return s, false, nil
}
898
// rfc2047Decoder is the decoder used by decodeRFC2047Word when the
// parser has no decoder of its own. Its CharsetReader rejects every
// charset it is asked about with a charsetError.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}
904
// charsetError is the error reported for a character set that cannot be
// decoded. Its value is the name of the charset.
type charsetError string

// Error implements the error interface; the charset name is quoted in
// the message.
func (e charsetError) Error() string {
	charset := string(e)
	return fmt.Sprintf("charset not supported: %q", charset)
}
910
911// isAtext reports whether r is an RFC 5322 atext character.
912// If dot is true, period is included.
913func isAtext(r rune, dot bool) bool {
914	switch r {
915	case '.':
916		return dot
917
918	// RFC 5322 3.2.3. specials
919	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
920		return false
921	}
922	return isVchar(r)
923}
924
925// isQtext reports whether r is an RFC 5322 qtext character.
926func isQtext(r rune) bool {
927	// Printable US-ASCII, excluding backslash or quote.
928	if r == '\\' || r == '"' {
929		return false
930	}
931	return isVchar(r)
932}
933
934// quoteString renders a string as an RFC 5322 quoted-string.
935func quoteString(s string) string {
936	var b strings.Builder
937	b.WriteByte('"')
938	for _, r := range s {
939		if isQtext(r) || isWSP(r) {
940			b.WriteRune(r)
941		} else if isVchar(r) {
942			b.WriteByte('\\')
943			b.WriteRune(r)
944		}
945	}
946	b.WriteByte('"')
947	return b.String()
948}
949
950// isVchar reports whether r is an RFC 5322 VCHAR character.
951func isVchar(r rune) bool {
952	// Visible (printing) characters.
953	return '!' <= r && r <= '~' || isMultibyte(r)
954}
955
// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532, i.e. whether r is utf8.RuneSelf or above.
func isMultibyte(r rune) bool {
	return utf8.RuneSelf <= r
}
961
// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
967
968// isDtext reports whether r is an RFC 5322 dtext character.
969func isDtext(r rune) bool {
970	// Printable US-ASCII, excluding "[", "]", or "\".
971	if r == '[' || r == ']' || r == '\\' {
972		return false
973	}
974	return isVchar(r)
975}
976