1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package markdown
6
7import (
8	"bytes"
9	"fmt"
10	"strings"
11	"unicode/utf8"
12
13	"golang.org/x/text/cases"
14)
15
// parseLinkRefDef attempts to parse a link reference definition
// ([label]: destination "title") at the start of s.
// On success it returns the number of bytes consumed and true,
// after recording the link in p (unless the label is already defined).
func parseLinkRefDef(p buildState, s string) (int, bool) {
	// “A link reference definition consists of a link label,
	// optionally preceded by up to three spaces of indentation,
	// followed by a colon (:),
	// optional spaces or tabs (including up to one line ending),
	// a link destination,
	// optional spaces or tabs (including up to one line ending),
	// and an optional link title,
	// which if it is present must be separated from the link destination
	// by spaces or tabs. No further character may occur.”
	i := skipSpace(s, 0)
	label, i, ok := parseLinkLabel(p.(*parseState), s, i)
	if !ok || i >= len(s) || s[i] != ':' {
		return 0, false
	}
	i = skipSpace(s, i+1)
	suf := s[i:]
	dest, i, ok := parseLinkDest(s, i)
	if !ok {
		if suf != "" && suf[0] == '<' {
			// Goldmark treats <<> as a link definition.
			p.(*parseState).corner = true
		}
		return 0, false
	}
	// moved records whether we advanced past the destination
	// (spaces, tabs, or a line ending), which must happen before
	// a title is allowed to start.
	moved := false
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		moved = true
		i++
	}

	// Take title if present and doesn't break parse.
	j := i
	if j >= len(s) || s[j] == '\n' {
		moved = true
		if j < len(s) {
			// Step over the line ending; the title may start on the next line.
			j++
		}
	}

	var title string
	var titleChar byte
	var corner bool
	if moved {
		for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
			j++
		}
		if t, c, j, ok := parseLinkTitle(s, j); ok {
			for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
				j++
			}
			// Accept the title only if it runs to the end of its line;
			// otherwise the definition ends at the destination.
			if j >= len(s) || s[j] == '\n' {
				i = j
				if t == "" {
					// Goldmark adds title="" in this case.
					// We do not, nor does the Dingus.
					corner = true
				}
				title = t
				titleChar = c
			}
		}
	}

	// Must end line. Already trimmed spaces.
	if i < len(s) && s[i] != '\n' {
		return 0, false
	}
	if i < len(s) {
		i++
	}

	// First definition wins: only record the link if the label
	// is not already defined.
	label = normalizeLabel(label)
	if p.link(label) == nil {
		p.defineLink(label, &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner})
	}
	return i, true
}
94
95func parseLinkTitle(s string, i int) (title string, char byte, next int, found bool) {
96	if i < len(s) && (s[i] == '"' || s[i] == '\'' || s[i] == '(') {
97		want := s[i]
98		if want == '(' {
99			want = ')'
100		}
101		j := i + 1
102		for ; j < len(s); j++ {
103			if s[j] == want {
104				title := s[i+1 : j]
105				// TODO: Validate title?
106				return mdUnescaper.Replace(title), want, j + 1, true
107			}
108			if s[j] == '(' && want == ')' {
109				break
110			}
111			if s[j] == '\\' && j+1 < len(s) {
112				j++
113			}
114		}
115	}
116	return "", 0, 0, false
117}
118
// parseLinkLabel attempts to parse a link label ([...]) at s[i:].
// On success it returns the trimmed label text (without the brackets)
// and the index just past the closing bracket.
func parseLinkLabel(p *parseState, s string, i int) (string, int, bool) {
	// “A link label begins with a left bracket ([) and ends with
	// the first right bracket (]) that is not backslash-escaped.
	// Between these brackets there must be at least one character
	// that is not a space, tab, or line ending.
	// Unescaped square bracket characters are not allowed
	// inside the opening and closing square brackets of link labels.
	// A link label can have at most 999 characters inside the square brackets.”
	if i >= len(s) || s[i] != '[' {
		return "", 0, false
	}
	j := i + 1
	for ; j < len(s); j++ {
		if s[j] == ']' {
			if j-(i+1) > 999 {
				// Goldmark does not apply 999 limit.
				p.corner = true
				break
			}
			if label := trimSpaceTabNewline(s[i+1 : j]); label != "" {
				// Note: CommonMark Dingus does not escape.
				return label, j + 1, true
			}
			// Label is empty after trimming: not a valid link label.
			break
		}
		if s[j] == '[' {
			// Unescaped '[' is not allowed inside a label.
			break
		}
		if s[j] == '\\' && j+1 < len(s) {
			j++ // skip the escaped character
		}
	}
	return "", 0, false
}
153
154func normalizeLabel(s string) string {
155	if strings.Contains(s, "[") || strings.Contains(s, "]") {
156		// Labels cannot have [ ] so avoid the work of translating.
157		// This is especially important for pathlogical cases like
158		// [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic
159		// amounts of garbage.
160		return ""
161	}
162
163	// “To normalize a label, strip off the opening and closing brackets,
164	// perform the Unicode case fold, strip leading and trailing spaces, tabs, and line endings,
165	// and collapse consecutive internal spaces, tabs, and line endings to a single space.”
166	s = trimSpaceTabNewline(s)
167	var b strings.Builder
168	space := false
169	hi := false
170	for i := 0; i < len(s); i++ {
171		c := s[i]
172		switch c {
173		case ' ', '\t', '\n':
174			space = true
175			continue
176		default:
177			if space {
178				b.WriteByte(' ')
179				space = false
180			}
181			if 'A' <= c && c <= 'Z' {
182				c += 'a' - 'A'
183			}
184			if c >= 0x80 {
185				hi = true
186			}
187			b.WriteByte(c)
188		}
189	}
190	s = b.String()
191	if hi {
192		s = cases.Fold().String(s)
193	}
194	return s
195}
196
// parseLinkDest attempts to parse a link destination at s[i:],
// returning the unescaped destination text and the index just past it.
func parseLinkDest(s string, i int) (string, int, bool) {
	if i >= len(s) {
		return "", 0, false
	}

	// “A sequence of zero or more characters between an opening < and a closing >
	// that contains no line endings or unescaped < or > characters,”
	if s[i] == '<' {
		for j := i + 1; ; j++ {
			if j >= len(s) || s[j] == '\n' || s[j] == '<' {
				return "", 0, false
			}
			if s[j] == '>' {
				// TODO unescape?
				return mdUnescape(s[i+1 : j]), j + 1, true
			}
			if s[j] == '\\' {
				// Skip the escaped character; if the backslash is the last
				// byte, the j >= len(s) check above fails the parse.
				j++
			}
		}
	}

	// “or a nonempty sequence of characters that does not start with <,
	// does not include ASCII control characters or space character,
	// and includes parentheses only if (a) they are backslash-escaped
	// or (b) they are part of a balanced pair of unescaped parentheses.
	depth := 0 // number of currently unmatched '(' characters
	j := i
Loop:
	for ; j < len(s); j++ {
		switch s[j] {
		case '(':
			depth++
			if depth > 32 {
				// Avoid quadratic inputs by stopping if too deep.
				// This is the same depth that cmark-gfm uses.
				return "", 0, false
			}
		case ')':
			if depth == 0 {
				// Unmatched ')' ends the destination.
				break Loop
			}
			depth--
		case '\\':
			if j+1 < len(s) {
				if s[j+1] == ' ' || s[j+1] == '\t' {
					// A backslash-escaped space fails the whole parse.
					return "", 0, false
				}
				j++ // skip the escaped character
			}
		case ' ', '\t', '\n':
			break Loop
		}
	}

	dest := s[i:j]
	// TODO: Validate dest?
	// TODO: Unescape?
	// NOTE: CommonMark Dingus does not reject control characters.
	return mdUnescape(dest), j, true
}
258
259func parseAutoLinkURI(s string, i int) (Inline, int, bool) {
260	// CommonMark 0.30:
261	//
262	//	For purposes of this spec, a scheme is any sequence of 2–32 characters
263	//	beginning with an ASCII letter and followed by any combination of
264	//	ASCII letters, digits, or the symbols plus (”+”), period (”.”), or
265	//	hyphen (”-”).
266	//
267	//	An absolute URI, for these purposes, consists of a scheme followed by
268	//	a colon (:) followed by zero or more characters other ASCII control
269	//	characters, space, <, and >. If the URI includes these characters,
270	//	they must be percent-encoded (e.g. %20 for a space).
271
272	j := i
273	if j+1 >= len(s) || s[j] != '<' || !isLetter(s[j+1]) {
274		return nil, 0, false
275	}
276	j++
277	for j < len(s) && isScheme(s[j]) && j-(i+1) <= 32 {
278		j++
279	}
280	if j-(i+1) < 2 || j-(i+1) > 32 || j >= len(s) || s[j] != ':' {
281		return nil, 0, false
282	}
283	j++
284	for j < len(s) && isURL(s[j]) {
285		j++
286	}
287	if j >= len(s) || s[j] != '>' {
288		return nil, 0, false
289	}
290	link := s[i+1 : j]
291	// link = mdUnescaper.Replace(link)
292	return &AutoLink{link, link}, j + 1, true
293}
294
295func parseAutoLinkEmail(s string, i int) (Inline, int, bool) {
296	// CommonMark 0.30:
297	//
298	//	An email address, for these purposes, is anything that matches
299	//	the non-normative regex from the HTML5 spec:
300	//
301	//	/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
302
303	j := i
304	if j+1 >= len(s) || s[j] != '<' || !isUser(s[j+1]) {
305		return nil, 0, false
306	}
307	j++
308	for j < len(s) && isUser(s[j]) {
309		j++
310	}
311	if j >= len(s) || s[j] != '@' {
312		return nil, 0, false
313	}
314	for {
315		j++
316		n, ok := skipDomainElem(s[j:])
317		if !ok {
318			return nil, 0, false
319		}
320		j += n
321		if j >= len(s) || s[j] != '.' && s[j] != '>' {
322			return nil, 0, false
323		}
324		if s[j] == '>' {
325			break
326		}
327	}
328	email := s[i+1 : j]
329	return &AutoLink{email, "mailto:" + email}, j + 1, true
330}
331
332func isUser(c byte) bool {
333	if isLetterDigit(c) {
334		return true
335	}
336	s := ".!#$%&'*+/=?^_`{|}~-"
337	for i := 0; i < len(s); i++ {
338		if c == s[i] {
339			return true
340		}
341	}
342	return false
343}
344
// isHexDigit reports whether c is an ASCII hexadecimal digit
// (0-9, a-f, or A-F).
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
348
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return c >= '0' && c <= '9'
}
352
353func skipDomainElem(s string) (int, bool) {
354	// String of LDH, up to 63 in length, with LetterDigit
355	// at both ends (1-letter/digit names are OK).
356	// Aka /[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/.
357	if len(s) < 1 || !isLetterDigit(s[0]) {
358		return 0, false
359	}
360	i := 1
361	for i < len(s) && isLDH(s[i]) && i <= 63 {
362		i++
363	}
364	if i > 63 || !isLetterDigit(s[i-1]) {
365		return 0, false
366	}
367	return i, true
368}
369
370func isScheme(c byte) bool {
371	return isLetterDigit(c) || c == '+' || c == '.' || c == '-'
372}
373
// isURL reports whether c may appear in the body of a URI autolink:
// anything above the ASCII control/space range except the angle
// brackets that delimit the autolink.
func isURL(c byte) bool {
	switch c {
	case '<', '>':
		return false
	}
	return c > ' '
}
377
// An AutoLink is an inline automatic link, such as a CommonMark
// <https://example.com> autolink or a GFM extended autolink.
type AutoLink struct {
	Text string // link text as written in the source
	URL  string // link target (for email autolinks, Text with a mailto: prefix)
}
382
// Inline is a marker method identifying AutoLink as an Inline node.
func (*AutoLink) Inline() {}
384
385func (x *AutoLink) PrintHTML(buf *bytes.Buffer) {
386	fmt.Fprintf(buf, "<a href=\"%s\">%s</a>", htmlLinkEscaper.Replace(x.URL), htmlEscaper.Replace(x.Text))
387}
388
389func (x *AutoLink) printMarkdown(buf *bytes.Buffer) {
390	fmt.Fprintf(buf, "<%s>", x.Text)
391}
392
393func (x *AutoLink) PrintText(buf *bytes.Buffer) {
394	fmt.Fprintf(buf, "%s", htmlEscaper.Replace(x.Text))
395}
396
// A Link is an inline hyperlink: Inner rendered as a link to URL.
type Link struct {
	Inner     []Inline // rendered link text
	URL       string
	Title     string
	TitleChar byte // ', " or )
	corner    bool // parse hit a case where other implementations (Goldmark, the Dingus) differ
}
404
// Inline is a marker method identifying Link as an Inline node.
func (*Link) Inline() {}
406
407func (x *Link) PrintHTML(buf *bytes.Buffer) {
408	fmt.Fprintf(buf, "<a href=\"%s\"", htmlLinkEscaper.Replace(x.URL))
409	if x.Title != "" {
410		fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
411	}
412	buf.WriteString(">")
413	for _, c := range x.Inner {
414		c.PrintHTML(buf)
415	}
416	buf.WriteString("</a>")
417}
418
419func (x *Link) printMarkdown(buf *bytes.Buffer) {
420	buf.WriteByte('[')
421	x.printRemainingMarkdown(buf)
422}
423
424func (x *Link) printRemainingMarkdown(buf *bytes.Buffer) {
425	for _, c := range x.Inner {
426		c.printMarkdown(buf)
427	}
428	buf.WriteString("](")
429	buf.WriteString(x.URL)
430	printLinkTitleMarkdown(buf, x.Title, x.TitleChar)
431	buf.WriteByte(')')
432}
433
434func printLinkTitleMarkdown(buf *bytes.Buffer, title string, titleChar byte) {
435	if title == "" {
436		return
437	}
438	closeChar := titleChar
439	openChar := closeChar
440	if openChar == ')' {
441		openChar = '('
442	}
443	fmt.Fprintf(buf, " %c%s%c", openChar, title /*TODO(jba): escape*/, closeChar)
444}
445
446func (x *Link) PrintText(buf *bytes.Buffer) {
447	for _, c := range x.Inner {
448		c.PrintText(buf)
449	}
450}
451
// An Image is an inline image: Inner rendered as the alt text for the
// image at URL. Its fields parallel those of Link, and Image values are
// converted to Link for shared Markdown printing.
type Image struct {
	Inner     []Inline // alt-text content
	URL       string
	Title     string
	TitleChar byte // ', " or )
	corner    bool // parse hit a case where other implementations differ
}
459
// Inline is a marker method identifying Image as an Inline node.
func (*Image) Inline() {}
461
462func (x *Image) PrintHTML(buf *bytes.Buffer) {
463	fmt.Fprintf(buf, "<img src=\"%s\"", htmlLinkEscaper.Replace(x.URL))
464	fmt.Fprintf(buf, " alt=\"")
465	i := buf.Len()
466	for _, c := range x.Inner {
467		c.PrintText(buf)
468	}
469	// GitHub and Goldmark both rewrite \n to space
470	// but the Dingus does not.
471	// The spec says title can be split across lines but not
472	// what happens at that point.
473	out := buf.Bytes()
474	for ; i < len(out); i++ {
475		if out[i] == '\n' {
476			out[i] = ' '
477		}
478	}
479	fmt.Fprintf(buf, "\"")
480	if x.Title != "" {
481		fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
482	}
483	buf.WriteString(" />")
484}
485
486func (x *Image) printMarkdown(buf *bytes.Buffer) {
487	buf.WriteString("![")
488	(*Link)(x).printRemainingMarkdown(buf)
489}
490
491func (x *Image) PrintText(buf *bytes.Buffer) {
492	for _, c := range x.Inner {
493		c.PrintText(buf)
494	}
495}
496
497// GitHub Flavored Markdown autolinks extension
498// https://github.github.com/gfm/#autolinks-extension-
499
500// autoLinkMore rewrites any extended autolinks in the body
501// and returns the result.
502//
503// body is a list of Plain, Emph, Strong, and Del nodes.
504// Two Plains only appear consecutively when one is a
505// potential emphasis marker that ended up being plain after all, like "_" or "**".
506// There are no Link nodes.
507//
508// The GitHub “spec” declares that “autolinks can only come at the
509// beginning of a line, after whitespace, or any of the delimiting
510// characters *, _, ~, and (”. However, the GitHub web site does not
511// enforce this rule: text like "[email protected] is my email" links the
512// text following the $ as an email address. It appears the actual rule
513// is that autolinks cannot come after ASCII letters, although they can
514// come after numbers or Unicode letters.
515// Since the only point of implementing GitHub Flavored Markdown
516// is to match GitHub's behavior, we do what they do, not what they say,
517// at least for now.
func (p *parseState) autoLinkText(list []Inline) []Inline {
	if !p.AutoLinkText {
		return list
	}

	var out []Inline // allocated lazily when we first change list
	for i, x := range list {
		switch x := x.(type) {
		case *Plain:
			if rewrite := p.autoLinkPlain(x.Text); rewrite != nil {
				if out == nil {
					// First change: copy the unchanged prefix of list.
					out = append(out, list[:i]...)
				}
				out = append(out, rewrite...)
				continue
			}
		case *Strong:
			x.Inner = p.autoLinkText(x.Inner)
		case *Del:
			x.Inner = p.autoLinkText(x.Inner)
		case *Emph:
			x.Inner = p.autoLinkText(x.Inner)
		}
		if out != nil {
			out = append(out, x)
		}
	}
	if out == nil {
		// No Plain was rewritten; return list unchanged
		// (nested Inner fields may still have been updated in place).
		return list
	}
	return out
}
550
// autoLinkPlain rewrites extended autolinks (emails and URLs) found in
// the plain text s, returning the replacement inline list, or nil if
// s contains no autolinks.
func (p *parseState) autoLinkPlain(s string) []Inline {
	// vd caches "no valid domain before this point" information across
	// restarts so repeated domain checks cannot go quadratic.
	vd := &validDomainChecker{s: s}
	var out []Inline
Restart:
	// After each link is found, s is cut down to the text following the
	// link (vd kept in sync via skip) and the scan restarts at i = 0.
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '@' {
			if before, link, after, ok := p.parseAutoEmail(s, i); ok {
				if before != "" {
					out = append(out, &Plain{Text: before})
				}
				out = append(out, link)
				vd.skip(len(s) - len(after))
				s = after
				goto Restart
			}
		}

		// Candidate starts: http(s)://, mailto:, xmpp:, www. —
		// and (matching GitHub) not directly after an ASCII letter.
		if (c == 'h' || c == 'm' || c == 'x' || c == 'w') && (i == 0 || !isLetter(s[i-1])) {
			if link, after, ok := p.parseAutoProto(s, i, vd); ok {
				if i > 0 {
					out = append(out, &Plain{Text: s[:i]})
				}
				out = append(out, link)
				vd.skip(len(s) - len(after))
				s = after
				goto Restart
			}
		}
	}
	if out == nil {
		return nil
	}
	// Append whatever plain text remains after the last link.
	out = append(out, &Plain{Text: s})
	return out
}
587
588func (p *parseState) parseAutoProto(s string, i int, vd *validDomainChecker) (link *Link, after string, found bool) {
589	if s == "" {
590		return
591	}
592	switch s[i] {
593	case 'h':
594		var n int
595		if strings.HasPrefix(s[i:], "https://") {
596			n = len("https://")
597		} else if strings.HasPrefix(s[i:], "http://") {
598			n = len("http://")
599		} else {
600			return
601		}
602		return p.parseAutoHTTP(s[i:i+n], s, i, i+n, i+n+1, vd)
603	case 'w':
604		if !strings.HasPrefix(s[i:], "www.") {
605			return
606		}
607		// GitHub Flavored Markdown says to use http://,
608		// but it's not 1985 anymore. We live in the https:// future
609		// (unless the parser is explicitly configured otherwise).
610		// People who really care in their docs can write http:// themselves.
611		scheme := "https://"
612		if p.AutoLinkAssumeHTTP {
613			scheme = "http://"
614		}
615		return p.parseAutoHTTP(scheme, s, i, i, i+3, vd)
616	case 'm':
617		if !strings.HasPrefix(s[i:], "mailto:") {
618			return
619		}
620		return p.parseAutoMailto(s, i)
621	case 'x':
622		if !strings.HasPrefix(s[i:], "xmpp:") {
623			return
624		}
625		return p.parseAutoXmpp(s, i)
626	}
627	return
628}
629
// parseAutoHTTP parses an extended http/https or www autolink whose
// valid domain starts at s[start].
// https://github.github.com/gfm/#extended-www-autolink
// https://github.github.com/gfm/#extended-url-autolink
//
// scheme is prepended to s[start:end] to form the URL;
// textstart is where the displayed link text begins (from the callers,
// textstart < start when an explicit scheme was written, and
// textstart == start for www links, where the scheme is implied);
// min is the minimum position the link must reach for a valid parse.
func (p *parseState) parseAutoHTTP(scheme, s string, textstart, start, min int, vd *validDomainChecker) (link *Link, after string, found bool) {
	n, ok := vd.parseValidDomain(start)
	if !ok {
		return
	}
	i := start + n
	domEnd := i

	// “After a valid domain, zero or more non-space non-< characters may follow.”
	paren := 0 // net count of unmatched '(' in the trailing text
	for i < len(s) {
		r, n := utf8.DecodeRuneInString(s[i:])
		if isUnicodeSpace(r) || r == '<' {
			break
		}
		if r == '(' {
			paren++
		}
		if r == ')' {
			paren--
		}
		i += n
	}

	// https://github.github.com/gfm/#extended-autolink-path-validation
Trim:
	for i > min {
		switch s[i-1] {
		case '?', '!', '.', ',', ':', '@', '_', '~':
			// Trim certain trailing punctuation.
			i--
			continue Trim

		case ')':
			// Trim trailing unmatched (by count only) parens.
			if paren < 0 {
				for s[i-1] == ')' && paren < 0 {
					paren++
					i--
				}
				continue Trim
			}

		case ';':
			// Trim entity reference.
			// After doing the work of the scan, we either cut that part off the string
			// or we stop the trimming entirely, so there's no chance of repeating
			// the scan on a future iteration and going accidentally quadratic.
			// Even though the Markdown spec already requires having a complete
			// list of all the HTML entities, the GitHub definition here just requires
			// "looks like" an entity, meaning it's an ampersand, letters/digits, and semicolon.
			// (The j < i-2 guard means at least one letter or digit must sit
			// between the '&' and the ';'.)
			for j := i - 2; j > start; j-- {
				if j < i-2 && s[j] == '&' {
					i = j
					continue Trim
				}
				if !isLetterDigit(s[j]) {
					break Trim
				}
			}
		}
		break Trim
	}

	// According to the literal text of the GitHub Flavored Markdown spec
	// and the actual behavior on GitHub,
	// www.example.com$foo turns into <a href="https://www.example.com$foo">,
	// but that makes the character restrictions in the valid-domain check
	// almost meaningless. So we insist that when all is said and done,
	// if the domain is followed by anything, that thing must be a slash,
	// even though GitHub is not that picky.
	// People might complain about www.example.com:1234 not working,
	// but if you want to get fancy with that kind of thing, just write http:// in front.
	if textstart == start && i > domEnd && s[domEnd] != '/' {
		i = domEnd
	}

	if i < min {
		return
	}

	link = &Link{
		Inner: []Inline{&Plain{Text: s[textstart:i]}},
		URL:   scheme + s[start:i],
	}
	return link, s[i:], true
}
719
// A validDomainChecker checks for valid-domain prefixes within s,
// remembering in cut a prefix of s known to contain no valid domain,
// so that repeated checks over the same text cannot go quadratic.
type validDomainChecker struct {
	s   string
	cut int // before this index, no valid domains
}

// skip discards the first i bytes of s, keeping cut in sync.
func (v *validDomainChecker) skip(i int) {
	v.s = v.s[i:]
	v.cut -= i
}
729
// parseValidDomain parses a valid domain.
// https://github.github.com/gfm/#valid-domain
//
// If s starts with a valid domain, parseValidDomain returns
// the length of that domain and true. If s does not start with
// a valid domain, parseValidDomain returns n, false,
// where n is the length of a prefix guaranteed not to be acceptable
// to any future call to parseValidDomain.
//
// “A valid domain consists of segments of alphanumeric characters,
// underscores (_) and hyphens (-) separated by periods (.).
// There must be at least one period, and no underscores may be
// present in the last two segments of the domain.”
//
// The spec does not spell out whether segments can be empty.
// Empirically, in GitHub's implementation they can.
func (v *validDomainChecker) parseValidDomain(start int) (n int, found bool) {
	if start < v.cut {
		// Already known: no valid domain starts before v.cut.
		return 0, false
	}
	i := start
	// dots counts periods seen; an underscore resets it to -2 so that
	// at least two more periods must follow before dots is non-negative,
	// which enforces “no underscores in the last two segments”.
	dots := 0
	for ; i < len(v.s); i++ {
		c := v.s[i]
		if c == '_' {
			dots = -2
			continue
		}
		if c == '.' {
			dots++
			continue
		}
		if !isLDH(c) {
			break
		}
	}
	// Valid only if non-empty and at least one period with no underscore
	// in the final two segments (dots >= 0 implies dots >= 1 here is not
	// required: dots == 0 with a nonempty run is accepted by this check,
	// matching the empirical GitHub behavior noted above).
	if dots >= 0 && i > start {
		return i - start, true
	}
	// Record the scanned prefix as fruitless for future calls.
	v.cut = i
	return 0, false
}
772
// parseAutoEmail parses an extended email autolink around the '@' at s[i].
// https://github.github.com/gfm/#extended-email-autolink
// It returns the text before the address, the link itself,
// and the text after the address.
func (p *parseState) parseAutoEmail(s string, i int) (before string, link *Link, after string, ok bool) {
	if s[i] != '@' {
		return
	}

	// “One or more characters which are alphanumeric, or ., -, _, or +.”
	// Scan backward from the '@' for the user part.
	j := i
	for j > 0 && (isLDH(s[j-1]) || s[j-1] == '_' || s[j-1] == '+' || s[j-1] == '.') {
		j--
	}
	if i-j < 1 {
		return
	}

	// “One or more characters which are alphanumeric, or - or _, separated by periods (.).
	// There must be at least one period. The last character must not be one of - or _.”
	// Scan forward from the '@' for the domain, counting periods.
	dots := 0
	k := i + 1
	for k < len(s) && (isLDH(s[k]) || s[k] == '_' || s[k] == '.') {
		if s[k] == '.' {
			if s[k-1] == '.' {
				// Empirically, .. stops the scan but [email protected] is fine.
				break
			}
			dots++
		}
		k++
	}

	// “., -, and _ can occur on both sides of the @, but only . may occur at the end
	// of the email address, in which case it will not be considered part of the address”
	if s[k-1] == '.' {
		dots--
		k--
	}
	if s[k-1] == '-' || s[k-1] == '_' {
		return
	}
	// Require at least one period and at least two non-period
	// characters in the domain.
	if k-(i+1)-dots < 2 || dots < 1 {
		return
	}

	link = &Link{
		Inner: []Inline{&Plain{Text: s[j:k]}},
		URL:   "mailto:" + s[j:k],
	}
	return s[:j], link, s[k:], true
}
821
822func (p *parseState) parseAutoMailto(s string, i int) (link *Link, after string, ok bool) {
823	j := i + len("mailto:")
824	for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
825		j++
826	}
827	if j >= len(s) || s[j] != '@' {
828		return
829	}
830	before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
831	if before != "mailto:" || !ok {
832		return nil, "", false
833	}
834	link.Inner[0] = &Plain{Text: s[i : len(s)-len(after)]}
835	return link, after, true
836}
837
838func (p *parseState) parseAutoXmpp(s string, i int) (link *Link, after string, ok bool) {
839	j := i + len("xmpp:")
840	for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
841		j++
842	}
843	if j >= len(s) || s[j] != '@' {
844		return
845	}
846	before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
847	if before != "xmpp:" || !ok {
848		return nil, "", false
849	}
850	if after != "" && after[0] == '/' {
851		k := 1
852		for k < len(after) && (isLetterDigit(after[k]) || after[k] == '@' || after[k] == '.') {
853			k++
854		}
855		after = after[k:]
856	}
857	url := s[i : len(s)-len(after)]
858	link.Inner[0] = &Plain{Text: url}
859	link.URL = url
860	return link, after, true
861}
862