1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package markdown
6
7import (
8	"bytes"
9	"fmt"
10	"strings"
11	"unicode"
12	"unicode/utf8"
13)
14
15/*
16text node can be
17
18 - other literal text
19 - run of * or _ characters
20 - [
21 - ![
22
23keep delimiter stack pointing at non-other literal text
24each node contains
25
26 - type of delimiter [ ![ _ *
27 - number of delimiters
28 - active or not
 - potential opener, potential closer, or both
30
31when a ] is hit, call look for link or image
32when end is hit, call process emphasis
33
34look for link or image:
35
36	find topmost [ or ![
37	if none, emit literal ]
	if it's inactive, remove and emit literal ]
39	parse ahead to look for rest of link; if none, remove and emit literal ]
40	run process emphasis on the interior,
41	remove opener
42	if this was a link (not an image), set all [ before opener to inactive, to avoid links inside links
43
44process emphasis
45
46	walk forward in list to find a closer.
47	walk back to find first potential matching opener.
48	if found:
49		strong for length >= 2
50		insert node
51		drop delimiters between opener and closer
52		remove 1 or 2 from open/close count, removing if now empty
53		if closing has some left, go around again on this node
54	if not:
55		set openers bottom for this kind of element to before current_position
56		if the closer at current pos is not an opener, remove it
57
58seems needlessly complex. two passes
59
60scan and find ` ` first.
61
62pass 1. scan and find [ and ]() and leave the rest alone.
63
64each completed one invokes emphasis on inner text and then on the overall list.
65
66*/
67
// An Inline is a single inline element of a parsed Markdown document,
// such as literal text, a code span, or emphasis.
type Inline interface {
	// PrintHTML writes the element's HTML rendering to the buffer.
	PrintHTML(*bytes.Buffer)
	// PrintText writes the element's text content to the buffer.
	// Container elements in this file write only their children's text;
	// leaf elements still pass their text through htmlEscaper.
	PrintText(*bytes.Buffer)
	// printMarkdown writes the element back out as Markdown source.
	printMarkdown(*bytes.Buffer)
}
73
74type Plain struct {
75	Text string
76}
77
78func (*Plain) Inline() {}
79
80func (x *Plain) PrintHTML(buf *bytes.Buffer) {
81	htmlEscaper.WriteString(buf, x.Text)
82}
83
84func (x *Plain) printMarkdown(buf *bytes.Buffer) {
85	buf.WriteString(x.Text)
86}
87
88func (x *Plain) PrintText(buf *bytes.Buffer) {
89	htmlEscaper.WriteString(buf, x.Text)
90}
91
// An openPlain is the literal text of a link or image opener ("[" or "![")
// that is waiting to be matched with a closing "]".
// Openers that are never matched are converted to ordinary Plain text.
type openPlain struct {
	Plain
	i int // index in the input just past the opening bracket, where the link text begins
}
96
// An emphPlain is the literal text of a delimiter run (such as "*", "__", "~~",
// or a single quote character) that may later be turned into emphasis,
// strikethrough, or a curly quote.
type emphPlain struct {
	Plain
	canOpen  bool // whether the run may open a span
	canClose bool // whether the run may close a span
	i        int  // position in output where emph is
	n        int  // length of original span
}
104
105type Escaped struct {
106	Plain
107}
108
109func (x *Escaped) printMarkdown(buf *bytes.Buffer) {
110	buf.WriteByte('\\')
111	x.Plain.printMarkdown(buf)
112}
113
114type Code struct {
115	Text string
116}
117
118func (*Code) Inline() {}
119
120func (x *Code) PrintHTML(buf *bytes.Buffer) {
121	fmt.Fprintf(buf, "<code>%s</code>", htmlEscaper.Replace(x.Text))
122}
123
124func (x *Code) printMarkdown(buf *bytes.Buffer) {
125	if len(x.Text) == 0 {
126		return
127	}
128	// Use the fewest backticks we can, and add spaces as needed.
129	ticks := strings.Repeat("`", longestSequence(x.Text, '`')+1)
130	buf.WriteString(ticks)
131	if x.Text[0] == '`' {
132		buf.WriteByte(' ')
133	}
134	buf.WriteString(x.Text)
135	if x.Text[len(x.Text)-1] == '`' {
136		buf.WriteByte(' ')
137	}
138	buf.WriteString(ticks)
139}
140
// longestSequence returns the length of the longest sequence of consecutive bytes b in s.
// It returns 0 if b does not occur in s.
//
// The scan is byte by byte. Ranging over the string would visit only the
// first byte of each UTF-8 sequence, which miscounts when b is not ASCII.
func longestSequence(s string, b byte) int {
	best, run := 0, 0
	for i := 0; i < len(s); i++ {
		if s[i] != b {
			run = 0
			continue
		}
		run++
		if run > best {
			best = run
		}
	}
	return best
}
160
161func (x *Code) PrintText(buf *bytes.Buffer) {
162	htmlEscaper.WriteString(buf, x.Text)
163}
164
165type Strong struct {
166	Marker string
167	Inner  []Inline
168}
169
170func (x *Strong) Inline() {
171}
172
173func (x *Strong) PrintHTML(buf *bytes.Buffer) {
174	buf.WriteString("<strong>")
175	for _, c := range x.Inner {
176		c.PrintHTML(buf)
177	}
178	buf.WriteString("</strong>")
179}
180
181func (x *Strong) printMarkdown(buf *bytes.Buffer) {
182	buf.WriteString(x.Marker)
183	for _, c := range x.Inner {
184		c.printMarkdown(buf)
185	}
186	buf.WriteString(x.Marker)
187}
188
189func (x *Strong) PrintText(buf *bytes.Buffer) {
190	for _, c := range x.Inner {
191		c.PrintText(buf)
192	}
193}
194
195type Del struct {
196	Marker string
197	Inner  []Inline
198}
199
200func (x *Del) Inline() {
201
202}
203
204func (x *Del) PrintHTML(buf *bytes.Buffer) {
205	buf.WriteString("<del>")
206	for _, c := range x.Inner {
207		c.PrintHTML(buf)
208	}
209	buf.WriteString("</del>")
210}
211
212func (x *Del) printMarkdown(buf *bytes.Buffer) {
213	buf.WriteString(x.Marker)
214	for _, c := range x.Inner {
215		c.printMarkdown(buf)
216	}
217	buf.WriteString(x.Marker)
218}
219
220func (x *Del) PrintText(buf *bytes.Buffer) {
221	for _, c := range x.Inner {
222		c.PrintText(buf)
223	}
224}
225
226type Emph struct {
227	Marker string
228	Inner  []Inline
229}
230
231func (*Emph) Inline() {}
232
233func (x *Emph) PrintHTML(buf *bytes.Buffer) {
234	buf.WriteString("<em>")
235	for _, c := range x.Inner {
236		c.PrintHTML(buf)
237	}
238	buf.WriteString("</em>")
239}
240
241func (x *Emph) printMarkdown(buf *bytes.Buffer) {
242	buf.WriteString(x.Marker)
243	for _, c := range x.Inner {
244		c.printMarkdown(buf)
245	}
246	buf.WriteString(x.Marker)
247}
248
249func (x *Emph) PrintText(buf *bytes.Buffer) {
250	for _, c := range x.Inner {
251		c.PrintText(buf)
252	}
253}
254
255func (p *parseState) emit(i int) {
256	if p.emitted < i {
257		p.list = append(p.list, &Plain{p.s[p.emitted:i]})
258		p.emitted = i
259	}
260}
261
262func (p *parseState) skip(i int) {
263	p.emitted = i
264}
265
// inline parses s as a sequence of inline elements and returns the resulting list.
// It overwrites p.s, p.list, and p.emitted; the returned slice is also left in p.list.
func (p *parseState) inline(s string) []Inline {
	s = trimSpaceTab(s)
	// Scan text looking for inlines.
	// Leaf inlines are converted immediately.
	// Non-leaf inlines have potential starts pushed on a stack while we await completion.
	// Links take priority over other emphasis, so the emphasis must be delayed.
	p.s = s
	p.list = nil
	p.emitted = 0
	var opens []int // indexes of open ![ and [ Plains in p.list
	// lastLinkOpen is the open.i of the most recently completed link (not image).
	// A "[" opener positioned before it may not start another link.
	var lastLinkOpen int
	// backticks records whether p.backticks has been reset for this string yet.
	backticks := false
	i := 0
	for i < len(s) {
		// Pick a parser based on the current byte. Each parser reports
		// (inline, start, end, ok): on success the text before start is emitted
		// as Plain, the inline is appended, and scanning resumes at end.
		var parser func(*parseState, string, int) (Inline, int, int, bool)
		switch s[i] {
		case '\\':
			parser = parseEscape
		case '`':
			if !backticks {
				backticks = true
				p.backticks.reset()
			}
			parser = p.backticks.parseCodeSpan
		case '<':
			parser = parseAutoLinkOrHTML
		case '[':
			parser = parseLinkOpen
		case '!':
			parser = parseImageOpen
		case '_', '*':
			parser = parseEmph
		case '.':
			if p.SmartDot {
				parser = parseDot
			}
		case '-':
			if p.SmartDash {
				parser = parseDash
			}
		case '"', '\'':
			if p.SmartQuote {
				parser = parseEmph
			}
		case '~':
			if p.Strikethrough {
				parser = parseEmph
			}
		case '\n': // TODO what about eof
			parser = parseBreak
		case '&':
			parser = parseHTMLEntity
		case ':':
			if p.Emoji {
				parser = parseEmoji
			}
		}
		if parser != nil {
			if x, start, end, ok := parser(p, s, i); ok {
				p.emit(start)
				if _, ok := x.(*openPlain); ok {
					// Remember where this opener will sit in p.list.
					opens = append(opens, len(p.list))
				}
				p.list = append(p.list, x)
				i = end
				p.skip(i)
				continue
			}
		}
		if s[i] == ']' && len(opens) > 0 {
			// Pop the innermost opener; it is consumed whether or not a link results.
			oi := opens[len(opens)-1]
			open := p.list[oi].(*openPlain)
			opens = opens[:len(opens)-1]
			// Images are always allowed; a link opener is allowed only if it
			// does not precede the most recently completed link.
			if open.Text[0] == '!' || lastLinkOpen <= open.i {
				if x, end, ok := p.parseLinkClose(s, i, open); ok {
					p.corner = p.corner || x.corner || linkCorner(x.URL)
					p.emit(i)
					// Everything after the opener becomes the link text,
					// with emphasis resolved inside it.
					x.Inner = p.emph(nil, p.list[oi+1:])
					if open.Text[0] == '!' {
						p.list[oi] = (*Image)(x)
					} else {
						p.list[oi] = x
					}
					// The link or image replaces the opener and all that followed it.
					p.list = p.list[:oi+1]
					p.skip(end)
					i = end
					if open.Text[0] == '[' {
						// No links around links.
						lastLinkOpen = open.i
					}
					continue
				}
			}
		}
		i++
	}
	// Flush trailing text, then resolve the remaining emphasis in place,
	// merge adjacent plain text, and run the autolink pass.
	p.emit(len(s))
	p.list = p.emph(p.list[:0], p.list)
	p.list = p.mergePlain(p.list)
	p.list = p.autoLinkText(p.list)

	return p.list
}
369
// emph resolves the delimiter runs (*emphPlain) in src into Emph, Strong, and Del
// nodes or curly quotes, appending the result to dst and returning it.
// Unused link/image openers (*openPlain) are converted to their plain text.
// Other elements are copied through unchanged.
//
// dst may share its backing array with src (inline calls it that way).
func (ps *parseState) emph(dst, src []Inline) []Inline {
	const chars = "_*~\"'"
	// stack[k] holds the potential openers for delimiter chars[k]
	// that are still present in dst and not yet matched.
	var stack [len(chars)][]*emphPlain
	stackOf := func(c byte) int {
		return strings.IndexByte(chars, c)
	}

	// trimStack drops, from every stack, the openers whose recorded
	// position is at or beyond the current end of dst.
	trimStack := func() {
		for i := range stack {
			stk := &stack[i]
			for len(*stk) > 0 && (*stk)[len(*stk)-1].i >= len(dst) {
				*stk = (*stk)[:len(*stk)-1]
			}
		}
	}

Src:
	for i := 0; i < len(src); i++ {
		if open, ok := src[i].(*openPlain); ok {
			// Convert unused link/image open marker to plain text.
			dst = append(dst, &open.Plain)
			continue
		}
		p, ok := src[i].(*emphPlain)
		if !ok {
			dst = append(dst, src[i])
			continue
		}
		if p.canClose {
			stk := &stack[stackOf(p.Text[0])]
		Loop:
			// Keep matching against openers until this closer's text is used up
			// or no compatible opener remains.
			for p.Text != "" {
				// Looking for same symbol and compatible with p.Text.
				for i := len(*stk) - 1; i >= 0; i-- {
					start := (*stk)[i]
					// For * and _: if either run can both open and close, the two
					// original lengths must not sum to a multiple of 3 unless
					// both lengths are themselves multiples of 3.
					if (p.Text[0] == '*' || p.Text[0] == '_') && (p.canOpen && p.canClose || start.canOpen && start.canClose) && (p.n+start.n)%3 == 0 && (p.n%3 != 0 || start.n%3 != 0) {
						continue
					}
					if p.Text[0] == '~' && len(p.Text) != len(start.Text) { // ~ matches ~, ~~ matches ~~
						continue
					}
					if p.Text[0] == '"' {
						// Matched double quotes: rewrite opener and closer as curly
						// quotes, leaving everything between them in place.
						dst[start.i].(*emphPlain).Text = "“"
						p.Text = "”"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					if p.Text[0] == '\'' {
						// Matched single quotes: same treatment as double quotes.
						dst[start.i].(*emphPlain).Text = "‘"
						p.Text = "’"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					// d is the number of delimiter characters consumed from each side.
					var d int
					if len(p.Text) >= 2 && len(start.Text) >= 2 {
						// strong
						d = 2
					} else {
						// emph
						d = 1
					}
					del := p.Text[0] == '~'
					// Copy everything after the opener into the new node's Inner.
					x := &Emph{Marker: p.Text[:d], Inner: append([]Inline(nil), dst[start.i+1:]...)}
					start.Text = start.Text[:len(start.Text)-d]
					p.Text = p.Text[d:]
					// Cut dst back to the opener, keeping the opener only
					// if it still has delimiter characters left.
					if start.Text == "" {
						dst = dst[:start.i]
					} else {
						dst = dst[:start.i+1]
					}
					trimStack()
					if del {
						dst = append(dst, (*Del)(x))
					} else if d == 2 {
						dst = append(dst, (*Strong)(x))
					} else {
						dst = append(dst, x)
					}
					continue Loop
				}
				break
			}
		}
		if p.Text != "" {
			// Leftover delimiter text: unmatched quotes become curly quotes.
			// The run is kept as a potential opener if it can open,
			// and demoted to plain text otherwise.
			stk := &stack[stackOf(p.Text[0])]
			if p.Text == "'" {
				p.Text = "’"
			}
			if p.Text == "\"" {
				if p.canClose {
					p.Text = "”"
				} else {
					p.Text = "“"
				}
			}
			if p.canOpen {
				p.i = len(dst)
				dst = append(dst, p)
				*stk = append(*stk, p)
			} else {
				dst = append(dst, &p.Plain)
			}
		}
	}
	return dst
}
480
481func mdUnescape(s string) string {
482	if !strings.Contains(s, `\`) && !strings.Contains(s, `&`) {
483		return s
484	}
485	return mdUnescaper.Replace(s)
486}
487
488var mdUnescaper = func() *strings.Replacer {
489	var list = []string{
490		`\!`, `!`,
491		`\"`, `"`,
492		`\#`, `#`,
493		`\$`, `$`,
494		`\%`, `%`,
495		`\&`, `&`,
496		`\'`, `'`,
497		`\(`, `(`,
498		`\)`, `)`,
499		`\*`, `*`,
500		`\+`, `+`,
501		`\,`, `,`,
502		`\-`, `-`,
503		`\.`, `.`,
504		`\/`, `/`,
505		`\:`, `:`,
506		`\;`, `;`,
507		`\<`, `<`,
508		`\=`, `=`,
509		`\>`, `>`,
510		`\?`, `?`,
511		`\@`, `@`,
512		`\[`, `[`,
513		`\\`, `\`,
514		`\]`, `]`,
515		`\^`, `^`,
516		`\_`, `_`,
517		"\\`", "`",
518		`\{`, `{`,
519		`\|`, `|`,
520		`\}`, `}`,
521		`\~`, `~`,
522	}
523
524	for name, repl := range htmlEntity {
525		list = append(list, name, repl)
526	}
527	return strings.NewReplacer(list...)
528}()
529
// isPunct reports whether c is an ASCII punctuation character,
// that is, one of the four ranges ! through /, : through @,
// [ through `, and { through ~.
func isPunct(c byte) bool {
	switch {
	case '!' <= c && c <= '/',
		':' <= c && c <= '@',
		'[' <= c && c <= '`',
		'{' <= c && c <= '~':
		return true
	}
	return false
}
533
534func parseEscape(p *parseState, s string, i int) (Inline, int, int, bool) {
535	if i+1 < len(s) {
536		c := s[i+1]
537		if isPunct(c) {
538			return &Escaped{Plain{s[i+1 : i+2]}}, i, i + 2, true
539		}
540		if c == '\n' { // TODO what about eof
541			if i > 0 && s[i-1] == '\\' {
542				p.corner = true // goldmark mishandles \\\ newline
543			}
544			end := i + 2
545			for end < len(s) && (s[end] == ' ' || s[end] == '\t') {
546				end++
547			}
548			return &HardBreak{}, i, end, true
549		}
550	}
551	return nil, 0, 0, false
552}
553
554func parseDot(p *parseState, s string, i int) (Inline, int, int, bool) {
555	if i+2 < len(s) && s[i+1] == '.' && s[i+2] == '.' {
556		return &Plain{"…"}, i, i + 3, true
557	}
558	return nil, 0, 0, false
559}
560
561func parseDash(p *parseState, s string, i int) (Inline, int, int, bool) {
562	if i+1 >= len(s) || s[i+1] != '-' {
563		return nil, 0, 0, false
564	}
565
566	n := 2
567	for i+n < len(s) && s[i+n] == '-' {
568		n++
569	}
570
571	// Mimic cmark-gfm. Can't make this stuff up.
572	em, en := 0, 0
573	switch {
574	case n%3 == 0:
575		em = n / 3
576	case n%2 == 0:
577		en = n / 2
578	case n%3 == 2:
579		em = (n - 2) / 3
580		en = 1
581	case n%3 == 1:
582		em = (n - 4) / 3
583		en = 2
584	}
585	return &Plain{strings.Repeat("—", em) + strings.Repeat("–", en)}, i, i + n, true
586}
587
// Inline code span markers must fit on punched cards, to match cmark-gfm.
const maxBackticks = 80

// A backtickParser parses code spans within one input string and remembers
// what earlier scans found, so failed scans need not be repeated.
type backtickParser struct {
	// last[m-1] is the start index of the most recently recorded
	// run of exactly m backticks.
	last [maxBackticks]int
	// scanned is set once a scan has reached the end of the input
	// without finding a match.
	scanned bool
}

// reset returns b to its zero state, ready for a new input string.
func (b *backtickParser) reset() {
	b.last = [maxBackticks]int{}
	b.scanned = false
}
599
600func (b *backtickParser) parseCodeSpan(p *parseState, s string, i int) (Inline, int, int, bool) {
601	start := i
602	// Count leading backticks. Need to find that many again.
603	n := 1
604	for i+n < len(s) && s[i+n] == '`' {
605		n++
606	}
607
608	// If we've already scanned the whole string (for a different count),
609	// we can skip a failed scan by checking whether we saw this count.
610	// To enable this optimization, following cmark-gfm, we declare by fiat
611	// that more than maxBackticks backquotes is too many.
612	if n > len(b.last) || b.scanned && b.last[n-1] < i+n {
613		goto NoMatch
614	}
615
616	for end := i + n; end < len(s); {
617		if s[end] != '`' {
618			end++
619			continue
620		}
621		estart := end
622		for end < len(s) && s[end] == '`' {
623			end++
624		}
625		m := end - estart
626		if !b.scanned && m < len(b.last) {
627			b.last[m-1] = estart
628		}
629		if m == n {
630			// Match.
631			// Line endings are converted to single spaces.
632			text := s[i+n : estart]
633			text = strings.ReplaceAll(text, "\n", " ")
634
635			// If enclosed text starts and ends with a space and is not all spaces,
636			// one space is removed from start and end, to allow `` ` `` to quote a single backquote.
637			if len(text) >= 2 && text[0] == ' ' && text[len(text)-1] == ' ' && trimSpace(text) != "" {
638				text = text[1 : len(text)-1]
639			}
640
641			return &Code{text}, start, end, true
642		}
643	}
644	b.scanned = true
645
646NoMatch:
647	// No match, so none of these backticks count: skip them all.
648	// For example ``x` is not a single backtick followed by a code span.
649	// Returning nil, 0, false would advance to the second backtick and try again.
650	return &Plain{s[i : i+n]}, start, i + n, true
651}
652
653func parseAutoLinkOrHTML(p *parseState, s string, i int) (Inline, int, int, bool) {
654	if x, end, ok := parseAutoLinkURI(s, i); ok {
655		return x, i, end, true
656	}
657	if x, end, ok := parseAutoLinkEmail(s, i); ok {
658		return x, i, end, true
659	}
660	if x, end, ok := parseHTMLTag(p, s, i); ok {
661		return x, i, end, true
662	}
663	return nil, 0, 0, false
664}
665
// isLetter reports whether c is an ASCII letter.
func isLetter(c byte) bool {
	// Setting bit 0x20 folds A-Z onto a-z; no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
669
// isLDH reports whether c is an ASCII letter, digit, or hyphen.
func isLDH(c byte) bool {
	return c == '-' || isLetterDigit(c)
}

// isLetterDigit reports whether c is an ASCII letter or digit.
func isLetterDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
677
678func parseLinkOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
679	return &openPlain{Plain{s[i : i+1]}, i + 1}, i, i + 1, true
680}
681
682func parseImageOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
683	if i+1 < len(s) && s[i+1] == '[' {
684		return &openPlain{Plain{s[i : i+2]}, i + 2}, i, i + 2, true
685	}
686	return nil, 0, 0, false
687}
688
// parseEmph parses the delimiter run starting at s[i] and always reports ok.
//
// For '*', '_', and '~' the run is every consecutive copy of s[i]; for any
// other byte (the quote characters) it is that single byte. The result is an
// *emphPlain recording the run's text, its length, and whether it can open
// and/or close a span according to the flanking rules quoted below.
//
// A run of more than two '~' is returned as ordinary Plain text instead,
// and any '~' run whose length is not exactly 2 sets p.corner.
func parseEmph(p *parseState, s string, i int) (Inline, int, int, bool) {
	c := s[i]
	j := i + 1 // j is the index just past the delimiter run
	if c == '*' || c == '~' || c == '_' {
		for j < len(s) && s[j] == c {
			j++
		}
	}
	if c == '~' && j-i != 2 {
		// Goldmark does not accept ~text~
		// and incorrectly accepts ~~~text~~~.
		// Only ~~ is correct.
		p.corner = true
	}
	if c == '~' && j-i > 2 {
		return &Plain{s[i:j]}, i, j, true
	}

	// before and after are the runes adjacent to the run;
	// the start and end of the input count as a space.
	var before, after rune
	if i == 0 {
		before = ' '
	} else {
		before, _ = utf8.DecodeLastRuneInString(s[:i])
	}
	if j >= len(s) {
		after = ' '
	} else {
		after, _ = utf8.DecodeRuneInString(s[j:])
	}

	// “A left-flanking delimiter run is a delimiter run that is
	// (1) not followed by Unicode whitespace, and either
	// (2a) not followed by a Unicode punctuation character, or
	// (2b) followed by a Unicode punctuation character
	// and preceded by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	leftFlank := !isUnicodeSpace(after) &&
		(!isUnicodePunct(after) || isUnicodeSpace(before) || isUnicodePunct(before))

	// “A right-flanking delimiter run is a delimiter run that is
	// (1) not preceded by Unicode whitespace, and either
	// (2a) not preceded by a Unicode punctuation character, or
	// (2b) preceded by a Unicode punctuation character
	// and followed by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	rightFlank := !isUnicodeSpace(before) &&
		(!isUnicodePunct(before) || isUnicodeSpace(after) || isUnicodePunct(after))

	var canOpen, canClose bool

	switch c {
	case '\'', '"':
		// A quote opens only when it is strictly left-flanking and does not
		// directly follow a closing bracket or parenthesis.
		canOpen = leftFlank && !rightFlank && before != ']' && before != ')'
		canClose = rightFlank
	case '*', '~':
		// “A single * character can open emphasis iff
		// it is part of a left-flanking delimiter run.”

		// “A double ** can open strong emphasis iff
		// it is part of a left-flanking delimiter run.”
		canOpen = leftFlank

		// “A single * character can close emphasis iff
		// it is part of a right-flanking delimiter run.”

		// “A double ** can close strong emphasis iff
		// it is part of a right-flanking delimiter run.”
		canClose = rightFlank
	case '_':
		// “A single _ character can open emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”

		// “A double __ can open strong emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”
		canOpen = leftFlank && (!rightFlank || isUnicodePunct(before))

		// “A single _ character can close emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”

		// “A double __ can close strong emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”
		canClose = rightFlank && (!leftFlank || isUnicodePunct(after))
	}

	return &emphPlain{Plain: Plain{s[i:j]}, canOpen: canOpen, canClose: canClose, n: j - i}, i, j, true
}
785
// isUnicodeSpace reports whether r is a Unicode whitespace character as
// CommonMark defines it: any code point in the Unicode Zs category, or a
// tab, line feed, form feed, or carriage return.
func isUnicodeSpace(r rune) bool {
	if r < 0x80 {
		switch r {
		case ' ', '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}
	return unicode.In(r, unicode.Zs)
}
792
793func isUnicodePunct(r rune) bool {
794	if r < 0x80 {
795		return isPunct(byte(r))
796	}
797	return unicode.In(r, unicode.Punct)
798}
799
// parseLinkClose tries to complete a link or image whose closing "]" is at s[i]
// and whose opener is open. It tries, in order:
//
//   - an inline link, "](dest title)";
//   - a full reference link, "][label]", looked up in p.links;
//   - a collapsed ("][]") or shortcut reference link, using the text between
//     the brackets as the label.
//
// On success it returns the link (with Inner not yet set) and the index just
// past the link syntax. A malformed inline destination or an unparsable label
// falls back to the shortcut form; a well-formed but unknown label in the
// full reference form fails outright.
func (p *parseState) parseLinkClose(s string, i int, open *openPlain) (*Link, int, bool) {
	if i+1 < len(s) {
		switch s[i+1] {
		case '(':
			// Inline link - [Text](Dest Title), with Title omitted or both Dest and Title omitted.
			// Note: this i shadows the parameter, so breaking out of the
			// switch resumes below with i still at the "]".
			i := skipSpace(s, i+2)
			var dest, title string
			var titleChar byte
			var corner bool
			if i < len(s) && s[i] != ')' {
				var ok bool
				dest, i, ok = parseLinkDest(s, i)
				if !ok {
					break
				}
				i = skipSpace(s, i)
				if i < len(s) && s[i] != ')' {
					title, titleChar, i, ok = parseLinkTitle(s, i)
					if title == "" {
						corner = true
					}
					if !ok {
						break
					}
					i = skipSpace(s, i)
				}
			}
			if i < len(s) && s[i] == ')' {
				return &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}, i + 1, true
			}
			// NOTE: Test malformed ( ) with shortcut reference
			// TODO fall back on syntax error?

		case '[':
			// Full reference link - [Text][Label]
			// As above, this i shadows the parameter.
			label, i, ok := parseLinkLabel(p, s, i+1)
			if !ok {
				break
			}
			if link, ok := p.links[normalizeLabel(label)]; ok {
				return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, i, true
			}
			// Note: Could break here, but CommonMark dingus does not
			// fall back to trying Text for [Text][Label] when Label is unknown.
			// Unclear from spec what the correct answer is.
			return nil, 0, false
		}
	}

	// Collapsed or shortcut reference link: [Text][] or [Text].
	end := i + 1
	if strings.HasPrefix(s[end:], "[]") {
		end += 2
	}

	// The label is the raw input between the opening bracket and the "]".
	if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok {
		return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, end, true
	}
	return nil, 0, false
}
860
// skipSpace returns the index of the first byte at or after s[i]
// that is not a space, tab, or newline. It returns i unchanged when
// i is already at or past the end of s.
func skipSpace(s string, i int) int {
	// Note: Blank lines have already been removed.
	for ; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n':
			continue
		}
		return i
	}
	return i
}
868
869func linkCorner(url string) bool {
870	for i := 0; i < len(url); i++ {
871		if url[i] == '%' {
872			if i+2 >= len(url) || !isHexDigit(url[i+1]) || !isHexDigit(url[i+2]) {
873				// Goldmark and the Dingus re-escape such percents as %25,
874				// but the spec does not seem to require this behavior.
875				return true
876			}
877		}
878	}
879	return false
880}
881
882func (p *parseState) mergePlain(list []Inline) []Inline {
883	out := list[:0]
884	start := 0
885	for i := 0; ; i++ {
886		if i < len(list) && toPlain(list[i]) != nil {
887			continue
888		}
889		// Non-Plain or end of list.
890		if start < i {
891			out = append(out, mergePlain1(list[start:i]))
892		}
893		if i >= len(list) {
894			break
895		}
896		out = append(out, list[i])
897		start = i + 1
898	}
899	return out
900}
901
902func toPlain(x Inline) *Plain {
903	// TODO what about Escaped?
904	switch x := x.(type) {
905	case *Plain:
906		return x
907	case *emphPlain:
908		return &x.Plain
909	case *openPlain:
910		return &x.Plain
911	}
912	return nil
913}
914
915func mergePlain1(list []Inline) *Plain {
916	if len(list) == 1 {
917		return toPlain(list[0])
918	}
919	var all []string
920	for _, pl := range list {
921		all = append(all, toPlain(pl).Text)
922	}
923	return &Plain{Text: strings.Join(all, "")}
924}
925
926func parseEmoji(p *parseState, s string, i int) (Inline, int, int, bool) {
927	for j := i + 1; ; j++ {
928		if j >= len(s) || j-i > 2+maxEmojiLen {
929			break
930		}
931		if s[j] == ':' {
932			name := s[i+1 : j]
933			if utf, ok := emoji[name]; ok {
934				return &Emoji{s[i : j+1], utf}, i, j + 1, true
935			}
936			break
937		}
938	}
939	return nil, 0, 0, false
940}
941
942type Emoji struct {
943	Name string // emoji :name:, including colons
944	Text string // Unicode for emoji sequence
945}
946
947func (*Emoji) Inline() {}
948
949func (x *Emoji) PrintHTML(buf *bytes.Buffer) {
950	htmlEscaper.WriteString(buf, x.Text)
951}
952
953func (x *Emoji) printMarkdown(buf *bytes.Buffer) {
954	buf.WriteString(x.Text)
955}
956
957func (x *Emoji) PrintText(buf *bytes.Buffer) {
958	htmlEscaper.WriteString(buf, x.Text)
959}
960