1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package markdown
6
7import (
8	"bytes"
9	"fmt"
10	"reflect"
11	"slices"
12	"strings"
13)
14
15/*
16
17list block itself does not appear on stack?
18item does
19end of item returns block,
20new item continues previous block if possible?
21
22if close leaves lines or blocks behind, panic
23
24close(b a list item, parent)
25	if b's parent's last block is list && item can be added to it, do so
26	else return new list
27
28or maybe not parent but just current list of blocks
29
30preserve LinkRefDefs?
31
32*/
33
34// Block is implemented by:
35//
36//	CodeBLock
37//	Document
38//	Empty
39//	HTMLBlock
40//	Heading
41//	Item
42//	List
43//	Paragraph
44//	Quote
45//	Text
46//	ThematicBreak
47type Block interface {
48	Pos() Position
49	PrintHTML(buf *bytes.Buffer)
50	printMarkdown(buf *bytes.Buffer, s mdState)
51}
52
53type mdState struct {
54	prefix  string
55	prefix1 string // for first line only
56	bullet  rune   // for list items
57	num     int    // for numbered list items
58}
59
60type Position struct {
61	StartLine int
62	EndLine   int
63}
64
65func (p Position) Pos() Position {
66	return p
67}
68
69type buildState interface {
70	blocks() []Block
71	pos() Position
72	last() Block
73	deleteLast()
74
75	link(label string) *Link
76	defineLink(label string, link *Link)
77	newText(pos Position, text string) *Text
78}
79
80type blockBuilder interface {
81	extend(p *parseState, s line) (line, bool)
82	build(buildState) Block
83}
84
85type openBlock struct {
86	builder blockBuilder
87	inner   []Block
88	pos     Position
89}
90
91type itemBuilder struct {
92	list        *listBuilder
93	width       int
94	haveContent bool
95}
96
97func (p *parseState) last() Block {
98	ob := &p.stack[len(p.stack)-1]
99	return ob.inner[len(ob.inner)-1]
100}
101
102func (p *parseState) deleteLast() {
103	ob := &p.stack[len(p.stack)-1]
104	ob.inner = ob.inner[:len(ob.inner)-1]
105}
106
107type Text struct {
108	Position
109	Inline []Inline
110	raw    string
111}
112
113func (b *Text) PrintHTML(buf *bytes.Buffer) {
114	for _, x := range b.Inline {
115		x.PrintHTML(buf)
116	}
117}
118
119func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) {
120	if s.prefix1 != "" {
121		buf.WriteString(s.prefix1)
122	} else {
123		buf.WriteString(s.prefix)
124	}
125	var prev Inline
126	for _, x := range b.Inline {
127		switch prev.(type) {
128		case *SoftBreak, *HardBreak:
129			buf.WriteString(s.prefix)
130		}
131		x.printMarkdown(buf)
132		prev = x
133	}
134	buf.WriteByte('\n')
135}
136
137type rootBuilder struct{}
138
139func (b *rootBuilder) build(p buildState) Block {
140	return &Document{p.pos(), p.blocks(), p.(*parseState).links}
141}
142
143type Document struct {
144	Position
145	Blocks []Block
146	Links  map[string]*Link
147}
148
149// A Parser is a Markdown parser.
150// The exported fields in the struct can be filled in before calling
151// [Parser.Parse] in order to customize the details of the parsing process.
152// A Parser is safe for concurrent use by multiple goroutines.
153type Parser struct {
154	// HeadingIDs determines whether the parser accepts
155	// the {#hdr} syntax for an HTML id="hdr" attribute on headings.
156	// For example, if HeadingIDs is true then the Markdown
157	//    ## Overview {#overview}
158	// will render as the HTML
159	//    <h2 id="overview">Overview</h2>
160	HeadingIDs bool
161
162	// Strikethrough determines whether the parser accepts
163	// ~abc~ and ~~abc~~ as strikethrough syntax, producing
164	// <del>abc</del> in HTML.
165	Strikethrough bool
166
167	// TaskListItems determines whether the parser accepts
168	// “task list items” as defined in GitHub Flavored Markdown.
169	// When a list item begins with the plain text [ ] or [x]
170	// that turns into an unchecked or checked check box.
171	TaskListItems bool
172
173	// TODO
174	AutoLinkText       bool
175	AutoLinkAssumeHTTP bool
176
177	// TODO
178	Table bool
179
180	// TODO
181	Emoji bool
182
183	// TODO
184	SmartDot   bool
185	SmartDash  bool
186	SmartQuote bool
187}
188
189type parseState struct {
190	*Parser
191
192	root      *Document
193	links     map[string]*Link
194	lineno    int
195	stack     []openBlock
196	lineDepth int
197
198	corner bool // noticed corner case to ignore in cross-implementation testing
199
200	// inlines
201	s       string
202	emitted int // s[:emitted] has been emitted into list
203	list    []Inline
204
205	// for fixup at end
206	lists []*List
207	texts []*Text
208
209	backticks backtickParser
210}
211
212func (p *parseState) newText(pos Position, text string) *Text {
213	b := &Text{Position: pos, raw: text}
214	p.texts = append(p.texts, b)
215	return b
216}
217
218func (p *parseState) blocks() []Block {
219	b := &p.stack[len(p.stack)-1]
220	return b.inner
221}
222
223func (p *parseState) pos() Position {
224	b := &p.stack[len(p.stack)-1]
225	return b.pos
226}
227
228func (p *Parser) Parse(text string) *Document {
229	d, _ := p.parse(text)
230	return d
231}
232
233func (p *Parser) parse(text string) (d *Document, corner bool) {
234	var ps parseState
235	ps.Parser = p
236	if strings.Contains(text, "\x00") {
237		text = strings.ReplaceAll(text, "\x00", "\uFFFD")
238		ps.corner = true // goldmark does not replace NUL
239	}
240
241	ps.lineDepth = -1
242	ps.addBlock(&rootBuilder{})
243	for text != "" {
244		var ln string
245		i := strings.Index(text, "\n")
246		j := strings.Index(text, "\r")
247		var nl byte
248		switch {
249		case j >= 0 && (i < 0 || j < i): // have \r, maybe \r\n
250			ln = text[:j]
251			if i == j+1 {
252				text = text[j+2:]
253				nl = '\r' + '\n'
254			} else {
255				text = text[j+1:]
256				nl = '\r'
257			}
258		case i >= 0:
259			ln, text = text[:i], text[i+1:]
260			nl = '\n'
261		default:
262			ln, text = text, ""
263		}
264		ps.lineno++
265		ps.addLine(line{text: ln, nl: nl})
266	}
267	ps.trimStack(0)
268
269	for _, t := range ps.texts {
270		t.Inline = ps.inline(t.raw)
271	}
272
273	if p.TaskListItems {
274		for _, list := range ps.lists {
275			ps.taskList(list)
276		}
277	}
278
279	return ps.root, ps.corner
280}
281
282func (p *parseState) curB() blockBuilder {
283	if p.lineDepth < len(p.stack) {
284		return p.stack[p.lineDepth].builder
285	}
286	return nil
287}
288
289func (p *parseState) nextB() blockBuilder {
290	if p.lineDepth+1 < len(p.stack) {
291		return p.stack[p.lineDepth+1].builder
292	}
293	return nil
294}
295func (p *parseState) trimStack(depth int) {
296	if len(p.stack) < depth {
297		panic("trimStack")
298	}
299	for len(p.stack) > depth {
300		p.closeBlock()
301	}
302}
303
304func (p *parseState) addBlock(c blockBuilder) {
305	p.trimStack(p.lineDepth + 1)
306	p.stack = append(p.stack, openBlock{})
307	ob := &p.stack[len(p.stack)-1]
308	ob.builder = c
309	ob.pos.StartLine = p.lineno
310	ob.pos.EndLine = p.lineno
311}
312
313func (p *parseState) doneBlock(b Block) {
314	p.trimStack(p.lineDepth + 1)
315	ob := &p.stack[len(p.stack)-1]
316	ob.inner = append(ob.inner, b)
317}
318
319func (p *parseState) para() *paraBuilder {
320	if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok {
321		return b
322	}
323	return nil
324}
325
326func (p *parseState) closeBlock() Block {
327	b := &p.stack[len(p.stack)-1]
328	if b.builder == nil {
329		println("closeBlock", len(p.stack)-1)
330	}
331	blk := b.builder.build(p)
332	if list, ok := blk.(*List); ok {
333		p.corner = p.corner || listCorner(list)
334		if p.TaskListItems {
335			p.lists = append(p.lists, list)
336		}
337	}
338	p.stack = p.stack[:len(p.stack)-1]
339	if len(p.stack) > 0 {
340		b := &p.stack[len(p.stack)-1]
341		b.inner = append(b.inner, blk)
342		// _ = b
343	} else {
344		p.root = blk.(*Document)
345	}
346	return blk
347}
348
349func (p *parseState) link(label string) *Link {
350	return p.links[label]
351}
352
353func (p *parseState) defineLink(label string, link *Link) {
354	if p.links == nil {
355		p.links = make(map[string]*Link)
356	}
357	p.links[label] = link
358}
359
360type line struct {
361	spaces int
362	i      int
363	tab    int
364	text   string
365	nl     byte // newline character ending this line: \r or \n or zero for EOF
366}
367
368func (p *parseState) addLine(s line) {
369	// Process continued prefixes.
370	p.lineDepth = 0
371	for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ {
372		old := s
373		var ok bool
374		s, ok = p.stack[p.lineDepth+1].builder.extend(p, s)
375		if !old.isBlank() && (ok || s != old) {
376			p.stack[p.lineDepth+1].pos.EndLine = p.lineno
377		}
378		if !ok {
379			break
380		}
381	}
382
383	if s.isBlank() {
384		p.trimStack(p.lineDepth + 1)
385		return
386	}
387
388	// Process new prefixes, if any.
389Prefixes:
390	// Start new block inside p.stack[depth].
391	for _, fn := range news {
392		if l, ok := fn(p, s); ok {
393			s = l
394			if s.isBlank() {
395				return
396			}
397			p.lineDepth++
398			goto Prefixes
399		}
400	}
401
402	newPara(p, s)
403}
404
405func (c *rootBuilder) extend(p *parseState, s line) (line, bool) {
406	panic("root extend")
407}
408
409var news = []func(*parseState, line) (line, bool){
410	newQuote,
411	newATXHeading,
412	newSetextHeading,
413	newHR,
414	newListItem,
415	newHTML,
416	newFence,
417	newPre,
418}
419
420func (s *line) peek() byte {
421	if s.spaces > 0 {
422		return ' '
423	}
424	if s.i >= len(s.text) {
425		return 0
426	}
427	return s.text[s.i]
428}
429
430func (s *line) skipSpace() {
431	s.spaces = 0
432	for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') {
433		s.i++
434	}
435}
436
437func (s *line) trimSpace(min, max int, eolOK bool) bool {
438	t := *s
439	for n := 0; n < max; n++ {
440		if t.spaces > 0 {
441			t.spaces--
442			continue
443		}
444		if t.i >= len(t.text) && eolOK {
445			continue
446		}
447		if t.i < len(t.text) {
448			switch t.text[t.i] {
449			case '\t':
450				t.spaces = 4 - (t.i-t.tab)&3 - 1
451				t.i++
452				t.tab = t.i
453				continue
454			case ' ':
455				t.i++
456				continue
457			}
458		}
459		if n >= min {
460			break
461		}
462		return false
463	}
464	*s = t
465	return true
466}
467
468func (s *line) trim(c byte) bool {
469	if s.spaces > 0 {
470		if c == ' ' {
471			s.spaces--
472			return true
473		}
474		return false
475	}
476	if s.i < len(s.text) && s.text[s.i] == c {
477		s.i++
478		return true
479	}
480	return false
481}
482
483func (s *line) string() string {
484	switch s.spaces {
485	case 0:
486		return s.text[s.i:]
487	case 1:
488		return " " + s.text[s.i:]
489	case 2:
490		return "  " + s.text[s.i:]
491	case 3:
492		return "   " + s.text[s.i:]
493	}
494	panic("bad spaces")
495}
496
497func trimLeftSpaceTab(s string) string {
498	i := 0
499	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
500		i++
501	}
502	return s[i:]
503}
504
505func trimRightSpaceTab(s string) string {
506	j := len(s)
507	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') {
508		j--
509	}
510	return s[:j]
511}
512
513func trimSpaceTab(s string) string {
514	i := 0
515	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
516		i++
517	}
518	s = s[i:]
519	j := len(s)
520	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') {
521		j--
522	}
523	return s[:j]
524}
525
526func trimSpace(s string) string {
527	i := 0
528	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
529		i++
530	}
531	s = s[i:]
532	j := len(s)
533	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') {
534		j--
535	}
536	return s[:j]
537}
538
539func trimSpaceTabNewline(s string) string {
540	i := 0
541	for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') {
542		i++
543	}
544	s = s[i:]
545	j := len(s)
546	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') {
547		j--
548	}
549	return s[:j]
550}
551
552func (s *line) isBlank() bool {
553	return trimLeftSpaceTab(s.text[s.i:]) == ""
554}
555
556func (s *line) eof() bool {
557	return s.i >= len(s.text)
558}
559
560func (s *line) trimSpaceString() string {
561	return trimLeftSpaceTab(s.text[s.i:])
562}
563
564func (s *line) trimString() string {
565	return trimSpaceTab(s.text[s.i:])
566}
567
568func ToHTML(b Block) string {
569	var buf bytes.Buffer
570	b.PrintHTML(&buf)
571	return buf.String()
572}
573
574func ToMarkdown(b Block) string {
575	var buf bytes.Buffer
576	b.printMarkdown(&buf, mdState{})
577	s := buf.String()
578	// Remove final extra newline.
579	if strings.HasSuffix(s, "\n\n") {
580		s = s[:len(s)-1]
581	}
582	return s
583}
584
585func (b *Document) PrintHTML(buf *bytes.Buffer) {
586	for _, c := range b.Blocks {
587		c.PrintHTML(buf)
588	}
589}
590
591func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) {
592	printMarkdownBlocks(b.Blocks, buf, s)
593	// Print links sorted by keys for deterministic output.
594	var keys []string
595	for k := range b.Links {
596		keys = append(keys, k)
597	}
598	slices.Sort(keys)
599	for _, k := range keys {
600		l := b.Links[k]
601		fmt.Fprintf(buf, "[%s]: %s", k, l.URL)
602		printLinkTitleMarkdown(buf, l.Title, l.TitleChar)
603		buf.WriteByte('\n')
604	}
605}
606
607func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) {
608	prevEnd := 0
609	for _, b := range bs {
610		// Preserve blank lines between blocks.
611		if prevEnd > 0 {
612			for i := prevEnd + 1; i < b.Pos().StartLine; i++ {
613				buf.WriteString(trimRightSpaceTab(s.prefix))
614				buf.WriteByte('\n')
615			}
616		}
617		b.printMarkdown(buf, s)
618		prevEnd = b.Pos().EndLine
619		s.prefix1 = "" // item prefix only for first block
620	}
621}
622
623var (
624	blockType   = reflect.TypeOf(new(Block)).Elem()
625	blocksType  = reflect.TypeOf(new([]Block)).Elem()
626	inlinesType = reflect.TypeOf(new([]Inline)).Elem()
627)
628
629func printb(buf *bytes.Buffer, b Block, prefix string) {
630	fmt.Fprintf(buf, "(%T", b)
631	v := reflect.ValueOf(b)
632	v = reflect.Indirect(v)
633	if v.Kind() != reflect.Struct {
634		fmt.Fprintf(buf, " %v", b)
635	}
636	t := v.Type()
637	for i := 0; i < t.NumField(); i++ {
638		tf := t.Field(i)
639		if !tf.IsExported() {
640			continue
641		}
642		if tf.Type == inlinesType {
643			printis(buf, v.Field(i).Interface().([]Inline))
644		} else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String {
645			fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i))
646		} else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice {
647			fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i))
648		}
649	}
650
651	prefix += "\t"
652	for i := 0; i < t.NumField(); i++ {
653		tf := t.Field(i)
654		if !tf.IsExported() {
655			continue
656		}
657		if tf.Type.Implements(blockType) {
658			fmt.Fprintf(buf, "\n%s", prefix)
659			printb(buf, v.Field(i).Interface().(Block), prefix)
660		} else if tf.Type == blocksType {
661			vf := v.Field(i)
662			for i := 0; i < vf.Len(); i++ {
663				fmt.Fprintf(buf, "\n%s", prefix)
664				printb(buf, vf.Index(i).Interface().(Block), prefix)
665			}
666		} else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String {
667			fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name)
668			printslice(buf, v.Field(i), prefix)
669		}
670	}
671	fmt.Fprintf(buf, ")")
672}
673
674func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) {
675	if v.Type().Elem().Kind() == reflect.Slice {
676		for i := 0; i < v.Len(); i++ {
677			fmt.Fprintf(buf, "\n%s#%d:", prefix, i)
678			printslice(buf, v.Index(i), prefix+"\t")
679		}
680		return
681	}
682	for i := 0; i < v.Len(); i++ {
683		fmt.Fprintf(buf, " ")
684		printb(buf, v.Index(i).Interface().(Block), prefix+"\t")
685	}
686}
687
688func printi(buf *bytes.Buffer, in Inline) {
689	fmt.Fprintf(buf, "%T(", in)
690	v := reflect.ValueOf(in).Elem()
691	text := v.FieldByName("Text")
692	if text.IsValid() {
693		fmt.Fprintf(buf, "%q", text)
694	}
695	inner := v.FieldByName("Inner")
696	if inner.IsValid() {
697		printis(buf, inner.Interface().([]Inline))
698	}
699	buf.WriteString(")")
700}
701
702func printis(buf *bytes.Buffer, ins []Inline) {
703	for _, in := range ins {
704		buf.WriteByte(' ')
705		printi(buf, in)
706	}
707}
708
709func dump(b Block) string {
710	var buf bytes.Buffer
711	printb(&buf, b, "")
712	return buf.String()
713}
714