1// Copyright 2021 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package markdown 6 7import ( 8 "bytes" 9 "fmt" 10 "reflect" 11 "slices" 12 "strings" 13) 14 15/* 16 17list block itself does not appear on stack? 18item does 19end of item returns block, 20new item continues previous block if possible? 21 22if close leaves lines or blocks behind, panic 23 24close(b a list item, parent) 25 if b's parent's last block is list && item can be added to it, do so 26 else return new list 27 28or maybe not parent but just current list of blocks 29 30preserve LinkRefDefs? 31 32*/ 33 34// Block is implemented by: 35// 36// CodeBLock 37// Document 38// Empty 39// HTMLBlock 40// Heading 41// Item 42// List 43// Paragraph 44// Quote 45// Text 46// ThematicBreak 47type Block interface { 48 Pos() Position 49 PrintHTML(buf *bytes.Buffer) 50 printMarkdown(buf *bytes.Buffer, s mdState) 51} 52 53type mdState struct { 54 prefix string 55 prefix1 string // for first line only 56 bullet rune // for list items 57 num int // for numbered list items 58} 59 60type Position struct { 61 StartLine int 62 EndLine int 63} 64 65func (p Position) Pos() Position { 66 return p 67} 68 69type buildState interface { 70 blocks() []Block 71 pos() Position 72 last() Block 73 deleteLast() 74 75 link(label string) *Link 76 defineLink(label string, link *Link) 77 newText(pos Position, text string) *Text 78} 79 80type blockBuilder interface { 81 extend(p *parseState, s line) (line, bool) 82 build(buildState) Block 83} 84 85type openBlock struct { 86 builder blockBuilder 87 inner []Block 88 pos Position 89} 90 91type itemBuilder struct { 92 list *listBuilder 93 width int 94 haveContent bool 95} 96 97func (p *parseState) last() Block { 98 ob := &p.stack[len(p.stack)-1] 99 return ob.inner[len(ob.inner)-1] 100} 101 102func (p *parseState) deleteLast() { 103 ob := &p.stack[len(p.stack)-1] 104 ob.inner = ob.inner[:len(ob.inner)-1] 105} 106 107type Text struct { 108 Position 109 Inline []Inline 110 raw string 111} 112 113func (b *Text) PrintHTML(buf *bytes.Buffer) { 114 for _, x := range b.Inline { 115 x.PrintHTML(buf) 116 } 117} 118 119func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) { 120 if s.prefix1 != "" { 121 buf.WriteString(s.prefix1) 122 } else { 123 buf.WriteString(s.prefix) 124 } 125 var prev Inline 126 for _, x := range b.Inline { 127 switch prev.(type) { 128 case *SoftBreak, *HardBreak: 129 buf.WriteString(s.prefix) 130 } 131 x.printMarkdown(buf) 132 prev = x 133 } 134 buf.WriteByte('\n') 135} 136 137type rootBuilder struct{} 138 139func (b *rootBuilder) build(p buildState) Block { 140 return &Document{p.pos(), p.blocks(), p.(*parseState).links} 141} 142 143type Document struct { 144 Position 145 Blocks []Block 146 Links map[string]*Link 147} 148 149// A Parser is a Markdown parser. 150// The exported fields in the struct can be filled in before calling 151// [Parser.Parse] in order to customize the details of the parsing process. 152// A Parser is safe for concurrent use by multiple goroutines. 153type Parser struct { 154 // HeadingIDs determines whether the parser accepts 155 // the {#hdr} syntax for an HTML id="hdr" attribute on headings. 156 // For example, if HeadingIDs is true then the Markdown 157 // ## Overview {#overview} 158 // will render as the HTML 159 // <h2 id="overview">Overview</h2> 160 HeadingIDs bool 161 162 // Strikethrough determines whether the parser accepts 163 // ~abc~ and ~~abc~~ as strikethrough syntax, producing 164 // <del>abc</del> in HTML. 165 Strikethrough bool 166 167 // TaskListItems determines whether the parser accepts 168 // “task list items” as defined in GitHub Flavored Markdown. 169 // When a list item begins with the plain text [ ] or [x] 170 // that turns into an unchecked or checked check box. 171 TaskListItems bool 172 173 // TODO 174 AutoLinkText bool 175 AutoLinkAssumeHTTP bool 176 177 // TODO 178 Table bool 179 180 // TODO 181 Emoji bool 182 183 // TODO 184 SmartDot bool 185 SmartDash bool 186 SmartQuote bool 187} 188 189type parseState struct { 190 *Parser 191 192 root *Document 193 links map[string]*Link 194 lineno int 195 stack []openBlock 196 lineDepth int 197 198 corner bool // noticed corner case to ignore in cross-implementation testing 199 200 // inlines 201 s string 202 emitted int // s[:emitted] has been emitted into list 203 list []Inline 204 205 // for fixup at end 206 lists []*List 207 texts []*Text 208 209 backticks backtickParser 210} 211 212func (p *parseState) newText(pos Position, text string) *Text { 213 b := &Text{Position: pos, raw: text} 214 p.texts = append(p.texts, b) 215 return b 216} 217 218func (p *parseState) blocks() []Block { 219 b := &p.stack[len(p.stack)-1] 220 return b.inner 221} 222 223func (p *parseState) pos() Position { 224 b := &p.stack[len(p.stack)-1] 225 return b.pos 226} 227 228func (p *Parser) Parse(text string) *Document { 229 d, _ := p.parse(text) 230 return d 231} 232 233func (p *Parser) parse(text string) (d *Document, corner bool) { 234 var ps parseState 235 ps.Parser = p 236 if strings.Contains(text, "\x00") { 237 text = strings.ReplaceAll(text, "\x00", "\uFFFD") 238 ps.corner = true // goldmark does not replace NUL 239 } 240 241 ps.lineDepth = -1 242 ps.addBlock(&rootBuilder{}) 243 for text != "" { 244 var ln string 245 i := strings.Index(text, "\n") 246 j := strings.Index(text, "\r") 247 var nl byte 248 switch { 249 case j >= 0 && (i < 0 || j < i): // have \r, maybe \r\n 250 ln = text[:j] 251 if i == j+1 { 252 text = text[j+2:] 253 nl = '\r' + '\n' 254 } else { 255 text = text[j+1:] 256 nl = '\r' 257 } 258 case i >= 0: 259 ln, text = text[:i], text[i+1:] 260 nl = '\n' 261 default: 262 ln, text = text, "" 263 } 264 ps.lineno++ 265 ps.addLine(line{text: ln, nl: nl}) 266 } 267 ps.trimStack(0) 268 269 for _, t := range ps.texts { 270 t.Inline = ps.inline(t.raw) 271 } 272 273 if p.TaskListItems { 274 for _, list := range ps.lists { 275 ps.taskList(list) 276 } 277 } 278 279 return ps.root, ps.corner 280} 281 282func (p *parseState) curB() blockBuilder { 283 if p.lineDepth < len(p.stack) { 284 return p.stack[p.lineDepth].builder 285 } 286 return nil 287} 288 289func (p *parseState) nextB() blockBuilder { 290 if p.lineDepth+1 < len(p.stack) { 291 return p.stack[p.lineDepth+1].builder 292 } 293 return nil 294} 295func (p *parseState) trimStack(depth int) { 296 if len(p.stack) < depth { 297 panic("trimStack") 298 } 299 for len(p.stack) > depth { 300 p.closeBlock() 301 } 302} 303 304func (p *parseState) addBlock(c blockBuilder) { 305 p.trimStack(p.lineDepth + 1) 306 p.stack = append(p.stack, openBlock{}) 307 ob := &p.stack[len(p.stack)-1] 308 ob.builder = c 309 ob.pos.StartLine = p.lineno 310 ob.pos.EndLine = p.lineno 311} 312 313func (p *parseState) doneBlock(b Block) { 314 p.trimStack(p.lineDepth + 1) 315 ob := &p.stack[len(p.stack)-1] 316 ob.inner = append(ob.inner, b) 317} 318 319func (p *parseState) para() *paraBuilder { 320 if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok { 321 return b 322 } 323 return nil 324} 325 326func (p *parseState) closeBlock() Block { 327 b := &p.stack[len(p.stack)-1] 328 if b.builder == nil { 329 println("closeBlock", len(p.stack)-1) 330 } 331 blk := b.builder.build(p) 332 if list, ok := blk.(*List); ok { 333 p.corner = p.corner || listCorner(list) 334 if p.TaskListItems { 335 p.lists = append(p.lists, list) 336 } 337 } 338 p.stack = p.stack[:len(p.stack)-1] 339 if len(p.stack) > 0 { 340 b := &p.stack[len(p.stack)-1] 341 b.inner = append(b.inner, blk) 342 // _ = b 343 } else { 344 p.root = blk.(*Document) 345 } 346 return blk 347} 348 349func (p *parseState) link(label string) *Link { 350 return p.links[label] 351} 352 353func (p *parseState) defineLink(label string, link *Link) { 354 if p.links == nil { 355 p.links = make(map[string]*Link) 356 } 357 p.links[label] = link 358} 359 360type line struct { 361 spaces int 362 i int 363 tab int 364 text string 365 nl byte // newline character ending this line: \r or \n or zero for EOF 366} 367 368func (p *parseState) addLine(s line) { 369 // Process continued prefixes. 370 p.lineDepth = 0 371 for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ { 372 old := s 373 var ok bool 374 s, ok = p.stack[p.lineDepth+1].builder.extend(p, s) 375 if !old.isBlank() && (ok || s != old) { 376 p.stack[p.lineDepth+1].pos.EndLine = p.lineno 377 } 378 if !ok { 379 break 380 } 381 } 382 383 if s.isBlank() { 384 p.trimStack(p.lineDepth + 1) 385 return 386 } 387 388 // Process new prefixes, if any. 389Prefixes: 390 // Start new block inside p.stack[depth]. 391 for _, fn := range news { 392 if l, ok := fn(p, s); ok { 393 s = l 394 if s.isBlank() { 395 return 396 } 397 p.lineDepth++ 398 goto Prefixes 399 } 400 } 401 402 newPara(p, s) 403} 404 405func (c *rootBuilder) extend(p *parseState, s line) (line, bool) { 406 panic("root extend") 407} 408 409var news = []func(*parseState, line) (line, bool){ 410 newQuote, 411 newATXHeading, 412 newSetextHeading, 413 newHR, 414 newListItem, 415 newHTML, 416 newFence, 417 newPre, 418} 419 420func (s *line) peek() byte { 421 if s.spaces > 0 { 422 return ' ' 423 } 424 if s.i >= len(s.text) { 425 return 0 426 } 427 return s.text[s.i] 428} 429 430func (s *line) skipSpace() { 431 s.spaces = 0 432 for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') { 433 s.i++ 434 } 435} 436 437func (s *line) trimSpace(min, max int, eolOK bool) bool { 438 t := *s 439 for n := 0; n < max; n++ { 440 if t.spaces > 0 { 441 t.spaces-- 442 continue 443 } 444 if t.i >= len(t.text) && eolOK { 445 continue 446 } 447 if t.i < len(t.text) { 448 switch t.text[t.i] { 449 case '\t': 450 t.spaces = 4 - (t.i-t.tab)&3 - 1 451 t.i++ 452 t.tab = t.i 453 continue 454 case ' ': 455 t.i++ 456 continue 457 } 458 } 459 if n >= min { 460 break 461 } 462 return false 463 } 464 *s = t 465 return true 466} 467 468func (s *line) trim(c byte) bool { 469 if s.spaces > 0 { 470 if c == ' ' { 471 s.spaces-- 472 return true 473 } 474 return false 475 } 476 if s.i < len(s.text) && s.text[s.i] == c { 477 s.i++ 478 return true 479 } 480 return false 481} 482 483func (s *line) string() string { 484 switch s.spaces { 485 case 0: 486 return s.text[s.i:] 487 case 1: 488 return " " + s.text[s.i:] 489 case 2: 490 return " " + s.text[s.i:] 491 case 3: 492 return " " + s.text[s.i:] 493 } 494 panic("bad spaces") 495} 496 497func trimLeftSpaceTab(s string) string { 498 i := 0 499 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 500 i++ 501 } 502 return s[i:] 503} 504 505func trimRightSpaceTab(s string) string { 506 j := len(s) 507 for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { 508 j-- 509 } 510 return s[:j] 511} 512 513func trimSpaceTab(s string) string { 514 i := 0 515 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 516 i++ 517 } 518 s = s[i:] 519 j := len(s) 520 for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { 521 j-- 522 } 523 return s[:j] 524} 525 526func trimSpace(s string) string { 527 i := 0 528 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 529 i++ 530 } 531 s = s[i:] 532 j := len(s) 533 for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t') { 534 j-- 535 } 536 return s[:j] 537} 538 539func trimSpaceTabNewline(s string) string { 540 i := 0 541 for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') { 542 i++ 543 } 544 s = s[i:] 545 j := len(s) 546 for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { 547 j-- 548 } 549 return s[:j] 550} 551 552func (s *line) isBlank() bool { 553 return trimLeftSpaceTab(s.text[s.i:]) == "" 554} 555 556func (s *line) eof() bool { 557 return s.i >= len(s.text) 558} 559 560func (s *line) trimSpaceString() string { 561 return trimLeftSpaceTab(s.text[s.i:]) 562} 563 564func (s *line) trimString() string { 565 return trimSpaceTab(s.text[s.i:]) 566} 567 568func ToHTML(b Block) string { 569 var buf bytes.Buffer 570 b.PrintHTML(&buf) 571 return buf.String() 572} 573 574func ToMarkdown(b Block) string { 575 var buf bytes.Buffer 576 b.printMarkdown(&buf, mdState{}) 577 s := buf.String() 578 // Remove final extra newline. 579 if strings.HasSuffix(s, "\n\n") { 580 s = s[:len(s)-1] 581 } 582 return s 583} 584 585func (b *Document) PrintHTML(buf *bytes.Buffer) { 586 for _, c := range b.Blocks { 587 c.PrintHTML(buf) 588 } 589} 590 591func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) { 592 printMarkdownBlocks(b.Blocks, buf, s) 593 // Print links sorted by keys for deterministic output. 594 var keys []string 595 for k := range b.Links { 596 keys = append(keys, k) 597 } 598 slices.Sort(keys) 599 for _, k := range keys { 600 l := b.Links[k] 601 fmt.Fprintf(buf, "[%s]: %s", k, l.URL) 602 printLinkTitleMarkdown(buf, l.Title, l.TitleChar) 603 buf.WriteByte('\n') 604 } 605} 606 607func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) { 608 prevEnd := 0 609 for _, b := range bs { 610 // Preserve blank lines between blocks. 611 if prevEnd > 0 { 612 for i := prevEnd + 1; i < b.Pos().StartLine; i++ { 613 buf.WriteString(trimRightSpaceTab(s.prefix)) 614 buf.WriteByte('\n') 615 } 616 } 617 b.printMarkdown(buf, s) 618 prevEnd = b.Pos().EndLine 619 s.prefix1 = "" // item prefix only for first block 620 } 621} 622 623var ( 624 blockType = reflect.TypeOf(new(Block)).Elem() 625 blocksType = reflect.TypeOf(new([]Block)).Elem() 626 inlinesType = reflect.TypeOf(new([]Inline)).Elem() 627) 628 629func printb(buf *bytes.Buffer, b Block, prefix string) { 630 fmt.Fprintf(buf, "(%T", b) 631 v := reflect.ValueOf(b) 632 v = reflect.Indirect(v) 633 if v.Kind() != reflect.Struct { 634 fmt.Fprintf(buf, " %v", b) 635 } 636 t := v.Type() 637 for i := 0; i < t.NumField(); i++ { 638 tf := t.Field(i) 639 if !tf.IsExported() { 640 continue 641 } 642 if tf.Type == inlinesType { 643 printis(buf, v.Field(i).Interface().([]Inline)) 644 } else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String { 645 fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i)) 646 } else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice { 647 fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i)) 648 } 649 } 650 651 prefix += "\t" 652 for i := 0; i < t.NumField(); i++ { 653 tf := t.Field(i) 654 if !tf.IsExported() { 655 continue 656 } 657 if tf.Type.Implements(blockType) { 658 fmt.Fprintf(buf, "\n%s", prefix) 659 printb(buf, v.Field(i).Interface().(Block), prefix) 660 } else if tf.Type == blocksType { 661 vf := v.Field(i) 662 for i := 0; i < vf.Len(); i++ { 663 fmt.Fprintf(buf, "\n%s", prefix) 664 printb(buf, vf.Index(i).Interface().(Block), prefix) 665 } 666 } else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String { 667 fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name) 668 printslice(buf, v.Field(i), prefix) 669 } 670 } 671 fmt.Fprintf(buf, ")") 672} 673 674func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) { 675 if v.Type().Elem().Kind() == reflect.Slice { 676 for i := 0; i < v.Len(); i++ { 677 fmt.Fprintf(buf, "\n%s#%d:", prefix, i) 678 printslice(buf, v.Index(i), prefix+"\t") 679 } 680 return 681 } 682 for i := 0; i < v.Len(); i++ { 683 fmt.Fprintf(buf, " ") 684 printb(buf, v.Index(i).Interface().(Block), prefix+"\t") 685 } 686} 687 688func printi(buf *bytes.Buffer, in Inline) { 689 fmt.Fprintf(buf, "%T(", in) 690 v := reflect.ValueOf(in).Elem() 691 text := v.FieldByName("Text") 692 if text.IsValid() { 693 fmt.Fprintf(buf, "%q", text) 694 } 695 inner := v.FieldByName("Inner") 696 if inner.IsValid() { 697 printis(buf, inner.Interface().([]Inline)) 698 } 699 buf.WriteString(")") 700} 701 702func printis(buf *bytes.Buffer, ins []Inline) { 703 for _, in := range ins { 704 buf.WriteByte(' ') 705 printi(buf, in) 706 } 707} 708 709func dump(b Block) string { 710 var buf bytes.Buffer 711 printb(&buf, b, "") 712 return buf.String() 713} 714