1// Copyright 2021 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package markdown 6 7import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "unicode" 12 "unicode/utf8" 13) 14 15/* 16text node can be 17 18 - other literal text 19 - run of * or _ characters 20 - [ 21 - ![ 22 23keep delimiter stack pointing at non-other literal text 24each node contains 25 26 - type of delimiter [ ![ _ * 27 - number of delimiters 28 - active or not 29 - potential opener, potential closer, or obth 30 31when a ] is hit, call look for link or image 32when end is hit, call process emphasis 33 34look for link or image: 35 36 find topmost [ or ![ 37 if none, emit literal ] 38 if its inactive, remove and emit literal ] 39 parse ahead to look for rest of link; if none, remove and emit literal ] 40 run process emphasis on the interior, 41 remove opener 42 if this was a link (not an image), set all [ before opener to inactive, to avoid links inside links 43 44process emphasis 45 46 walk forward in list to find a closer. 47 walk back to find first potential matching opener. 48 if found: 49 strong for length >= 2 50 insert node 51 drop delimiters between opener and closer 52 remove 1 or 2 from open/close count, removing if now empty 53 if closing has some left, go around again on this node 54 if not: 55 set openers bottom for this kind of element to before current_position 56 if the closer at current pos is not an opener, remove it 57 58seems needlessly complex. two passes 59 60scan and find ` ` first. 61 62pass 1. scan and find [ and ]() and leave the rest alone. 63 64each completed one invokes emphasis on inner text and then on the overall list. 65 66*/ 67 68type Inline interface { 69 PrintHTML(*bytes.Buffer) 70 PrintText(*bytes.Buffer) 71 printMarkdown(*bytes.Buffer) 72} 73 74type Plain struct { 75 Text string 76} 77 78func (*Plain) Inline() {} 79 80func (x *Plain) PrintHTML(buf *bytes.Buffer) { 81 htmlEscaper.WriteString(buf, x.Text) 82} 83 84func (x *Plain) printMarkdown(buf *bytes.Buffer) { 85 buf.WriteString(x.Text) 86} 87 88func (x *Plain) PrintText(buf *bytes.Buffer) { 89 htmlEscaper.WriteString(buf, x.Text) 90} 91 92type openPlain struct { 93 Plain 94 i int // position in input where bracket is 95} 96 97type emphPlain struct { 98 Plain 99 canOpen bool 100 canClose bool 101 i int // position in output where emph is 102 n int // length of original span 103} 104 105type Escaped struct { 106 Plain 107} 108 109func (x *Escaped) printMarkdown(buf *bytes.Buffer) { 110 buf.WriteByte('\\') 111 x.Plain.printMarkdown(buf) 112} 113 114type Code struct { 115 Text string 116} 117 118func (*Code) Inline() {} 119 120func (x *Code) PrintHTML(buf *bytes.Buffer) { 121 fmt.Fprintf(buf, "<code>%s</code>", htmlEscaper.Replace(x.Text)) 122} 123 124func (x *Code) printMarkdown(buf *bytes.Buffer) { 125 if len(x.Text) == 0 { 126 return 127 } 128 // Use the fewest backticks we can, and add spaces as needed. 129 ticks := strings.Repeat("`", longestSequence(x.Text, '`')+1) 130 buf.WriteString(ticks) 131 if x.Text[0] == '`' { 132 buf.WriteByte(' ') 133 } 134 buf.WriteString(x.Text) 135 if x.Text[len(x.Text)-1] == '`' { 136 buf.WriteByte(' ') 137 } 138 buf.WriteString(ticks) 139} 140 141// longestSequence returns the length of the longest sequence of consecutive bytes b in s. 142func longestSequence(s string, b byte) int { 143 max := 0 144 cur := 0 145 for i := range s { 146 if s[i] == b { 147 cur++ 148 } else { 149 if cur > max { 150 max = cur 151 } 152 cur = 0 153 } 154 } 155 if cur > max { 156 max = cur 157 } 158 return max 159} 160 161func (x *Code) PrintText(buf *bytes.Buffer) { 162 htmlEscaper.WriteString(buf, x.Text) 163} 164 165type Strong struct { 166 Marker string 167 Inner []Inline 168} 169 170func (x *Strong) Inline() { 171} 172 173func (x *Strong) PrintHTML(buf *bytes.Buffer) { 174 buf.WriteString("<strong>") 175 for _, c := range x.Inner { 176 c.PrintHTML(buf) 177 } 178 buf.WriteString("</strong>") 179} 180 181func (x *Strong) printMarkdown(buf *bytes.Buffer) { 182 buf.WriteString(x.Marker) 183 for _, c := range x.Inner { 184 c.printMarkdown(buf) 185 } 186 buf.WriteString(x.Marker) 187} 188 189func (x *Strong) PrintText(buf *bytes.Buffer) { 190 for _, c := range x.Inner { 191 c.PrintText(buf) 192 } 193} 194 195type Del struct { 196 Marker string 197 Inner []Inline 198} 199 200func (x *Del) Inline() { 201 202} 203 204func (x *Del) PrintHTML(buf *bytes.Buffer) { 205 buf.WriteString("<del>") 206 for _, c := range x.Inner { 207 c.PrintHTML(buf) 208 } 209 buf.WriteString("</del>") 210} 211 212func (x *Del) printMarkdown(buf *bytes.Buffer) { 213 buf.WriteString(x.Marker) 214 for _, c := range x.Inner { 215 c.printMarkdown(buf) 216 } 217 buf.WriteString(x.Marker) 218} 219 220func (x *Del) PrintText(buf *bytes.Buffer) { 221 for _, c := range x.Inner { 222 c.PrintText(buf) 223 } 224} 225 226type Emph struct { 227 Marker string 228 Inner []Inline 229} 230 231func (*Emph) Inline() {} 232 233func (x *Emph) PrintHTML(buf *bytes.Buffer) { 234 buf.WriteString("<em>") 235 for _, c := range x.Inner { 236 c.PrintHTML(buf) 237 } 238 buf.WriteString("</em>") 239} 240 241func (x *Emph) printMarkdown(buf *bytes.Buffer) { 242 buf.WriteString(x.Marker) 243 for _, c := range x.Inner { 244 c.printMarkdown(buf) 245 } 246 buf.WriteString(x.Marker) 247} 248 249func (x *Emph) PrintText(buf *bytes.Buffer) { 250 for _, c := range x.Inner { 251 c.PrintText(buf) 252 } 253} 254 255func (p *parseState) emit(i int) { 256 if p.emitted < i { 257 p.list = append(p.list, &Plain{p.s[p.emitted:i]}) 258 p.emitted = i 259 } 260} 261 262func (p *parseState) skip(i int) { 263 p.emitted = i 264} 265 266func (p *parseState) inline(s string) []Inline { 267 s = trimSpaceTab(s) 268 // Scan text looking for inlines. 269 // Leaf inlines are converted immediately. 270 // Non-leaf inlines have potential starts pushed on a stack while we await completion. 271 // Links take priority over other emphasis, so the emphasis must be delayed. 272 p.s = s 273 p.list = nil 274 p.emitted = 0 275 var opens []int // indexes of open ![ and [ Plains in p.list 276 var lastLinkOpen int 277 backticks := false 278 i := 0 279 for i < len(s) { 280 var parser func(*parseState, string, int) (Inline, int, int, bool) 281 switch s[i] { 282 case '\\': 283 parser = parseEscape 284 case '`': 285 if !backticks { 286 backticks = true 287 p.backticks.reset() 288 } 289 parser = p.backticks.parseCodeSpan 290 case '<': 291 parser = parseAutoLinkOrHTML 292 case '[': 293 parser = parseLinkOpen 294 case '!': 295 parser = parseImageOpen 296 case '_', '*': 297 parser = parseEmph 298 case '.': 299 if p.SmartDot { 300 parser = parseDot 301 } 302 case '-': 303 if p.SmartDash { 304 parser = parseDash 305 } 306 case '"', '\'': 307 if p.SmartQuote { 308 parser = parseEmph 309 } 310 case '~': 311 if p.Strikethrough { 312 parser = parseEmph 313 } 314 case '\n': // TODO what about eof 315 parser = parseBreak 316 case '&': 317 parser = parseHTMLEntity 318 case ':': 319 if p.Emoji { 320 parser = parseEmoji 321 } 322 } 323 if parser != nil { 324 if x, start, end, ok := parser(p, s, i); ok { 325 p.emit(start) 326 if _, ok := x.(*openPlain); ok { 327 opens = append(opens, len(p.list)) 328 } 329 p.list = append(p.list, x) 330 i = end 331 p.skip(i) 332 continue 333 } 334 } 335 if s[i] == ']' && len(opens) > 0 { 336 oi := opens[len(opens)-1] 337 open := p.list[oi].(*openPlain) 338 opens = opens[:len(opens)-1] 339 if open.Text[0] == '!' || lastLinkOpen <= open.i { 340 if x, end, ok := p.parseLinkClose(s, i, open); ok { 341 p.corner = p.corner || x.corner || linkCorner(x.URL) 342 p.emit(i) 343 x.Inner = p.emph(nil, p.list[oi+1:]) 344 if open.Text[0] == '!' { 345 p.list[oi] = (*Image)(x) 346 } else { 347 p.list[oi] = x 348 } 349 p.list = p.list[:oi+1] 350 p.skip(end) 351 i = end 352 if open.Text[0] == '[' { 353 // No links around links. 354 lastLinkOpen = open.i 355 } 356 continue 357 } 358 } 359 } 360 i++ 361 } 362 p.emit(len(s)) 363 p.list = p.emph(p.list[:0], p.list) 364 p.list = p.mergePlain(p.list) 365 p.list = p.autoLinkText(p.list) 366 367 return p.list 368} 369 370func (ps *parseState) emph(dst, src []Inline) []Inline { 371 const chars = "_*~\"'" 372 var stack [len(chars)][]*emphPlain 373 stackOf := func(c byte) int { 374 return strings.IndexByte(chars, c) 375 } 376 377 trimStack := func() { 378 for i := range stack { 379 stk := &stack[i] 380 for len(*stk) > 0 && (*stk)[len(*stk)-1].i >= len(dst) { 381 *stk = (*stk)[:len(*stk)-1] 382 } 383 } 384 } 385 386Src: 387 for i := 0; i < len(src); i++ { 388 if open, ok := src[i].(*openPlain); ok { 389 // Convert unused link/image open marker to plain text. 390 dst = append(dst, &open.Plain) 391 continue 392 } 393 p, ok := src[i].(*emphPlain) 394 if !ok { 395 dst = append(dst, src[i]) 396 continue 397 } 398 if p.canClose { 399 stk := &stack[stackOf(p.Text[0])] 400 Loop: 401 for p.Text != "" { 402 // Looking for same symbol and compatible with p.Text. 403 for i := len(*stk) - 1; i >= 0; i-- { 404 start := (*stk)[i] 405 if (p.Text[0] == '*' || p.Text[0] == '_') && (p.canOpen && p.canClose || start.canOpen && start.canClose) && (p.n+start.n)%3 == 0 && (p.n%3 != 0 || start.n%3 != 0) { 406 continue 407 } 408 if p.Text[0] == '~' && len(p.Text) != len(start.Text) { // ~ matches ~, ~~ matches ~~ 409 continue 410 } 411 if p.Text[0] == '"' { 412 dst[start.i].(*emphPlain).Text = "“" 413 p.Text = "”" 414 dst = append(dst, p) 415 *stk = (*stk)[:i] 416 // no trimStack 417 continue Src 418 } 419 if p.Text[0] == '\'' { 420 dst[start.i].(*emphPlain).Text = "‘" 421 p.Text = "’" 422 dst = append(dst, p) 423 *stk = (*stk)[:i] 424 // no trimStack 425 continue Src 426 } 427 var d int 428 if len(p.Text) >= 2 && len(start.Text) >= 2 { 429 // strong 430 d = 2 431 } else { 432 // emph 433 d = 1 434 } 435 del := p.Text[0] == '~' 436 x := &Emph{Marker: p.Text[:d], Inner: append([]Inline(nil), dst[start.i+1:]...)} 437 start.Text = start.Text[:len(start.Text)-d] 438 p.Text = p.Text[d:] 439 if start.Text == "" { 440 dst = dst[:start.i] 441 } else { 442 dst = dst[:start.i+1] 443 } 444 trimStack() 445 if del { 446 dst = append(dst, (*Del)(x)) 447 } else if d == 2 { 448 dst = append(dst, (*Strong)(x)) 449 } else { 450 dst = append(dst, x) 451 } 452 continue Loop 453 } 454 break 455 } 456 } 457 if p.Text != "" { 458 stk := &stack[stackOf(p.Text[0])] 459 if p.Text == "'" { 460 p.Text = "’" 461 } 462 if p.Text == "\"" { 463 if p.canClose { 464 p.Text = "”" 465 } else { 466 p.Text = "“" 467 } 468 } 469 if p.canOpen { 470 p.i = len(dst) 471 dst = append(dst, p) 472 *stk = append(*stk, p) 473 } else { 474 dst = append(dst, &p.Plain) 475 } 476 } 477 } 478 return dst 479} 480 481func mdUnescape(s string) string { 482 if !strings.Contains(s, `\`) && !strings.Contains(s, `&`) { 483 return s 484 } 485 return mdUnescaper.Replace(s) 486} 487 488var mdUnescaper = func() *strings.Replacer { 489 var list = []string{ 490 `\!`, `!`, 491 `\"`, `"`, 492 `\#`, `#`, 493 `\$`, `$`, 494 `\%`, `%`, 495 `\&`, `&`, 496 `\'`, `'`, 497 `\(`, `(`, 498 `\)`, `)`, 499 `\*`, `*`, 500 `\+`, `+`, 501 `\,`, `,`, 502 `\-`, `-`, 503 `\.`, `.`, 504 `\/`, `/`, 505 `\:`, `:`, 506 `\;`, `;`, 507 `\<`, `<`, 508 `\=`, `=`, 509 `\>`, `>`, 510 `\?`, `?`, 511 `\@`, `@`, 512 `\[`, `[`, 513 `\\`, `\`, 514 `\]`, `]`, 515 `\^`, `^`, 516 `\_`, `_`, 517 "\\`", "`", 518 `\{`, `{`, 519 `\|`, `|`, 520 `\}`, `}`, 521 `\~`, `~`, 522 } 523 524 for name, repl := range htmlEntity { 525 list = append(list, name, repl) 526 } 527 return strings.NewReplacer(list...) 528}() 529 530func isPunct(c byte) bool { 531 return '!' <= c && c <= '/' || ':' <= c && c <= '@' || '[' <= c && c <= '`' || '{' <= c && c <= '~' 532} 533 534func parseEscape(p *parseState, s string, i int) (Inline, int, int, bool) { 535 if i+1 < len(s) { 536 c := s[i+1] 537 if isPunct(c) { 538 return &Escaped{Plain{s[i+1 : i+2]}}, i, i + 2, true 539 } 540 if c == '\n' { // TODO what about eof 541 if i > 0 && s[i-1] == '\\' { 542 p.corner = true // goldmark mishandles \\\ newline 543 } 544 end := i + 2 545 for end < len(s) && (s[end] == ' ' || s[end] == '\t') { 546 end++ 547 } 548 return &HardBreak{}, i, end, true 549 } 550 } 551 return nil, 0, 0, false 552} 553 554func parseDot(p *parseState, s string, i int) (Inline, int, int, bool) { 555 if i+2 < len(s) && s[i+1] == '.' && s[i+2] == '.' { 556 return &Plain{"…"}, i, i + 3, true 557 } 558 return nil, 0, 0, false 559} 560 561func parseDash(p *parseState, s string, i int) (Inline, int, int, bool) { 562 if i+1 >= len(s) || s[i+1] != '-' { 563 return nil, 0, 0, false 564 } 565 566 n := 2 567 for i+n < len(s) && s[i+n] == '-' { 568 n++ 569 } 570 571 // Mimic cmark-gfm. Can't make this stuff up. 572 em, en := 0, 0 573 switch { 574 case n%3 == 0: 575 em = n / 3 576 case n%2 == 0: 577 en = n / 2 578 case n%3 == 2: 579 em = (n - 2) / 3 580 en = 1 581 case n%3 == 1: 582 em = (n - 4) / 3 583 en = 2 584 } 585 return &Plain{strings.Repeat("—", em) + strings.Repeat("–", en)}, i, i + n, true 586} 587 588// Inline code span markers must fit on punched cards, to match cmark-gfm. 589const maxBackticks = 80 590 591type backtickParser struct { 592 last [maxBackticks]int 593 scanned bool 594} 595 596func (b *backtickParser) reset() { 597 *b = backtickParser{} 598} 599 600func (b *backtickParser) parseCodeSpan(p *parseState, s string, i int) (Inline, int, int, bool) { 601 start := i 602 // Count leading backticks. Need to find that many again. 603 n := 1 604 for i+n < len(s) && s[i+n] == '`' { 605 n++ 606 } 607 608 // If we've already scanned the whole string (for a different count), 609 // we can skip a failed scan by checking whether we saw this count. 610 // To enable this optimization, following cmark-gfm, we declare by fiat 611 // that more than maxBackticks backquotes is too many. 612 if n > len(b.last) || b.scanned && b.last[n-1] < i+n { 613 goto NoMatch 614 } 615 616 for end := i + n; end < len(s); { 617 if s[end] != '`' { 618 end++ 619 continue 620 } 621 estart := end 622 for end < len(s) && s[end] == '`' { 623 end++ 624 } 625 m := end - estart 626 if !b.scanned && m < len(b.last) { 627 b.last[m-1] = estart 628 } 629 if m == n { 630 // Match. 631 // Line endings are converted to single spaces. 632 text := s[i+n : estart] 633 text = strings.ReplaceAll(text, "\n", " ") 634 635 // If enclosed text starts and ends with a space and is not all spaces, 636 // one space is removed from start and end, to allow `` ` `` to quote a single backquote. 637 if len(text) >= 2 && text[0] == ' ' && text[len(text)-1] == ' ' && trimSpace(text) != "" { 638 text = text[1 : len(text)-1] 639 } 640 641 return &Code{text}, start, end, true 642 } 643 } 644 b.scanned = true 645 646NoMatch: 647 // No match, so none of these backticks count: skip them all. 648 // For example ``x` is not a single backtick followed by a code span. 649 // Returning nil, 0, false would advance to the second backtick and try again. 650 return &Plain{s[i : i+n]}, start, i + n, true 651} 652 653func parseAutoLinkOrHTML(p *parseState, s string, i int) (Inline, int, int, bool) { 654 if x, end, ok := parseAutoLinkURI(s, i); ok { 655 return x, i, end, true 656 } 657 if x, end, ok := parseAutoLinkEmail(s, i); ok { 658 return x, i, end, true 659 } 660 if x, end, ok := parseHTMLTag(p, s, i); ok { 661 return x, i, end, true 662 } 663 return nil, 0, 0, false 664} 665 666func isLetter(c byte) bool { 667 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 668} 669 670func isLDH(c byte) bool { 671 return isLetterDigit(c) || c == '-' 672} 673 674func isLetterDigit(c byte) bool { 675 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' 676} 677 678func parseLinkOpen(_ *parseState, s string, i int) (Inline, int, int, bool) { 679 return &openPlain{Plain{s[i : i+1]}, i + 1}, i, i + 1, true 680} 681 682func parseImageOpen(_ *parseState, s string, i int) (Inline, int, int, bool) { 683 if i+1 < len(s) && s[i+1] == '[' { 684 return &openPlain{Plain{s[i : i+2]}, i + 2}, i, i + 2, true 685 } 686 return nil, 0, 0, false 687} 688 689func parseEmph(p *parseState, s string, i int) (Inline, int, int, bool) { 690 c := s[i] 691 j := i + 1 692 if c == '*' || c == '~' || c == '_' { 693 for j < len(s) && s[j] == c { 694 j++ 695 } 696 } 697 if c == '~' && j-i != 2 { 698 // Goldmark does not accept ~text~ 699 // and incorrectly accepts ~~~text~~~. 700 // Only ~~ is correct. 701 p.corner = true 702 } 703 if c == '~' && j-i > 2 { 704 return &Plain{s[i:j]}, i, j, true 705 } 706 707 var before, after rune 708 if i == 0 { 709 before = ' ' 710 } else { 711 before, _ = utf8.DecodeLastRuneInString(s[:i]) 712 } 713 if j >= len(s) { 714 after = ' ' 715 } else { 716 after, _ = utf8.DecodeRuneInString(s[j:]) 717 } 718 719 // “A left-flanking delimiter run is a delimiter run that is 720 // (1) not followed by Unicode whitespace, and either 721 // (2a) not followed by a Unicode punctuation character, or 722 // (2b) followed by a Unicode punctuation character 723 // and preceded by Unicode whitespace or a Unicode punctuation character. 724 // For purposes of this definition, the beginning and the end 725 // of the line count as Unicode whitespace.” 726 leftFlank := !isUnicodeSpace(after) && 727 (!isUnicodePunct(after) || isUnicodeSpace(before) || isUnicodePunct(before)) 728 729 // “A right-flanking delimiter run is a delimiter run that is 730 // (1) not preceded by Unicode whitespace, and either 731 // (2a) not preceded by a Unicode punctuation character, or 732 // (2b) preceded by a Unicode punctuation character 733 // and followed by Unicode whitespace or a Unicode punctuation character. 734 // For purposes of this definition, the beginning and the end 735 // of the line count as Unicode whitespace.” 736 rightFlank := !isUnicodeSpace(before) && 737 (!isUnicodePunct(before) || isUnicodeSpace(after) || isUnicodePunct(after)) 738 739 var canOpen, canClose bool 740 741 switch c { 742 case '\'', '"': 743 canOpen = leftFlank && !rightFlank && before != ']' && before != ')' 744 canClose = rightFlank 745 case '*', '~': 746 // “A single * character can open emphasis iff 747 // it is part of a left-flanking delimiter run.” 748 749 // “A double ** can open strong emphasis iff 750 // it is part of a left-flanking delimiter run.” 751 canOpen = leftFlank 752 753 // “A single * character can close emphasis iff 754 // it is part of a right-flanking delimiter run.” 755 756 // “A double ** can close strong emphasis iff 757 // it is part of a right-flanking delimiter run.” 758 canClose = rightFlank 759 case '_': 760 // “A single _ character can open emphasis iff 761 // it is part of a left-flanking delimiter run and either 762 // (a) not part of a right-flanking delimiter run or 763 // (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.” 764 765 // “A double __ can open strong emphasis iff 766 // it is part of a left-flanking delimiter run and either 767 // (a) not part of a right-flanking delimiter run or 768 // (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.” 769 canOpen = leftFlank && (!rightFlank || isUnicodePunct(before)) 770 771 // “A single _ character can close emphasis iff 772 // it is part of a right-flanking delimiter run and either 773 // (a) not part of a left-flanking delimiter run or 774 // (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.” 775 776 // “A double __ can close strong emphasis iff 777 // it is part of a right-flanking delimiter run and either 778 // (a) not part of a left-flanking delimiter run or 779 // (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.” 780 canClose = rightFlank && (!leftFlank || isUnicodePunct(after)) 781 } 782 783 return &emphPlain{Plain: Plain{s[i:j]}, canOpen: canOpen, canClose: canClose, n: j - i}, i, j, true 784} 785 786func isUnicodeSpace(r rune) bool { 787 if r < 0x80 { 788 return r == ' ' || r == '\t' || r == '\f' || r == '\n' 789 } 790 return unicode.In(r, unicode.Zs) 791} 792 793func isUnicodePunct(r rune) bool { 794 if r < 0x80 { 795 return isPunct(byte(r)) 796 } 797 return unicode.In(r, unicode.Punct) 798} 799 800func (p *parseState) parseLinkClose(s string, i int, open *openPlain) (*Link, int, bool) { 801 if i+1 < len(s) { 802 switch s[i+1] { 803 case '(': 804 // Inline link - [Text](Dest Title), with Title omitted or both Dest and Title omitted. 805 i := skipSpace(s, i+2) 806 var dest, title string 807 var titleChar byte 808 var corner bool 809 if i < len(s) && s[i] != ')' { 810 var ok bool 811 dest, i, ok = parseLinkDest(s, i) 812 if !ok { 813 break 814 } 815 i = skipSpace(s, i) 816 if i < len(s) && s[i] != ')' { 817 title, titleChar, i, ok = parseLinkTitle(s, i) 818 if title == "" { 819 corner = true 820 } 821 if !ok { 822 break 823 } 824 i = skipSpace(s, i) 825 } 826 } 827 if i < len(s) && s[i] == ')' { 828 return &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}, i + 1, true 829 } 830 // NOTE: Test malformed ( ) with shortcut reference 831 // TODO fall back on syntax error? 832 833 case '[': 834 // Full reference link - [Text][Label] 835 label, i, ok := parseLinkLabel(p, s, i+1) 836 if !ok { 837 break 838 } 839 if link, ok := p.links[normalizeLabel(label)]; ok { 840 return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, i, true 841 } 842 // Note: Could break here, but CommonMark dingus does not 843 // fall back to trying Text for [Text][Label] when Label is unknown. 844 // Unclear from spec what the correct answer is. 845 return nil, 0, false 846 } 847 } 848 849 // Collapsed or shortcut reference link: [Text][] or [Text]. 850 end := i + 1 851 if strings.HasPrefix(s[end:], "[]") { 852 end += 2 853 } 854 855 if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok { 856 return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, end, true 857 } 858 return nil, 0, false 859} 860 861func skipSpace(s string, i int) int { 862 // Note: Blank lines have already been removed. 863 for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') { 864 i++ 865 } 866 return i 867} 868 869func linkCorner(url string) bool { 870 for i := 0; i < len(url); i++ { 871 if url[i] == '%' { 872 if i+2 >= len(url) || !isHexDigit(url[i+1]) || !isHexDigit(url[i+2]) { 873 // Goldmark and the Dingus re-escape such percents as %25, 874 // but the spec does not seem to require this behavior. 875 return true 876 } 877 } 878 } 879 return false 880} 881 882func (p *parseState) mergePlain(list []Inline) []Inline { 883 out := list[:0] 884 start := 0 885 for i := 0; ; i++ { 886 if i < len(list) && toPlain(list[i]) != nil { 887 continue 888 } 889 // Non-Plain or end of list. 890 if start < i { 891 out = append(out, mergePlain1(list[start:i])) 892 } 893 if i >= len(list) { 894 break 895 } 896 out = append(out, list[i]) 897 start = i + 1 898 } 899 return out 900} 901 902func toPlain(x Inline) *Plain { 903 // TODO what about Escaped? 904 switch x := x.(type) { 905 case *Plain: 906 return x 907 case *emphPlain: 908 return &x.Plain 909 case *openPlain: 910 return &x.Plain 911 } 912 return nil 913} 914 915func mergePlain1(list []Inline) *Plain { 916 if len(list) == 1 { 917 return toPlain(list[0]) 918 } 919 var all []string 920 for _, pl := range list { 921 all = append(all, toPlain(pl).Text) 922 } 923 return &Plain{Text: strings.Join(all, "")} 924} 925 926func parseEmoji(p *parseState, s string, i int) (Inline, int, int, bool) { 927 for j := i + 1; ; j++ { 928 if j >= len(s) || j-i > 2+maxEmojiLen { 929 break 930 } 931 if s[j] == ':' { 932 name := s[i+1 : j] 933 if utf, ok := emoji[name]; ok { 934 return &Emoji{s[i : j+1], utf}, i, j + 1, true 935 } 936 break 937 } 938 } 939 return nil, 0, 0, false 940} 941 942type Emoji struct { 943 Name string // emoji :name:, including colons 944 Text string // Unicode for emoji sequence 945} 946 947func (*Emoji) Inline() {} 948 949func (x *Emoji) PrintHTML(buf *bytes.Buffer) { 950 htmlEscaper.WriteString(buf, x.Text) 951} 952 953func (x *Emoji) printMarkdown(buf *bytes.Buffer) { 954 buf.WriteString(x.Text) 955} 956 957func (x *Emoji) PrintText(buf *bytes.Buffer) { 958 htmlEscaper.WriteString(buf, x.Text) 959} 960