1// Copyright 2021 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package markdown 6 7import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "unicode/utf8" 12 13 "golang.org/x/text/cases" 14) 15 16func parseLinkRefDef(p buildState, s string) (int, bool) { 17 // “A link reference definition consists of a link label, 18 // optionally preceded by up to three spaces of indentation, 19 // followed by a colon (:), 20 // optional spaces or tabs (including up to one line ending), 21 // a link destination, 22 // optional spaces or tabs (including up to one line ending), 23 // and an optional link title, 24 // which if it is present must be separated from the link destination 25 // by spaces or tabs. No further character may occur.” 26 i := skipSpace(s, 0) 27 label, i, ok := parseLinkLabel(p.(*parseState), s, i) 28 if !ok || i >= len(s) || s[i] != ':' { 29 return 0, false 30 } 31 i = skipSpace(s, i+1) 32 suf := s[i:] 33 dest, i, ok := parseLinkDest(s, i) 34 if !ok { 35 if suf != "" && suf[0] == '<' { 36 // Goldmark treats <<> as a link definition. 37 p.(*parseState).corner = true 38 } 39 return 0, false 40 } 41 moved := false 42 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 43 moved = true 44 i++ 45 } 46 47 // Take title if present and doesn't break parse. 48 j := i 49 if j >= len(s) || s[j] == '\n' { 50 moved = true 51 if j < len(s) { 52 j++ 53 } 54 } 55 56 var title string 57 var titleChar byte 58 var corner bool 59 if moved { 60 for j < len(s) && (s[j] == ' ' || s[j] == '\t') { 61 j++ 62 } 63 if t, c, j, ok := parseLinkTitle(s, j); ok { 64 for j < len(s) && (s[j] == ' ' || s[j] == '\t') { 65 j++ 66 } 67 if j >= len(s) || s[j] == '\n' { 68 i = j 69 if t == "" { 70 // Goldmark adds title="" in this case. 71 // We do not, nor does the Dingus. 72 corner = true 73 } 74 title = t 75 titleChar = c 76 } 77 } 78 } 79 80 // Must end line. Already trimmed spaces. 81 if i < len(s) && s[i] != '\n' { 82 return 0, false 83 } 84 if i < len(s) { 85 i++ 86 } 87 88 label = normalizeLabel(label) 89 if p.link(label) == nil { 90 p.defineLink(label, &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}) 91 } 92 return i, true 93} 94 95func parseLinkTitle(s string, i int) (title string, char byte, next int, found bool) { 96 if i < len(s) && (s[i] == '"' || s[i] == '\'' || s[i] == '(') { 97 want := s[i] 98 if want == '(' { 99 want = ')' 100 } 101 j := i + 1 102 for ; j < len(s); j++ { 103 if s[j] == want { 104 title := s[i+1 : j] 105 // TODO: Validate title? 106 return mdUnescaper.Replace(title), want, j + 1, true 107 } 108 if s[j] == '(' && want == ')' { 109 break 110 } 111 if s[j] == '\\' && j+1 < len(s) { 112 j++ 113 } 114 } 115 } 116 return "", 0, 0, false 117} 118 119func parseLinkLabel(p *parseState, s string, i int) (string, int, bool) { 120 // “A link label begins with a left bracket ([) and ends with 121 // the first right bracket (]) that is not backslash-escaped. 122 // Between these brackets there must be at least one character 123 // that is not a space, tab, or line ending. 124 // Unescaped square bracket characters are not allowed 125 // inside the opening and closing square brackets of link labels. 126 // A link label can have at most 999 characters inside the square brackets.” 127 if i >= len(s) || s[i] != '[' { 128 return "", 0, false 129 } 130 j := i + 1 131 for ; j < len(s); j++ { 132 if s[j] == ']' { 133 if j-(i+1) > 999 { 134 // Goldmark does not apply 999 limit. 135 p.corner = true 136 break 137 } 138 if label := trimSpaceTabNewline(s[i+1 : j]); label != "" { 139 // Note: CommonMark Dingus does not escape. 140 return label, j + 1, true 141 } 142 break 143 } 144 if s[j] == '[' { 145 break 146 } 147 if s[j] == '\\' && j+1 < len(s) { 148 j++ 149 } 150 } 151 return "", 0, false 152} 153 154func normalizeLabel(s string) string { 155 if strings.Contains(s, "[") || strings.Contains(s, "]") { 156 // Labels cannot have [ ] so avoid the work of translating. 157 // This is especially important for pathlogical cases like 158 // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic 159 // amounts of garbage. 160 return "" 161 } 162 163 // “To normalize a label, strip off the opening and closing brackets, 164 // perform the Unicode case fold, strip leading and trailing spaces, tabs, and line endings, 165 // and collapse consecutive internal spaces, tabs, and line endings to a single space.” 166 s = trimSpaceTabNewline(s) 167 var b strings.Builder 168 space := false 169 hi := false 170 for i := 0; i < len(s); i++ { 171 c := s[i] 172 switch c { 173 case ' ', '\t', '\n': 174 space = true 175 continue 176 default: 177 if space { 178 b.WriteByte(' ') 179 space = false 180 } 181 if 'A' <= c && c <= 'Z' { 182 c += 'a' - 'A' 183 } 184 if c >= 0x80 { 185 hi = true 186 } 187 b.WriteByte(c) 188 } 189 } 190 s = b.String() 191 if hi { 192 s = cases.Fold().String(s) 193 } 194 return s 195} 196 197func parseLinkDest(s string, i int) (string, int, bool) { 198 if i >= len(s) { 199 return "", 0, false 200 } 201 202 // “A sequence of zero or more characters between an opening < and a closing > 203 // that contains no line endings or unescaped < or > characters,” 204 if s[i] == '<' { 205 for j := i + 1; ; j++ { 206 if j >= len(s) || s[j] == '\n' || s[j] == '<' { 207 return "", 0, false 208 } 209 if s[j] == '>' { 210 // TODO unescape? 211 return mdUnescape(s[i+1 : j]), j + 1, true 212 } 213 if s[j] == '\\' { 214 j++ 215 } 216 } 217 } 218 219 // “or a nonempty sequence of characters that does not start with <, 220 // does not include ASCII control characters or space character, 221 // and includes parentheses only if (a) they are backslash-escaped 222 // or (b) they are part of a balanced pair of unescaped parentheses. 223 depth := 0 224 j := i 225Loop: 226 for ; j < len(s); j++ { 227 switch s[j] { 228 case '(': 229 depth++ 230 if depth > 32 { 231 // Avoid quadratic inputs by stopping if too deep. 232 // This is the same depth that cmark-gfm uses. 233 return "", 0, false 234 } 235 case ')': 236 if depth == 0 { 237 break Loop 238 } 239 depth-- 240 case '\\': 241 if j+1 < len(s) { 242 if s[j+1] == ' ' || s[j+1] == '\t' { 243 return "", 0, false 244 } 245 j++ 246 } 247 case ' ', '\t', '\n': 248 break Loop 249 } 250 } 251 252 dest := s[i:j] 253 // TODO: Validate dest? 254 // TODO: Unescape? 255 // NOTE: CommonMark Dingus does not reject control characters. 256 return mdUnescape(dest), j, true 257} 258 259func parseAutoLinkURI(s string, i int) (Inline, int, bool) { 260 // CommonMark 0.30: 261 // 262 // For purposes of this spec, a scheme is any sequence of 2–32 characters 263 // beginning with an ASCII letter and followed by any combination of 264 // ASCII letters, digits, or the symbols plus (”+”), period (”.”), or 265 // hyphen (”-”). 266 // 267 // An absolute URI, for these purposes, consists of a scheme followed by 268 // a colon (:) followed by zero or more characters other ASCII control 269 // characters, space, <, and >. If the URI includes these characters, 270 // they must be percent-encoded (e.g. %20 for a space). 271 272 j := i 273 if j+1 >= len(s) || s[j] != '<' || !isLetter(s[j+1]) { 274 return nil, 0, false 275 } 276 j++ 277 for j < len(s) && isScheme(s[j]) && j-(i+1) <= 32 { 278 j++ 279 } 280 if j-(i+1) < 2 || j-(i+1) > 32 || j >= len(s) || s[j] != ':' { 281 return nil, 0, false 282 } 283 j++ 284 for j < len(s) && isURL(s[j]) { 285 j++ 286 } 287 if j >= len(s) || s[j] != '>' { 288 return nil, 0, false 289 } 290 link := s[i+1 : j] 291 // link = mdUnescaper.Replace(link) 292 return &AutoLink{link, link}, j + 1, true 293} 294 295func parseAutoLinkEmail(s string, i int) (Inline, int, bool) { 296 // CommonMark 0.30: 297 // 298 // An email address, for these purposes, is anything that matches 299 // the non-normative regex from the HTML5 spec: 300 // 301 // /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ 302 303 j := i 304 if j+1 >= len(s) || s[j] != '<' || !isUser(s[j+1]) { 305 return nil, 0, false 306 } 307 j++ 308 for j < len(s) && isUser(s[j]) { 309 j++ 310 } 311 if j >= len(s) || s[j] != '@' { 312 return nil, 0, false 313 } 314 for { 315 j++ 316 n, ok := skipDomainElem(s[j:]) 317 if !ok { 318 return nil, 0, false 319 } 320 j += n 321 if j >= len(s) || s[j] != '.' && s[j] != '>' { 322 return nil, 0, false 323 } 324 if s[j] == '>' { 325 break 326 } 327 } 328 email := s[i+1 : j] 329 return &AutoLink{email, "mailto:" + email}, j + 1, true 330} 331 332func isUser(c byte) bool { 333 if isLetterDigit(c) { 334 return true 335 } 336 s := ".!#$%&'*+/=?^_`{|}~-" 337 for i := 0; i < len(s); i++ { 338 if c == s[i] { 339 return true 340 } 341 } 342 return false 343} 344 345func isHexDigit(c byte) bool { 346 return 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' || '0' <= c && c <= '9' 347} 348 349func isDigit(c byte) bool { 350 return '0' <= c && c <= '9' 351} 352 353func skipDomainElem(s string) (int, bool) { 354 // String of LDH, up to 63 in length, with LetterDigit 355 // at both ends (1-letter/digit names are OK). 356 // Aka /[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/. 357 if len(s) < 1 || !isLetterDigit(s[0]) { 358 return 0, false 359 } 360 i := 1 361 for i < len(s) && isLDH(s[i]) && i <= 63 { 362 i++ 363 } 364 if i > 63 || !isLetterDigit(s[i-1]) { 365 return 0, false 366 } 367 return i, true 368} 369 370func isScheme(c byte) bool { 371 return isLetterDigit(c) || c == '+' || c == '.' || c == '-' 372} 373 374func isURL(c byte) bool { 375 return c > ' ' && c != '<' && c != '>' 376} 377 378type AutoLink struct { 379 Text string 380 URL string 381} 382 383func (*AutoLink) Inline() {} 384 385func (x *AutoLink) PrintHTML(buf *bytes.Buffer) { 386 fmt.Fprintf(buf, "<a href=\"%s\">%s</a>", htmlLinkEscaper.Replace(x.URL), htmlEscaper.Replace(x.Text)) 387} 388 389func (x *AutoLink) printMarkdown(buf *bytes.Buffer) { 390 fmt.Fprintf(buf, "<%s>", x.Text) 391} 392 393func (x *AutoLink) PrintText(buf *bytes.Buffer) { 394 fmt.Fprintf(buf, "%s", htmlEscaper.Replace(x.Text)) 395} 396 397type Link struct { 398 Inner []Inline 399 URL string 400 Title string 401 TitleChar byte // ', " or ) 402 corner bool 403} 404 405func (*Link) Inline() {} 406 407func (x *Link) PrintHTML(buf *bytes.Buffer) { 408 fmt.Fprintf(buf, "<a href=\"%s\"", htmlLinkEscaper.Replace(x.URL)) 409 if x.Title != "" { 410 fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title)) 411 } 412 buf.WriteString(">") 413 for _, c := range x.Inner { 414 c.PrintHTML(buf) 415 } 416 buf.WriteString("</a>") 417} 418 419func (x *Link) printMarkdown(buf *bytes.Buffer) { 420 buf.WriteByte('[') 421 x.printRemainingMarkdown(buf) 422} 423 424func (x *Link) printRemainingMarkdown(buf *bytes.Buffer) { 425 for _, c := range x.Inner { 426 c.printMarkdown(buf) 427 } 428 buf.WriteString("](") 429 buf.WriteString(x.URL) 430 printLinkTitleMarkdown(buf, x.Title, x.TitleChar) 431 buf.WriteByte(')') 432} 433 434func printLinkTitleMarkdown(buf *bytes.Buffer, title string, titleChar byte) { 435 if title == "" { 436 return 437 } 438 closeChar := titleChar 439 openChar := closeChar 440 if openChar == ')' { 441 openChar = '(' 442 } 443 fmt.Fprintf(buf, " %c%s%c", openChar, title /*TODO(jba): escape*/, closeChar) 444} 445 446func (x *Link) PrintText(buf *bytes.Buffer) { 447 for _, c := range x.Inner { 448 c.PrintText(buf) 449 } 450} 451 452type Image struct { 453 Inner []Inline 454 URL string 455 Title string 456 TitleChar byte 457 corner bool 458} 459 460func (*Image) Inline() {} 461 462func (x *Image) PrintHTML(buf *bytes.Buffer) { 463 fmt.Fprintf(buf, "<img src=\"%s\"", htmlLinkEscaper.Replace(x.URL)) 464 fmt.Fprintf(buf, " alt=\"") 465 i := buf.Len() 466 for _, c := range x.Inner { 467 c.PrintText(buf) 468 } 469 // GitHub and Goldmark both rewrite \n to space 470 // but the Dingus does not. 471 // The spec says title can be split across lines but not 472 // what happens at that point. 473 out := buf.Bytes() 474 for ; i < len(out); i++ { 475 if out[i] == '\n' { 476 out[i] = ' ' 477 } 478 } 479 fmt.Fprintf(buf, "\"") 480 if x.Title != "" { 481 fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title)) 482 } 483 buf.WriteString(" />") 484} 485 486func (x *Image) printMarkdown(buf *bytes.Buffer) { 487 buf.WriteString("![") 488 (*Link)(x).printRemainingMarkdown(buf) 489} 490 491func (x *Image) PrintText(buf *bytes.Buffer) { 492 for _, c := range x.Inner { 493 c.PrintText(buf) 494 } 495} 496 497// GitHub Flavored Markdown autolinks extension 498// https://github.github.com/gfm/#autolinks-extension- 499 500// autoLinkMore rewrites any extended autolinks in the body 501// and returns the result. 502// 503// body is a list of Plain, Emph, Strong, and Del nodes. 504// Two Plains only appear consecutively when one is a 505// potential emphasis marker that ended up being plain after all, like "_" or "**". 506// There are no Link nodes. 507// 508// The GitHub “spec” declares that “autolinks can only come at the 509// beginning of a line, after whitespace, or any of the delimiting 510// characters *, _, ~, and (”. However, the GitHub web site does not 511// enforce this rule: text like "[email protected] is my email" links the 512// text following the $ as an email address. It appears the actual rule 513// is that autolinks cannot come after ASCII letters, although they can 514// come after numbers or Unicode letters. 515// Since the only point of implementing GitHub Flavored Markdown 516// is to match GitHub's behavior, we do what they do, not what they say, 517// at least for now. 518func (p *parseState) autoLinkText(list []Inline) []Inline { 519 if !p.AutoLinkText { 520 return list 521 } 522 523 var out []Inline // allocated lazily when we first change list 524 for i, x := range list { 525 switch x := x.(type) { 526 case *Plain: 527 if rewrite := p.autoLinkPlain(x.Text); rewrite != nil { 528 if out == nil { 529 out = append(out, list[:i]...) 530 } 531 out = append(out, rewrite...) 532 continue 533 } 534 case *Strong: 535 x.Inner = p.autoLinkText(x.Inner) 536 case *Del: 537 x.Inner = p.autoLinkText(x.Inner) 538 case *Emph: 539 x.Inner = p.autoLinkText(x.Inner) 540 } 541 if out != nil { 542 out = append(out, x) 543 } 544 } 545 if out == nil { 546 return list 547 } 548 return out 549} 550 551func (p *parseState) autoLinkPlain(s string) []Inline { 552 vd := &validDomainChecker{s: s} 553 var out []Inline 554Restart: 555 for i := 0; i < len(s); i++ { 556 c := s[i] 557 if c == '@' { 558 if before, link, after, ok := p.parseAutoEmail(s, i); ok { 559 if before != "" { 560 out = append(out, &Plain{Text: before}) 561 } 562 out = append(out, link) 563 vd.skip(len(s) - len(after)) 564 s = after 565 goto Restart 566 } 567 } 568 569 if (c == 'h' || c == 'm' || c == 'x' || c == 'w') && (i == 0 || !isLetter(s[i-1])) { 570 if link, after, ok := p.parseAutoProto(s, i, vd); ok { 571 if i > 0 { 572 out = append(out, &Plain{Text: s[:i]}) 573 } 574 out = append(out, link) 575 vd.skip(len(s) - len(after)) 576 s = after 577 goto Restart 578 } 579 } 580 } 581 if out == nil { 582 return nil 583 } 584 out = append(out, &Plain{Text: s}) 585 return out 586} 587 588func (p *parseState) parseAutoProto(s string, i int, vd *validDomainChecker) (link *Link, after string, found bool) { 589 if s == "" { 590 return 591 } 592 switch s[i] { 593 case 'h': 594 var n int 595 if strings.HasPrefix(s[i:], "https://") { 596 n = len("https://") 597 } else if strings.HasPrefix(s[i:], "http://") { 598 n = len("http://") 599 } else { 600 return 601 } 602 return p.parseAutoHTTP(s[i:i+n], s, i, i+n, i+n+1, vd) 603 case 'w': 604 if !strings.HasPrefix(s[i:], "www.") { 605 return 606 } 607 // GitHub Flavored Markdown says to use http://, 608 // but it's not 1985 anymore. We live in the https:// future 609 // (unless the parser is explicitly configured otherwise). 610 // People who really care in their docs can write http:// themselves. 611 scheme := "https://" 612 if p.AutoLinkAssumeHTTP { 613 scheme = "http://" 614 } 615 return p.parseAutoHTTP(scheme, s, i, i, i+3, vd) 616 case 'm': 617 if !strings.HasPrefix(s[i:], "mailto:") { 618 return 619 } 620 return p.parseAutoMailto(s, i) 621 case 'x': 622 if !strings.HasPrefix(s[i:], "xmpp:") { 623 return 624 } 625 return p.parseAutoXmpp(s, i) 626 } 627 return 628} 629 630// parseAutoWWW parses an extended www autolink. 631// https://github.github.com/gfm/#extended-www-autolink 632func (p *parseState) parseAutoHTTP(scheme, s string, textstart, start, min int, vd *validDomainChecker) (link *Link, after string, found bool) { 633 n, ok := vd.parseValidDomain(start) 634 if !ok { 635 return 636 } 637 i := start + n 638 domEnd := i 639 640 // “After a valid domain, zero or more non-space non-< characters may follow.” 641 paren := 0 642 for i < len(s) { 643 r, n := utf8.DecodeRuneInString(s[i:]) 644 if isUnicodeSpace(r) || r == '<' { 645 break 646 } 647 if r == '(' { 648 paren++ 649 } 650 if r == ')' { 651 paren-- 652 } 653 i += n 654 } 655 656 // https://github.github.com/gfm/#extended-autolink-path-validation 657Trim: 658 for i > min { 659 switch s[i-1] { 660 case '?', '!', '.', ',', ':', '@', '_', '~': 661 // Trim certain trailing punctuation. 662 i-- 663 continue Trim 664 665 case ')': 666 // Trim trailing unmatched (by count only) parens. 667 if paren < 0 { 668 for s[i-1] == ')' && paren < 0 { 669 paren++ 670 i-- 671 } 672 continue Trim 673 } 674 675 case ';': 676 // Trim entity reference. 677 // After doing the work of the scan, we either cut that part off the string 678 // or we stop the trimming entirely, so there's no chance of repeating 679 // the scan on a future iteration and going accidentally quadratic. 680 // Even though the Markdown spec already requires having a complete 681 // list of all the HTML entities, the GitHub definition here just requires 682 // "looks like" an entity, meaning its an ampersand, letters/digits, and semicolon. 683 for j := i - 2; j > start; j-- { 684 if j < i-2 && s[j] == '&' { 685 i = j 686 continue Trim 687 } 688 if !isLetterDigit(s[j]) { 689 break Trim 690 } 691 } 692 } 693 break Trim 694 } 695 696 // According to the literal text of the GitHub Flavored Markdown spec 697 // and the actual behavior on GitHub, 698 // www.example.com$foo turns into <a href="https://www.example.com$foo">, 699 // but that makes the character restrictions in the valid-domain check 700 // almost meaningless. So we insist that when all is said and done, 701 // if the domain is followed by anything, that thing must be a slash, 702 // even though GitHub is not that picky. 703 // People might complain about www.example.com:1234 not working, 704 // but if you want to get fancy with that kind of thing, just write http:// in front. 705 if textstart == start && i > domEnd && s[domEnd] != '/' { 706 i = domEnd 707 } 708 709 if i < min { 710 return 711 } 712 713 link = &Link{ 714 Inner: []Inline{&Plain{Text: s[textstart:i]}}, 715 URL: scheme + s[start:i], 716 } 717 return link, s[i:], true 718} 719 720type validDomainChecker struct { 721 s string 722 cut int // before this index, no valid domains 723} 724 725func (v *validDomainChecker) skip(i int) { 726 v.s = v.s[i:] 727 v.cut -= i 728} 729 730// parseValidDomain parses a valid domain. 731// https://github.github.com/gfm/#valid-domain 732// 733// If s starts with a valid domain, parseValidDomain returns 734// the length of that domain and true. If s does not start with 735// a valid domain, parseValidDomain returns n, false, 736// where n is the length of a prefix guaranteed not to be acceptable 737// to any future call to parseValidDomain. 738// 739// “A valid domain consists of segments of alphanumeric characters, 740// underscores (_) and hyphens (-) separated by periods (.). 741// There must be at least one period, and no underscores may be 742// present in the last two segments of the domain.” 743// 744// The spec does not spell out whether segments can be empty. 745// Empirically, in GitHub's implementation they can. 746func (v *validDomainChecker) parseValidDomain(start int) (n int, found bool) { 747 if start < v.cut { 748 return 0, false 749 } 750 i := start 751 dots := 0 752 for ; i < len(v.s); i++ { 753 c := v.s[i] 754 if c == '_' { 755 dots = -2 756 continue 757 } 758 if c == '.' { 759 dots++ 760 continue 761 } 762 if !isLDH(c) { 763 break 764 } 765 } 766 if dots >= 0 && i > start { 767 return i - start, true 768 } 769 v.cut = i 770 return 0, false 771} 772 773func (p *parseState) parseAutoEmail(s string, i int) (before string, link *Link, after string, ok bool) { 774 if s[i] != '@' { 775 return 776 } 777 778 // “One ore more characters which are alphanumeric, or ., -, _, or +.” 779 j := i 780 for j > 0 && (isLDH(s[j-1]) || s[j-1] == '_' || s[j-1] == '+' || s[j-1] == '.') { 781 j-- 782 } 783 if i-j < 1 { 784 return 785 } 786 787 // “One or more characters which are alphanumeric, or - or _, separated by periods (.). 788 // There must be at least one period. The last character must not be one of - or _.” 789 dots := 0 790 k := i + 1 791 for k < len(s) && (isLDH(s[k]) || s[k] == '_' || s[k] == '.') { 792 if s[k] == '.' { 793 if s[k-1] == '.' { 794 // Empirically, .. stops the scan but [email protected] is fine. 795 break 796 } 797 dots++ 798 } 799 k++ 800 } 801 802 // “., -, and _ can occur on both sides of the @, but only . may occur at the end 803 // of the email address, in which case it will not be considered part of the address” 804 if s[k-1] == '.' { 805 dots-- 806 k-- 807 } 808 if s[k-1] == '-' || s[k-1] == '_' { 809 return 810 } 811 if k-(i+1)-dots < 2 || dots < 1 { 812 return 813 } 814 815 link = &Link{ 816 Inner: []Inline{&Plain{Text: s[j:k]}}, 817 URL: "mailto:" + s[j:k], 818 } 819 return s[:j], link, s[k:], true 820} 821 822func (p *parseState) parseAutoMailto(s string, i int) (link *Link, after string, ok bool) { 823 j := i + len("mailto:") 824 for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') { 825 j++ 826 } 827 if j >= len(s) || s[j] != '@' { 828 return 829 } 830 before, link, after, ok := p.parseAutoEmail(s[i:], j-i) 831 if before != "mailto:" || !ok { 832 return nil, "", false 833 } 834 link.Inner[0] = &Plain{Text: s[i : len(s)-len(after)]} 835 return link, after, true 836} 837 838func (p *parseState) parseAutoXmpp(s string, i int) (link *Link, after string, ok bool) { 839 j := i + len("xmpp:") 840 for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') { 841 j++ 842 } 843 if j >= len(s) || s[j] != '@' { 844 return 845 } 846 before, link, after, ok := p.parseAutoEmail(s[i:], j-i) 847 if before != "xmpp:" || !ok { 848 return nil, "", false 849 } 850 if after != "" && after[0] == '/' { 851 k := 1 852 for k < len(after) && (isLetterDigit(after[k]) || after[k] == '@' || after[k] == '.') { 853 k++ 854 } 855 after = after[k:] 856 } 857 url := s[i : len(s)-len(after)] 858 link.Inner[0] = &Plain{Text: url} 859 link.URL = url 860 return link, after, true 861} 862