1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "fmt" 10 "html" 11 "internal/godebug" 12 "io" 13 "regexp" 14 "text/template" 15 "text/template/parse" 16) 17 18// escapeTemplate rewrites the named template, which must be 19// associated with t, to guarantee that the output of any of the named 20// templates is properly escaped. If no error is returned, then the named templates have 21// been modified. Otherwise the named templates have been rendered 22// unusable. 23func escapeTemplate(tmpl *Template, node parse.Node, name string) error { 24 c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) 25 var err error 26 if c.err != nil { 27 err, c.err.Name = c.err, name 28 } else if c.state != stateText { 29 err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} 30 } 31 if err != nil { 32 // Prevent execution of unsafe templates. 33 if t := tmpl.set[name]; t != nil { 34 t.escapeErr = err 35 t.text.Tree = nil 36 t.Tree = nil 37 } 38 return err 39 } 40 tmpl.esc.commit() 41 if t := tmpl.set[name]; t != nil { 42 t.escapeErr = escapeOK 43 t.Tree = t.text.Tree 44 } 45 return nil 46} 47 48// evalArgs formats the list of arguments into a string. It is equivalent to 49// fmt.Sprint(args...), except that it dereferences all pointers. 50func evalArgs(args ...any) string { 51 // Optimization for simple common case of a single string argument. 52 if len(args) == 1 { 53 if s, ok := args[0].(string); ok { 54 return s 55 } 56 } 57 for i, arg := range args { 58 args[i] = indirectToStringerOrError(arg) 59 } 60 return fmt.Sprint(args...) 61} 62 63// funcMap maps command names to functions that render their inputs safe. 
64var funcMap = template.FuncMap{ 65 "_html_template_attrescaper": attrEscaper, 66 "_html_template_commentescaper": commentEscaper, 67 "_html_template_cssescaper": cssEscaper, 68 "_html_template_cssvaluefilter": cssValueFilter, 69 "_html_template_htmlnamefilter": htmlNameFilter, 70 "_html_template_htmlescaper": htmlEscaper, 71 "_html_template_jsregexpescaper": jsRegexpEscaper, 72 "_html_template_jsstrescaper": jsStrEscaper, 73 "_html_template_jstmpllitescaper": jsTmplLitEscaper, 74 "_html_template_jsvalescaper": jsValEscaper, 75 "_html_template_nospaceescaper": htmlNospaceEscaper, 76 "_html_template_rcdataescaper": rcdataEscaper, 77 "_html_template_srcsetescaper": srcsetFilterAndEscaper, 78 "_html_template_urlescaper": urlEscaper, 79 "_html_template_urlfilter": urlFilter, 80 "_html_template_urlnormalizer": urlNormalizer, 81 "_eval_args_": evalArgs, 82} 83 84// escaper collects type inferences about templates and changes needed to make 85// templates injection safe. 86type escaper struct { 87 // ns is the nameSpace that this escaper is associated with. 88 ns *nameSpace 89 // output[templateName] is the output context for a templateName that 90 // has been mangled to include its input context. 91 output map[string]context 92 // derived[c.mangle(name)] maps to a template derived from the template 93 // named name templateName for the start context c. 94 derived map[string]*template.Template 95 // called[templateName] is a set of called mangled template names. 96 called map[string]bool 97 // xxxNodeEdits are the accumulated edits to apply during commit. 98 // Such edits are not applied immediately in case a template set 99 // executes a given template in different escaping contexts. 100 actionNodeEdits map[*parse.ActionNode][]string 101 templateNodeEdits map[*parse.TemplateNode]string 102 textNodeEdits map[*parse.TextNode][]byte 103 // rangeContext holds context about the current range loop. 
104 rangeContext *rangeContext 105} 106 107// rangeContext holds information about the current range loop. 108type rangeContext struct { 109 outer *rangeContext // outer loop 110 breaks []context // context at each break action 111 continues []context // context at each continue action 112} 113 114// makeEscaper creates a blank escaper for the given set. 115func makeEscaper(n *nameSpace) escaper { 116 return escaper{ 117 n, 118 map[string]context{}, 119 map[string]*template.Template{}, 120 map[string]bool{}, 121 map[*parse.ActionNode][]string{}, 122 map[*parse.TemplateNode]string{}, 123 map[*parse.TextNode][]byte{}, 124 nil, 125 } 126} 127 128// filterFailsafe is an innocuous word that is emitted in place of unsafe values 129// by sanitizer functions. It is not a keyword in any programming language, 130// contains no special characters, is not empty, and when it appears in output 131// it is distinct enough that a developer can find the source of the problem 132// via a search engine. 133const filterFailsafe = "ZgotmplZ" 134 135// escape escapes a template node. 
136func (e *escaper) escape(c context, n parse.Node) context { 137 switch n := n.(type) { 138 case *parse.ActionNode: 139 return e.escapeAction(c, n) 140 case *parse.BreakNode: 141 c.n = n 142 e.rangeContext.breaks = append(e.rangeContext.breaks, c) 143 return context{state: stateDead} 144 case *parse.CommentNode: 145 return c 146 case *parse.ContinueNode: 147 c.n = n 148 e.rangeContext.continues = append(e.rangeContext.breaks, c) 149 return context{state: stateDead} 150 case *parse.IfNode: 151 return e.escapeBranch(c, &n.BranchNode, "if") 152 case *parse.ListNode: 153 return e.escapeList(c, n) 154 case *parse.RangeNode: 155 return e.escapeBranch(c, &n.BranchNode, "range") 156 case *parse.TemplateNode: 157 return e.escapeTemplate(c, n) 158 case *parse.TextNode: 159 return e.escapeText(c, n) 160 case *parse.WithNode: 161 return e.escapeBranch(c, &n.BranchNode, "with") 162 } 163 panic("escaping " + n.String() + " is unimplemented") 164} 165 166var debugAllowActionJSTmpl = godebug.New("jstmpllitinterp") 167 168// escapeAction escapes an action template node. 169func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { 170 if len(n.Pipe.Decl) != 0 { 171 // A local variable assignment, not an interpolation. 172 return c 173 } 174 c = nudge(c) 175 // Check for disallowed use of predefined escapers in the pipeline. 176 for pos, idNode := range n.Pipe.Cmds { 177 node, ok := idNode.Args[0].(*parse.IdentifierNode) 178 if !ok { 179 // A predefined escaper "esc" will never be found as an identifier in a 180 // Chain or Field node, since: 181 // - "esc.x ..." is invalid, since predefined escapers return strings, and 182 // strings do not have methods, keys or fields. 183 // - "... .esc" is invalid, since predefined escapers are global functions, 184 // not methods or fields of any types. 185 // Therefore, it is safe to ignore these two node types. 
186 continue 187 } 188 ident := node.Ident 189 if _, ok := predefinedEscapers[ident]; ok { 190 if pos < len(n.Pipe.Cmds)-1 || 191 c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { 192 return context{ 193 state: stateError, 194 err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), 195 } 196 } 197 } 198 } 199 s := make([]string, 0, 3) 200 switch c.state { 201 case stateError: 202 return c 203 case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: 204 switch c.urlPart { 205 case urlPartNone: 206 s = append(s, "_html_template_urlfilter") 207 fallthrough 208 case urlPartPreQuery: 209 switch c.state { 210 case stateCSSDqStr, stateCSSSqStr: 211 s = append(s, "_html_template_cssescaper") 212 default: 213 s = append(s, "_html_template_urlnormalizer") 214 } 215 case urlPartQueryOrFrag: 216 s = append(s, "_html_template_urlescaper") 217 case urlPartUnknown: 218 return context{ 219 state: stateError, 220 err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), 221 } 222 default: 223 panic(c.urlPart.String()) 224 } 225 case stateJS: 226 s = append(s, "_html_template_jsvalescaper") 227 // A slash after a value starts a div operator. 228 c.jsCtx = jsCtxDivOp 229 case stateJSDqStr, stateJSSqStr: 230 s = append(s, "_html_template_jsstrescaper") 231 case stateJSTmplLit: 232 s = append(s, "_html_template_jstmpllitescaper") 233 case stateJSRegexp: 234 s = append(s, "_html_template_jsregexpescaper") 235 case stateCSS: 236 s = append(s, "_html_template_cssvaluefilter") 237 case stateText: 238 s = append(s, "_html_template_htmlescaper") 239 case stateRCDATA: 240 s = append(s, "_html_template_rcdataescaper") 241 case stateAttr: 242 // Handled below in delim check. 
243 case stateAttrName, stateTag: 244 c.state = stateAttrName 245 s = append(s, "_html_template_htmlnamefilter") 246 case stateSrcset: 247 s = append(s, "_html_template_srcsetescaper") 248 default: 249 if isComment(c.state) { 250 s = append(s, "_html_template_commentescaper") 251 } else { 252 panic("unexpected state " + c.state.String()) 253 } 254 } 255 switch c.delim { 256 case delimNone: 257 // No extra-escaping needed for raw text content. 258 case delimSpaceOrTagEnd: 259 s = append(s, "_html_template_nospaceescaper") 260 default: 261 s = append(s, "_html_template_attrescaper") 262 } 263 e.editActionNode(n, s) 264 return c 265} 266 267// ensurePipelineContains ensures that the pipeline ends with the commands with 268// the identifiers in s in order. If the pipeline ends with a predefined escaper 269// (i.e. "html" or "urlquery"), merge it with the identifiers in s. 270func ensurePipelineContains(p *parse.PipeNode, s []string) { 271 if len(s) == 0 { 272 // Do not rewrite pipeline if we have no escapers to insert. 273 return 274 } 275 // Precondition: p.Cmds contains at most one predefined escaper and the 276 // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is 277 // always true because of the checks in escapeAction. 278 pipelineLen := len(p.Cmds) 279 if pipelineLen > 0 { 280 lastCmd := p.Cmds[pipelineLen-1] 281 if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { 282 if esc := idNode.Ident; predefinedEscapers[esc] { 283 // Pipeline ends with a predefined escaper. 284 if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { 285 // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, 286 // where esc is the predefined escaper, and arg1...argN are its arguments. 287 // Convert this into the equivalent form 288 // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily 289 // merged with the escapers in s. 
290 lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) 291 p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) 292 pipelineLen++ 293 } 294 // If any of the commands in s that we are about to insert is equivalent 295 // to the predefined escaper, use the predefined escaper instead. 296 dup := false 297 for i, escaper := range s { 298 if escFnsEq(esc, escaper) { 299 s[i] = idNode.Ident 300 dup = true 301 } 302 } 303 if dup { 304 // The predefined escaper will already be inserted along with the 305 // escapers in s, so do not copy it to the rewritten pipeline. 306 pipelineLen-- 307 } 308 } 309 } 310 } 311 // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. 312 newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) 313 insertedIdents := make(map[string]bool) 314 for i := 0; i < pipelineLen; i++ { 315 cmd := p.Cmds[i] 316 newCmds[i] = cmd 317 if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok { 318 insertedIdents[normalizeEscFn(idNode.Ident)] = true 319 } 320 } 321 for _, name := range s { 322 if !insertedIdents[normalizeEscFn(name)] { 323 // When two templates share an underlying parse tree via the use of 324 // AddParseTree and one template is executed after the other, this check 325 // ensures that escapers that were already inserted into the pipeline on 326 // the first escaping pass do not get inserted again. 327 newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) 328 } 329 } 330 p.Cmds = newCmds 331} 332 333// predefinedEscapers contains template predefined escapers that are equivalent 334// to some contextual escapers. Keep in sync with equivEscapers. 335var predefinedEscapers = map[string]bool{ 336 "html": true, 337 "urlquery": true, 338} 339 340// equivEscapers matches contextual escapers to equivalent predefined 341// template escapers. 
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}

// escFnsEq reports whether the escaper function names a and b normalize to
// the same name, i.e. whether they are treated as equivalent.
func escFnsEq(a, b string) bool {
	normA, normB := normalizeEscFn(a), normalizeEscFn(b)
	return normA == normB
}

// normalizeEscFn maps the escaper function name e to its entry in
// equivEscapers, or returns e unchanged when it has none, so that equivalent
// escaper names normalize to the same string.
func normalizeEscFn(e string) string {
	norm := equivEscapers[e]
	if norm == "" {
		return e
	}
	return norm
}

// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
377var redundantFuncs = map[string]map[string]bool{ 378 "_html_template_commentescaper": { 379 "_html_template_attrescaper": true, 380 "_html_template_htmlescaper": true, 381 }, 382 "_html_template_cssescaper": { 383 "_html_template_attrescaper": true, 384 }, 385 "_html_template_jsregexpescaper": { 386 "_html_template_attrescaper": true, 387 }, 388 "_html_template_jsstrescaper": { 389 "_html_template_attrescaper": true, 390 }, 391 "_html_template_jstmpllitescaper": { 392 "_html_template_attrescaper": true, 393 }, 394 "_html_template_urlescaper": { 395 "_html_template_urlnormalizer": true, 396 }, 397} 398 399// appendCmd appends the given command to the end of the command pipeline 400// unless it is redundant with the last command. 401func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { 402 if n := len(cmds); n != 0 { 403 last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) 404 next, okNext := cmd.Args[0].(*parse.IdentifierNode) 405 if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { 406 return cmds 407 } 408 } 409 return append(cmds, cmd) 410} 411 412// newIdentCmd produces a command containing a single identifier node. 413func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { 414 return &parse.CommandNode{ 415 NodeType: parse.NodeCommand, 416 Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. 417 } 418} 419 420// nudge returns the context that would result from following empty string 421// transitions from the input context. 422// For example, parsing: 423// 424// `<a href=` 425// 426// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune: 427// 428// `<a href=x` 429// 430// will end in context{stateURL, delimSpaceOrTagEnd, ...}. 431// There are two transitions that happen when the 'x' is seen: 432// (1) Transition from a before-value state to a start-of-value state without 433// 434// consuming any character. 
435// 436// (2) Consume 'x' and transition past the first value character. 437// In this case, nudging produces the context after (1) happens. 438func nudge(c context) context { 439 switch c.state { 440 case stateTag: 441 // In `<foo {{.}}`, the action should emit an attribute. 442 c.state = stateAttrName 443 case stateBeforeValue: 444 // In `<foo bar={{.}}`, the action is an undelimited value. 445 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone 446 case stateAfterName: 447 // In `<foo bar {{.}}`, the action is an attribute name. 448 c.state, c.attr = stateAttrName, attrNone 449 } 450 return c 451} 452 453// join joins the two contexts of a branch template node. The result is an 454// error context if either of the input contexts are error contexts, or if the 455// input contexts differ. 456func join(a, b context, node parse.Node, nodeName string) context { 457 if a.state == stateError { 458 return a 459 } 460 if b.state == stateError { 461 return b 462 } 463 if a.state == stateDead { 464 return b 465 } 466 if b.state == stateDead { 467 return a 468 } 469 if a.eq(b) { 470 return a 471 } 472 473 c := a 474 c.urlPart = b.urlPart 475 if c.eq(b) { 476 // The contexts differ only by urlPart. 477 c.urlPart = urlPartUnknown 478 return c 479 } 480 481 c = a 482 c.jsCtx = b.jsCtx 483 if c.eq(b) { 484 // The contexts differ only by jsCtx. 485 c.jsCtx = jsCtxUnknown 486 return c 487 } 488 489 // Allow a nudged context to join with an unnudged one. 490 // This means that 491 // <p title={{if .C}}{{.}}{{end}} 492 // ends in an unquoted value state even though the else branch 493 // ends in stateBeforeValue. 
494 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) { 495 if e := join(c, d, node, nodeName); e.state != stateError { 496 return e 497 } 498 } 499 500 return context{ 501 state: stateError, 502 err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), 503 } 504} 505 506// escapeBranch escapes a branch template node: "if", "range" and "with". 507func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context { 508 if nodeName == "range" { 509 e.rangeContext = &rangeContext{outer: e.rangeContext} 510 } 511 c0 := e.escapeList(c, n.List) 512 if nodeName == "range" { 513 if c0.state != stateError { 514 c0 = joinRange(c0, e.rangeContext) 515 } 516 e.rangeContext = e.rangeContext.outer 517 if c0.state == stateError { 518 return c0 519 } 520 521 // The "true" branch of a "range" node can execute multiple times. 522 // We check that executing n.List once results in the same context 523 // as executing n.List twice. 524 e.rangeContext = &rangeContext{outer: e.rangeContext} 525 c1, _ := e.escapeListConditionally(c0, n.List, nil) 526 c0 = join(c0, c1, n, nodeName) 527 if c0.state == stateError { 528 e.rangeContext = e.rangeContext.outer 529 // Make clear that this is a problem on loop re-entry 530 // since developers tend to overlook that branch when 531 // debugging templates. 532 c0.err.Line = n.Line 533 c0.err.Description = "on range loop re-entry: " + c0.err.Description 534 return c0 535 } 536 c0 = joinRange(c0, e.rangeContext) 537 e.rangeContext = e.rangeContext.outer 538 if c0.state == stateError { 539 return c0 540 } 541 } 542 c1 := e.escapeList(c, n.ElseList) 543 return join(c0, c1, n, nodeName) 544} 545 546func joinRange(c0 context, rc *rangeContext) context { 547 // Merge contexts at break and continue statements into overall body context. 
548 // In theory we could treat breaks differently from continues, but for now it is 549 // enough to treat them both as going back to the start of the loop (which may then stop). 550 for _, c := range rc.breaks { 551 c0 = join(c0, c, c.n, "range") 552 if c0.state == stateError { 553 c0.err.Line = c.n.(*parse.BreakNode).Line 554 c0.err.Description = "at range loop break: " + c0.err.Description 555 return c0 556 } 557 } 558 for _, c := range rc.continues { 559 c0 = join(c0, c, c.n, "range") 560 if c0.state == stateError { 561 c0.err.Line = c.n.(*parse.ContinueNode).Line 562 c0.err.Description = "at range loop continue: " + c0.err.Description 563 return c0 564 } 565 } 566 return c0 567} 568 569// escapeList escapes a list template node. 570func (e *escaper) escapeList(c context, n *parse.ListNode) context { 571 if n == nil { 572 return c 573 } 574 for _, m := range n.Nodes { 575 c = e.escape(c, m) 576 if c.state == stateDead { 577 break 578 } 579 } 580 return c 581} 582 583// escapeListConditionally escapes a list node but only preserves edits and 584// inferences in e if the inferences and output context satisfy filter. 585// It returns the best guess at an output context, and the result of the filter 586// which is the same as whether e was updated. 587func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { 588 e1 := makeEscaper(e.ns) 589 e1.rangeContext = e.rangeContext 590 // Make type inferences available to f. 591 for k, v := range e.output { 592 e1.output[k] = v 593 } 594 c = e1.escapeList(c, n) 595 ok := filter != nil && filter(&e1, c) 596 if ok { 597 // Copy inferences and edits from e1 back into e. 
598 for k, v := range e1.output { 599 e.output[k] = v 600 } 601 for k, v := range e1.derived { 602 e.derived[k] = v 603 } 604 for k, v := range e1.called { 605 e.called[k] = v 606 } 607 for k, v := range e1.actionNodeEdits { 608 e.editActionNode(k, v) 609 } 610 for k, v := range e1.templateNodeEdits { 611 e.editTemplateNode(k, v) 612 } 613 for k, v := range e1.textNodeEdits { 614 e.editTextNode(k, v) 615 } 616 } 617 return c, ok 618} 619 620// escapeTemplate escapes a {{template}} call node. 621func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context { 622 c, name := e.escapeTree(c, n, n.Name, n.Line) 623 if name != n.Name { 624 e.editTemplateNode(n, name) 625 } 626 return c 627} 628 629// escapeTree escapes the named template starting in the given context as 630// necessary and returns its output context. 631func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) { 632 // Mangle the template name with the input context to produce a reliable 633 // identifier. 634 dname := c.mangle(name) 635 e.called[dname] = true 636 if out, ok := e.output[dname]; ok { 637 // Already escaped. 638 return out, dname 639 } 640 t := e.template(name) 641 if t == nil { 642 // Two cases: The template exists but is empty, or has never been mentioned at 643 // all. Distinguish the cases in the error messages. 644 if e.ns.set[name] != nil { 645 return context{ 646 state: stateError, 647 err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name), 648 }, dname 649 } 650 return context{ 651 state: stateError, 652 err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name), 653 }, dname 654 } 655 if dname != name { 656 // Use any template derived during an earlier call to escapeTemplate 657 // with different top level templates, or clone if necessary. 
658 dt := e.template(dname) 659 if dt == nil { 660 dt = template.New(dname) 661 dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()} 662 e.derived[dname] = dt 663 } 664 t = dt 665 } 666 return e.computeOutCtx(c, t), dname 667} 668 669// computeOutCtx takes a template and its start context and computes the output 670// context while storing any inferences in e. 671func (e *escaper) computeOutCtx(c context, t *template.Template) context { 672 // Propagate context over the body. 673 c1, ok := e.escapeTemplateBody(c, t) 674 if !ok { 675 // Look for a fixed point by assuming c1 as the output context. 676 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 { 677 c1, ok = c2, true 678 } 679 // Use c1 as the error context if neither assumption worked. 680 } 681 if !ok && c1.state != stateError { 682 return context{ 683 state: stateError, 684 err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()), 685 } 686 } 687 return c1 688} 689 690// escapeTemplateBody escapes the given template assuming the given output 691// context, and returns the best guess at the output context and whether the 692// assumption was correct. 693func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) { 694 filter := func(e1 *escaper, c1 context) bool { 695 if c1.state == stateError { 696 // Do not update the input escaper, e. 697 return false 698 } 699 if !e1.called[t.Name()] { 700 // If t is not recursively called, then c1 is an 701 // accurate output context. 702 return true 703 } 704 // c1 is accurate if it matches our assumed output context. 705 return c.eq(c1) 706 } 707 // We need to assume an output context so that recursive template calls 708 // take the fast path out of escapeTree instead of infinitely recurring. 709 // Naively assuming that the input context is the same as the output 710 // works >90% of the time. 
711 e.output[t.Name()] = c 712 return e.escapeListConditionally(c, t.Tree.Root, filter) 713} 714 715// delimEnds maps each delim to a string of characters that terminate it. 716var delimEnds = [...]string{ 717 delimDoubleQuote: `"`, 718 delimSingleQuote: "'", 719 // Determined empirically by running the below in various browsers. 720 // var div = document.createElement("DIV"); 721 // for (var i = 0; i < 0x10000; ++i) { 722 // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; 723 // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) 724 // document.write("<p>U+" + i.toString(16)); 725 // } 726 delimSpaceOrTagEnd: " \t\n\f\r>", 727} 728 729var ( 730 // Per WHATWG HTML specification, section 4.12.1.3, there are extremely 731 // complicated rules for how to handle the set of opening tags <!--, 732 // <script, and </script when they appear in JS literals (i.e. strings, 733 // regexs, and comments). The specification suggests a simple solution, 734 // rather than implementing the arcane ABNF, which involves simply escaping 735 // the opening bracket with \x3C. We use the below regex for this, since it 736 // makes doing the case-insensitive find-replace much simpler. 737 specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)") 738 specialScriptTagReplacement = []byte("\\x3C$1") 739) 740 741func containsSpecialScriptTag(s []byte) bool { 742 return specialScriptTagRE.Match(s) 743} 744 745func escapeSpecialScriptTags(s []byte) []byte { 746 return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement) 747} 748 749var doctypeBytes = []byte("<!DOCTYPE") 750 751// escapeText escapes a text template node. 
752func (e *escaper) escapeText(c context, n *parse.TextNode) context { 753 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) 754 for i != len(s) { 755 c1, nread := contextAfterText(c, s[i:]) 756 i1 := i + nread 757 if c.state == stateText || c.state == stateRCDATA { 758 end := i1 759 if c1.state != c.state { 760 for j := end - 1; j >= i; j-- { 761 if s[j] == '<' { 762 end = j 763 break 764 } 765 } 766 } 767 for j := i; j < end; j++ { 768 if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { 769 b.Write(s[written:j]) 770 b.WriteString("<") 771 written = j + 1 772 } 773 } 774 } else if isComment(c.state) && c.delim == delimNone { 775 switch c.state { 776 case stateJSBlockCmt: 777 // https://es5.github.io/#x7.4: 778 // "Comments behave like white space and are 779 // discarded except that, if a MultiLineComment 780 // contains a line terminator character, then 781 // the entire comment is considered to be a 782 // LineTerminator for purposes of parsing by 783 // the syntactic grammar." 784 if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { 785 b.WriteByte('\n') 786 } else { 787 b.WriteByte(' ') 788 } 789 case stateCSSBlockCmt: 790 b.WriteByte(' ') 791 } 792 written = i1 793 } 794 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { 795 // Preserve the portion between written and the comment start. 
796 cs := i1 - 2 797 if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt { 798 // "<!--" instead of "/*" or "//" 799 cs -= 2 800 } else if c1.state == stateJSHTMLCloseCmt { 801 // "-->" instead of "/*" or "//" 802 cs -= 1 803 } 804 b.Write(s[written:cs]) 805 written = i1 806 } 807 if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) { 808 b.Write(s[written:i]) 809 b.Write(escapeSpecialScriptTags(s[i:i1])) 810 written = i1 811 } 812 if i == i1 && c.state == c1.state { 813 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) 814 } 815 c, i = c1, i1 816 } 817 818 if written != 0 && c.state != stateError { 819 if !isComment(c.state) || c.delim != delimNone { 820 b.Write(n.Text[written:]) 821 } 822 e.editTextNode(n, b.Bytes()) 823 } 824 return c 825} 826 827// contextAfterText starts in context c, consumes some tokens from the front of 828// s, then returns the context after those tokens and the unprocessed suffix. 829func contextAfterText(c context, s []byte) (context, int) { 830 if c.delim == delimNone { 831 c1, i := tSpecialTagEnd(c, s) 832 if i == 0 { 833 // A special end tag (`</script>`) has been seen and 834 // all content preceding it has been consumed. 835 return c1, 0 836 } 837 // Consider all content up to any end tag. 838 return transitionFunc[c.state](c, s[:i]) 839 } 840 841 // We are at the beginning of an attribute value. 842 843 i := bytes.IndexAny(s, delimEnds[c.delim]) 844 if i == -1 { 845 i = len(s) 846 } 847 if c.delim == delimSpaceOrTagEnd { 848 // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 849 // lists the runes below as error characters. 850 // Error out because HTML parsers may differ on whether 851 // "<a id= onclick=f(" ends inside id's or onclick's value, 852 // "<a class=`foo " ends inside a value, 853 // "<a style=font:'Arial'" needs open-quote fixup. 854 // IE treats '`' as a quotation character. 
855 if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 { 856 return context{ 857 state: stateError, 858 err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), 859 }, len(s) 860 } 861 } 862 if i == len(s) { 863 // Remain inside the attribute. 864 // Decode the value so non-HTML rules can easily handle 865 // <button onclick="alert("Hi!")"> 866 // without having to entity decode token boundaries. 867 for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { 868 c1, i1 := transitionFunc[c.state](c, u) 869 c, u = c1, u[i1:] 870 } 871 return c, len(s) 872 } 873 874 element := c.element 875 876 // If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS. 877 if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) { 878 element = elementNone 879 } 880 881 if c.delim != delimSpaceOrTagEnd { 882 // Consume any quote. 883 i++ 884 } 885 // On exiting an attribute, we discard all state information 886 // except the state and element. 887 return context{state: stateTag, element: element}, i 888} 889 890// editActionNode records a change to an action pipeline for later commit. 891func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) { 892 if _, ok := e.actionNodeEdits[n]; ok { 893 panic(fmt.Sprintf("node %s shared between templates", n)) 894 } 895 e.actionNodeEdits[n] = cmds 896} 897 898// editTemplateNode records a change to a {{template}} callee for later commit. 899func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) { 900 if _, ok := e.templateNodeEdits[n]; ok { 901 panic(fmt.Sprintf("node %s shared between templates", n)) 902 } 903 e.templateNodeEdits[n] = callee 904} 905 906// editTextNode records a change to a text node for later commit. 
907func (e *escaper) editTextNode(n *parse.TextNode, text []byte) { 908 if _, ok := e.textNodeEdits[n]; ok { 909 panic(fmt.Sprintf("node %s shared between templates", n)) 910 } 911 e.textNodeEdits[n] = text 912} 913 914// commit applies changes to actions and template calls needed to contextually 915// autoescape content and adds any derived templates to the set. 916func (e *escaper) commit() { 917 for name := range e.output { 918 e.template(name).Funcs(funcMap) 919 } 920 // Any template from the name space associated with this escaper can be used 921 // to add derived templates to the underlying text/template name space. 922 tmpl := e.arbitraryTemplate() 923 for _, t := range e.derived { 924 if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { 925 panic("error adding derived template") 926 } 927 } 928 for n, s := range e.actionNodeEdits { 929 ensurePipelineContains(n.Pipe, s) 930 } 931 for n, name := range e.templateNodeEdits { 932 n.Name = name 933 } 934 for n, s := range e.textNodeEdits { 935 n.Text = s 936 } 937 // Reset state that is specific to this commit so that the same changes are 938 // not re-applied to the template on subsequent calls to commit. 939 e.called = make(map[string]bool) 940 e.actionNodeEdits = make(map[*parse.ActionNode][]string) 941 e.templateNodeEdits = make(map[*parse.TemplateNode]string) 942 e.textNodeEdits = make(map[*parse.TextNode][]byte) 943} 944 945// template returns the named template given a mangled template name. 946func (e *escaper) template(name string) *template.Template { 947 // Any template from the name space associated with this escaper can be used 948 // to look up templates in the underlying text/template name space. 949 t := e.arbitraryTemplate().text.Lookup(name) 950 if t == nil { 951 t = e.derived[name] 952 } 953 return t 954} 955 956// arbitraryTemplate returns an arbitrary template from the name space 957// associated with e and panics if no templates are found. 
// The choice is whichever entry map iteration over e.ns.set yields first, so
// callers must not depend on which template they get.
func (e *escaper) arbitraryTemplate() *Template {
	for _, t := range e.ns.set {
		return t
	}
	panic("no templates in name space")
}

// Forwarding functions so that clients need only import this package
// to reach the general escaping functions of text/template.

// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
// It forwards to text/template's HTMLEscape.
func HTMLEscape(w io.Writer, b []byte) {
	template.HTMLEscape(w, b)
}

// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
// It forwards to text/template's HTMLEscapeString.
func HTMLEscapeString(s string) string {
	return template.HTMLEscapeString(s)
}

// HTMLEscaper returns the escaped HTML equivalent of the textual
// representation of its arguments.
// It forwards to text/template's HTMLEscaper.
func HTMLEscaper(args ...any) string {
	return template.HTMLEscaper(args...)
}

// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
// It forwards to text/template's JSEscape.
func JSEscape(w io.Writer, b []byte) {
	template.JSEscape(w, b)
}

// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
// It forwards to text/template's JSEscapeString.
func JSEscapeString(s string) string {
	return template.JSEscapeString(s)
}

// JSEscaper returns the escaped JavaScript equivalent of the textual
// representation of its arguments.
// It forwards to text/template's JSEscaper.
func JSEscaper(args ...any) string {
	return template.JSEscaper(args...)
}

// URLQueryEscaper returns the escaped value of the textual representation of
// its arguments in a form suitable for embedding in a URL query.
// It forwards to text/template's URLQueryEscaper.
func URLQueryEscaper(args ...any) string {
	return template.URLQueryEscaper(args...)
}