1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 "reflect" 12 "strings" 13 "unicode/utf8" 14) 15 16// jsWhitespace contains all of the JS whitespace characters, as defined 17// by the \s character class. 18// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes. 19const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff" 20 21// nextJSCtx returns the context that determines whether a slash after the 22// given run of tokens starts a regular expression instead of a division 23// operator: / or /=. 24// 25// This assumes that the token run does not include any string tokens, comment 26// tokens, regular expression literal tokens, or division operators. 27// 28// This fails on some valid but nonsensical JavaScript programs like 29// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 30// fail on any known useful programs. It is based on the draft 31// JavaScript 2.0 lexical grammar and requires one token of lookbehind: 32// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 33func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 34 // Trim all JS whitespace characters 35 s = bytes.TrimRight(s, jsWhitespace) 36 if len(s) == 0 { 37 return preceding 38 } 39 40 // All cases below are in the single-byte UTF-8 group. 41 switch c, n := s[len(s)-1], len(s); c { 42 case '+', '-': 43 // ++ and -- are not regexp preceders, but + and - are whether 44 // they are used as infix or prefix operators. 45 start := n - 1 46 // Count the number of adjacent dashes or pluses. 
47 for start > 0 && s[start-1] == c { 48 start-- 49 } 50 if (n-start)&1 == 1 { 51 // Reached for trailing minus signs since "---" is the 52 // same as "-- -". 53 return jsCtxRegexp 54 } 55 return jsCtxDivOp 56 case '.': 57 // Handle "42." 58 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 59 return jsCtxDivOp 60 } 61 return jsCtxRegexp 62 // Suffixes for all punctuators from section 7.7 of the language spec 63 // that only end binary operators not handled above. 64 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 65 return jsCtxRegexp 66 // Suffixes for all punctuators from section 7.7 of the language spec 67 // that are prefix operators not handled above. 68 case '!', '~': 69 return jsCtxRegexp 70 // Matches all the punctuators from section 7.7 of the language spec 71 // that are open brackets not handled above. 72 case '(', '[': 73 return jsCtxRegexp 74 // Matches all the punctuators from section 7.7 of the language spec 75 // that precede expression starts. 76 case ':', ';', '{': 77 return jsCtxRegexp 78 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 79 // are handled in the default except for '}' which can precede a 80 // division op as in 81 // ({ valueOf: function () { return 42 } } / 2 82 // which is valid, but, in practice, developers don't divide object 83 // literals, so our heuristic works well for code like 84 // function () { ... } /foo/.test(x) && sideEffect(); 85 // The ')' punctuator can precede a regular expression as in 86 // if (b) /foo/.test(x) && ... 87 // but this is much less likely than 88 // (a + b) / c 89 case '}': 90 return jsCtxRegexp 91 default: 92 // Look for an IdentifierName and see if it is a keyword that 93 // can precede a regular expression. 
94 j := n 95 for j > 0 && isJSIdentPart(rune(s[j-1])) { 96 j-- 97 } 98 if regexpPrecederKeywords[string(s[j:])] { 99 return jsCtxRegexp 100 } 101 } 102 // Otherwise is a punctuator not listed above, or 103 // a string which precedes a div op, or an identifier 104 // which precedes a div op. 105 return jsCtxDivOp 106} 107 108// regexpPrecederKeywords is a set of reserved JS keywords that can precede a 109// regular expression in JS source. 110var regexpPrecederKeywords = map[string]bool{ 111 "break": true, 112 "case": true, 113 "continue": true, 114 "delete": true, 115 "do": true, 116 "else": true, 117 "finally": true, 118 "in": true, 119 "instanceof": true, 120 "return": true, 121 "throw": true, 122 "try": true, 123 "typeof": true, 124 "void": true, 125} 126 127var jsonMarshalType = reflect.TypeFor[json.Marshaler]() 128 129// indirectToJSONMarshaler returns the value, after dereferencing as many times 130// as necessary to reach the base type (or nil) or an implementation of json.Marshal. 131func indirectToJSONMarshaler(a any) any { 132 // text/template now supports passing untyped nil as a func call 133 // argument, so we must support it. Otherwise we'd panic below, as one 134 // cannot call the Type or Interface methods on an invalid 135 // reflect.Value. See golang.org/issue/18716. 136 if a == nil { 137 return nil 138 } 139 140 v := reflect.ValueOf(a) 141 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { 142 v = v.Elem() 143 } 144 return v.Interface() 145} 146 147// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 148// neither side-effects nor free variables outside (NaN, Infinity). 149func jsValEscaper(args ...any) string { 150 var a any 151 if len(args) == 1 { 152 a = indirectToJSONMarshaler(args[0]) 153 switch t := a.(type) { 154 case JS: 155 return string(t) 156 case JSStr: 157 // TODO: normalize quotes. 
158 return `"` + string(t) + `"` 159 case json.Marshaler: 160 // Do not treat as a Stringer. 161 case fmt.Stringer: 162 a = t.String() 163 } 164 } else { 165 for i, arg := range args { 166 args[i] = indirectToJSONMarshaler(arg) 167 } 168 a = fmt.Sprint(args...) 169 } 170 // TODO: detect cycles before calling Marshal which loops infinitely on 171 // cyclic data. This may be an unacceptable DoS risk. 172 b, err := json.Marshal(a) 173 if err != nil { 174 // While the standard JSON marshaler does not include user controlled 175 // information in the error message, if a type has a MarshalJSON method, 176 // the content of the error message is not guaranteed. Since we insert 177 // the error into the template, as part of a comment, we attempt to 178 // prevent the error from either terminating the comment, or the script 179 // block itself. 180 // 181 // In particular we: 182 // * replace "*/" comment end tokens with "* /", which does not 183 // terminate the comment 184 // * replace "</script" with "\x3C/script", and "<!--" with 185 // "\x3C!--", which prevents confusing script block termination 186 // semantics 187 // 188 // We also put a space before the comment so that if it is flush against 189 // a division operator it is not turned into a line comment: 190 // x/{{y}} 191 // turning into 192 // x//* error marshaling y: 193 // second line of error message */null 194 errStr := err.Error() 195 errStr = strings.ReplaceAll(errStr, "*/", "* /") 196 errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`) 197 errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`) 198 return fmt.Sprintf(" /* %s */null ", errStr) 199 } 200 201 // TODO: maybe post-process output to prevent it from containing 202 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 203 // in case custom marshalers produce output containing those. 204 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper 205 // supports ld+json content-type. 
206 if len(b) == 0 { 207 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 208 // not cause the output `x=y/*z`. 209 return " null " 210 } 211 first, _ := utf8.DecodeRune(b) 212 last, _ := utf8.DecodeLastRune(b) 213 var buf strings.Builder 214 // Prevent IdentifierNames and NumericLiterals from running into 215 // keywords: in, instanceof, typeof, void 216 pad := isJSIdentPart(first) || isJSIdentPart(last) 217 if pad { 218 buf.WriteByte(' ') 219 } 220 written := 0 221 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 222 // so it falls within the subset of JSON which is valid JS. 223 for i := 0; i < len(b); { 224 rune, n := utf8.DecodeRune(b[i:]) 225 repl := "" 226 if rune == 0x2028 { 227 repl = `\u2028` 228 } else if rune == 0x2029 { 229 repl = `\u2029` 230 } 231 if repl != "" { 232 buf.Write(b[written:i]) 233 buf.WriteString(repl) 234 written = i + n 235 } 236 i += n 237 } 238 if buf.Len() != 0 { 239 buf.Write(b[written:]) 240 if pad { 241 buf.WriteByte(' ') 242 } 243 return buf.String() 244 } 245 return string(b) 246} 247 248// jsStrEscaper produces a string that can be included between quotes in 249// JavaScript source, in JavaScript embedded in an HTML5 <script> element, 250// or in an HTML5 event handler attribute such as onclick. 251func jsStrEscaper(args ...any) string { 252 s, t := stringify(args...) 253 if t == contentTypeJSStr { 254 return replace(s, jsStrNormReplacementTable) 255 } 256 return replace(s, jsStrReplacementTable) 257} 258 259func jsTmplLitEscaper(args ...any) string { 260 s, _ := stringify(args...) 261 return replace(s, jsBqStrReplacementTable) 262} 263 264// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 265// specials so the result is treated literally when included in a regular 266// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 267// the literal text of {{.X}} followed by the string "bar". 
268func jsRegexpEscaper(args ...any) string { 269 s, _ := stringify(args...) 270 s = replace(s, jsRegexpReplacementTable) 271 if s == "" { 272 // /{{.X}}/ should not produce a line comment when .X == "". 273 return "(?:)" 274 } 275 return s 276} 277 278// replace replaces each rune r of s with replacementTable[r], provided that 279// r < len(replacementTable). If replacementTable[r] is the empty string then 280// no replacement is made. 281// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 282// `\u2029`. 283func replace(s string, replacementTable []string) string { 284 var b strings.Builder 285 r, w, written := rune(0), 0, 0 286 for i := 0; i < len(s); i += w { 287 // See comment in htmlEscaper. 288 r, w = utf8.DecodeRuneInString(s[i:]) 289 var repl string 290 switch { 291 case int(r) < len(lowUnicodeReplacementTable): 292 repl = lowUnicodeReplacementTable[r] 293 case int(r) < len(replacementTable) && replacementTable[r] != "": 294 repl = replacementTable[r] 295 case r == '\u2028': 296 repl = `\u2028` 297 case r == '\u2029': 298 repl = `\u2029` 299 default: 300 continue 301 } 302 if written == 0 { 303 b.Grow(len(s)) 304 } 305 b.WriteString(s[written:i]) 306 b.WriteString(repl) 307 written = i + w 308 } 309 if written == 0 { 310 return s 311 } 312 b.WriteString(s[written:]) 313 return b.String() 314} 315 316var lowUnicodeReplacementTable = []string{ 317 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, 318 '\a': `\u0007`, 319 '\b': `\u0008`, 320 '\t': `\t`, 321 '\n': `\n`, 322 '\v': `\u000b`, // "\v" == "v" on IE 6. 
323 '\f': `\f`, 324 '\r': `\r`, 325 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, 326 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, 327 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, 328} 329 330var jsStrReplacementTable = []string{ 331 0: `\u0000`, 332 '\t': `\t`, 333 '\n': `\n`, 334 '\v': `\u000b`, // "\v" == "v" on IE 6. 335 '\f': `\f`, 336 '\r': `\r`, 337 // Encode HTML specials as hex so the output can be embedded 338 // in HTML attributes without further encoding. 339 '"': `\u0022`, 340 '`': `\u0060`, 341 '&': `\u0026`, 342 '\'': `\u0027`, 343 '+': `\u002b`, 344 '/': `\/`, 345 '<': `\u003c`, 346 '>': `\u003e`, 347 '\\': `\\`, 348} 349 350// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains 351// the special characters for JS template literals: $, {, and }. 352var jsBqStrReplacementTable = []string{ 353 0: `\u0000`, 354 '\t': `\t`, 355 '\n': `\n`, 356 '\v': `\u000b`, // "\v" == "v" on IE 6. 357 '\f': `\f`, 358 '\r': `\r`, 359 // Encode HTML specials as hex so the output can be embedded 360 // in HTML attributes without further encoding. 361 '"': `\u0022`, 362 '`': `\u0060`, 363 '&': `\u0026`, 364 '\'': `\u0027`, 365 '+': `\u002b`, 366 '/': `\/`, 367 '<': `\u003c`, 368 '>': `\u003e`, 369 '\\': `\\`, 370 '$': `\u0024`, 371 '{': `\u007b`, 372 '}': `\u007d`, 373} 374 375// jsStrNormReplacementTable is like jsStrReplacementTable but does not 376// overencode existing escapes since this table has no entry for `\`. 377var jsStrNormReplacementTable = []string{ 378 0: `\u0000`, 379 '\t': `\t`, 380 '\n': `\n`, 381 '\v': `\u000b`, // "\v" == "v" on IE 6. 382 '\f': `\f`, 383 '\r': `\r`, 384 // Encode HTML specials as hex so the output can be embedded 385 // in HTML attributes without further encoding. 
386 '"': `\u0022`, 387 '&': `\u0026`, 388 '\'': `\u0027`, 389 '`': `\u0060`, 390 '+': `\u002b`, 391 '/': `\/`, 392 '<': `\u003c`, 393 '>': `\u003e`, 394} 395var jsRegexpReplacementTable = []string{ 396 0: `\u0000`, 397 '\t': `\t`, 398 '\n': `\n`, 399 '\v': `\u000b`, // "\v" == "v" on IE 6. 400 '\f': `\f`, 401 '\r': `\r`, 402 // Encode HTML specials as hex so the output can be embedded 403 // in HTML attributes without further encoding. 404 '"': `\u0022`, 405 '$': `\$`, 406 '&': `\u0026`, 407 '\'': `\u0027`, 408 '(': `\(`, 409 ')': `\)`, 410 '*': `\*`, 411 '+': `\u002b`, 412 '-': `\-`, 413 '.': `\.`, 414 '/': `\/`, 415 '<': `\u003c`, 416 '>': `\u003e`, 417 '?': `\?`, 418 '[': `\[`, 419 '\\': `\\`, 420 ']': `\]`, 421 '^': `\^`, 422 '{': `\{`, 423 '|': `\|`, 424 '}': `\}`, 425} 426 427// isJSIdentPart reports whether the given rune is a JS identifier part. 428// It does not handle all the non-Latin letters, joiners, and combining marks, 429// but it does handle every codepoint that can occur in a numeric literal or 430// a keyword. 431func isJSIdentPart(r rune) bool { 432 switch { 433 case r == '$': 434 return true 435 case '0' <= r && r <= '9': 436 return true 437 case 'A' <= r && r <= 'Z': 438 return true 439 case r == '_': 440 return true 441 case 'a' <= r && r <= 'z': 442 return true 443 } 444 return false 445} 446 447// isJSType reports whether the given MIME type should be considered JavaScript. 448// 449// It is used to determine whether a script tag with a type attribute is a javascript container. 
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt

	// Everything from the first ';' onward is a parameter list; drop it.
	essence, _, _ := strings.Cut(mimeType, ";")

	// Compare case-insensitively and ignore surrounding whitespace.
	switch strings.TrimSpace(strings.ToLower(essence)) {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}