1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"encoding/json"
10	"fmt"
11	"reflect"
12	"strings"
13	"unicode/utf8"
14)
15
// jsWhitespace lists every character that JavaScript's \s character class
// matches, i.e. all of the JS whitespace characters.
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
const jsWhitespace = "\f\n\r\t\v" + // ASCII control whitespace
	"\u0020\u00a0\u1680" + // space, no-break space, ogham space mark
	"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" + // en quad through hair space
	"\u2028\u2029" + // line separator, paragraph separator
	"\u202f\u205f\u3000" + // narrow no-break, medium mathematical, ideographic space
	"\ufeff" // zero width no-break space
20
21// nextJSCtx returns the context that determines whether a slash after the
22// given run of tokens starts a regular expression instead of a division
23// operator: / or /=.
24//
25// This assumes that the token run does not include any string tokens, comment
26// tokens, regular expression literal tokens, or division operators.
27//
28// This fails on some valid but nonsensical JavaScript programs like
29// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
30// fail on any known useful programs. It is based on the draft
31// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
32// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
33func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
34	// Trim all JS whitespace characters
35	s = bytes.TrimRight(s, jsWhitespace)
36	if len(s) == 0 {
37		return preceding
38	}
39
40	// All cases below are in the single-byte UTF-8 group.
41	switch c, n := s[len(s)-1], len(s); c {
42	case '+', '-':
43		// ++ and -- are not regexp preceders, but + and - are whether
44		// they are used as infix or prefix operators.
45		start := n - 1
46		// Count the number of adjacent dashes or pluses.
47		for start > 0 && s[start-1] == c {
48			start--
49		}
50		if (n-start)&1 == 1 {
51			// Reached for trailing minus signs since "---" is the
52			// same as "-- -".
53			return jsCtxRegexp
54		}
55		return jsCtxDivOp
56	case '.':
57		// Handle "42."
58		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
59			return jsCtxDivOp
60		}
61		return jsCtxRegexp
62	// Suffixes for all punctuators from section 7.7 of the language spec
63	// that only end binary operators not handled above.
64	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
65		return jsCtxRegexp
66	// Suffixes for all punctuators from section 7.7 of the language spec
67	// that are prefix operators not handled above.
68	case '!', '~':
69		return jsCtxRegexp
70	// Matches all the punctuators from section 7.7 of the language spec
71	// that are open brackets not handled above.
72	case '(', '[':
73		return jsCtxRegexp
74	// Matches all the punctuators from section 7.7 of the language spec
75	// that precede expression starts.
76	case ':', ';', '{':
77		return jsCtxRegexp
78	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
79	// are handled in the default except for '}' which can precede a
80	// division op as in
81	//    ({ valueOf: function () { return 42 } } / 2
82	// which is valid, but, in practice, developers don't divide object
83	// literals, so our heuristic works well for code like
84	//    function () { ... }  /foo/.test(x) && sideEffect();
85	// The ')' punctuator can precede a regular expression as in
86	//     if (b) /foo/.test(x) && ...
87	// but this is much less likely than
88	//     (a + b) / c
89	case '}':
90		return jsCtxRegexp
91	default:
92		// Look for an IdentifierName and see if it is a keyword that
93		// can precede a regular expression.
94		j := n
95		for j > 0 && isJSIdentPart(rune(s[j-1])) {
96			j--
97		}
98		if regexpPrecederKeywords[string(s[j:])] {
99			return jsCtxRegexp
100		}
101	}
102	// Otherwise is a punctuator not listed above, or
103	// a string which precedes a div op, or an identifier
104	// which precedes a div op.
105	return jsCtxDivOp
106}
107
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// regular expression may appear in JS source. A lookup yields true exactly
// for the members of the set.
var regexpPrecederKeywords = map[string]bool{
	// Statement keywords.
	"break": true, "case": true, "continue": true, "do": true, "else": true,
	"finally": true, "return": true, "throw": true, "try": true,
	// Unary operator keywords.
	"delete": true, "typeof": true, "void": true,
	// Binary operator keywords.
	"in": true, "instanceof": true,
}
126
127var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
128
129// indirectToJSONMarshaler returns the value, after dereferencing as many times
130// as necessary to reach the base type (or nil) or an implementation of json.Marshal.
131func indirectToJSONMarshaler(a any) any {
132	// text/template now supports passing untyped nil as a func call
133	// argument, so we must support it. Otherwise we'd panic below, as one
134	// cannot call the Type or Interface methods on an invalid
135	// reflect.Value. See golang.org/issue/18716.
136	if a == nil {
137		return nil
138	}
139
140	v := reflect.ValueOf(a)
141	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
142		v = v.Elem()
143	}
144	return v.Interface()
145}
146
147// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
148// neither side-effects nor free variables outside (NaN, Infinity).
149func jsValEscaper(args ...any) string {
150	var a any
151	if len(args) == 1 {
152		a = indirectToJSONMarshaler(args[0])
153		switch t := a.(type) {
154		case JS:
155			return string(t)
156		case JSStr:
157			// TODO: normalize quotes.
158			return `"` + string(t) + `"`
159		case json.Marshaler:
160			// Do not treat as a Stringer.
161		case fmt.Stringer:
162			a = t.String()
163		}
164	} else {
165		for i, arg := range args {
166			args[i] = indirectToJSONMarshaler(arg)
167		}
168		a = fmt.Sprint(args...)
169	}
170	// TODO: detect cycles before calling Marshal which loops infinitely on
171	// cyclic data. This may be an unacceptable DoS risk.
172	b, err := json.Marshal(a)
173	if err != nil {
174		// While the standard JSON marshaler does not include user controlled
175		// information in the error message, if a type has a MarshalJSON method,
176		// the content of the error message is not guaranteed. Since we insert
177		// the error into the template, as part of a comment, we attempt to
178		// prevent the error from either terminating the comment, or the script
179		// block itself.
180		//
181		// In particular we:
182		//   * replace "*/" comment end tokens with "* /", which does not
183		//     terminate the comment
184		//   * replace "</script" with "\x3C/script", and "<!--" with
185		//     "\x3C!--", which prevents confusing script block termination
186		//     semantics
187		//
188		// We also put a space before the comment so that if it is flush against
189		// a division operator it is not turned into a line comment:
190		//     x/{{y}}
191		// turning into
192		//     x//* error marshaling y:
193		//          second line of error message */null
194		errStr := err.Error()
195		errStr = strings.ReplaceAll(errStr, "*/", "* /")
196		errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`)
197		errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
198		return fmt.Sprintf(" /* %s */null ", errStr)
199	}
200
201	// TODO: maybe post-process output to prevent it from containing
202	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
203	// in case custom marshalers produce output containing those.
204	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
205	// supports ld+json content-type.
206	if len(b) == 0 {
207		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
208		// not cause the output `x=y/*z`.
209		return " null "
210	}
211	first, _ := utf8.DecodeRune(b)
212	last, _ := utf8.DecodeLastRune(b)
213	var buf strings.Builder
214	// Prevent IdentifierNames and NumericLiterals from running into
215	// keywords: in, instanceof, typeof, void
216	pad := isJSIdentPart(first) || isJSIdentPart(last)
217	if pad {
218		buf.WriteByte(' ')
219	}
220	written := 0
221	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
222	// so it falls within the subset of JSON which is valid JS.
223	for i := 0; i < len(b); {
224		rune, n := utf8.DecodeRune(b[i:])
225		repl := ""
226		if rune == 0x2028 {
227			repl = `\u2028`
228		} else if rune == 0x2029 {
229			repl = `\u2029`
230		}
231		if repl != "" {
232			buf.Write(b[written:i])
233			buf.WriteString(repl)
234			written = i + n
235		}
236		i += n
237	}
238	if buf.Len() != 0 {
239		buf.Write(b[written:])
240		if pad {
241			buf.WriteByte(' ')
242		}
243		return buf.String()
244	}
245	return string(b)
246}
247
248// jsStrEscaper produces a string that can be included between quotes in
249// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
250// or in an HTML5 event handler attribute such as onclick.
251func jsStrEscaper(args ...any) string {
252	s, t := stringify(args...)
253	if t == contentTypeJSStr {
254		return replace(s, jsStrNormReplacementTable)
255	}
256	return replace(s, jsStrReplacementTable)
257}
258
259func jsTmplLitEscaper(args ...any) string {
260	s, _ := stringify(args...)
261	return replace(s, jsBqStrReplacementTable)
262}
263
264// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
265// specials so the result is treated literally when included in a regular
266// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
267// the literal text of {{.X}} followed by the string "bar".
268func jsRegexpEscaper(args ...any) string {
269	s, _ := stringify(args...)
270	s = replace(s, jsRegexpReplacementTable)
271	if s == "" {
272		// /{{.X}}/ should not produce a line comment when .X == "".
273		return "(?:)"
274	}
275	return s
276}
277
// replace returns s with individual runes swapped for escape sequences.
// For each rune r, the first rule that applies wins:
//
//   - r < len(lowUnicodeReplacementTable): lowUnicodeReplacementTable[r]
//   - r < len(replacementTable) and replacementTable[r] is not the empty
//     string: replacementTable[r]
//   - r is U+2028 or U+2029: the raw string `\u2028` or `\u2029`
//
// Any other rune is copied through unchanged. If no rune is replaced, s
// itself is returned.
func replace(s string, replacementTable []string) string {
	var (
		out     strings.Builder
		flushed int // offset in s up to which input has been copied to out
	)
	for pos := 0; pos < len(s); {
		// See comment in htmlEscaper.
		r, size := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += size

		var esc string
		switch code := int(r); {
		case code < len(lowUnicodeReplacementTable):
			esc = lowUnicodeReplacementTable[code]
		case code < len(replacementTable) && replacementTable[code] != "":
			esc = replacementTable[code]
		case r == '\u2028':
			esc = `\u2028`
		case r == '\u2029':
			esc = `\u2029`
		default:
			// No rule applies; the rune is copied with a later flush.
			continue
		}

		if flushed == 0 {
			// First replacement: reserve room for the whole input.
			out.Grow(len(s))
		}
		out.WriteString(s[flushed:start])
		out.WriteString(esc)
		flushed = pos
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}

// lowUnicodeReplacementTable holds the escape sequence for each code point
// from U+0000 through U+001F, indexed by code point. replace consults it
// before the table supplied by its caller.
var lowUnicodeReplacementTable = []string{
	// 0x00 - 0x07
	`\u0000`, `\u0001`, `\u0002`, `\u0003`, `\u0004`, `\u0005`, `\u0006`, `\u0007`,
	// 0x08 - 0x0f: \b, \t, \n, \v, \f, \r, 0x0e, 0x0f.
	// \v is spelled \u000b because "\v" == "v" on IE 6.
	`\u0008`, `\t`, `\n`, `\u000b`, `\f`, `\r`, `\u000e`, `\u000f`,
	// 0x10 - 0x17
	`\u0010`, `\u0011`, `\u0012`, `\u0013`, `\u0014`, `\u0015`, `\u0016`, `\u0017`,
	// 0x18 - 0x1f
	`\u0018`, `\u0019`, `\u001a`, `\u001b`, `\u001c`, `\u001d`, `\u001e`, `\u001f`,
}
329
// jsStrReplacementTable is the replacement table, indexed by code point,
// that jsStrEscaper passes to replace for input that is not already a JS
// string. An index with no entry holds the empty string.
var jsStrReplacementTable = []string{
	// Control characters.
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Written as \u escapes. HTML specials are encoded this way so the
	// output can be embedded in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'`':  `\u0060`,
	// Written with a leading backslash.
	'/':  `\/`,
	'\\': `\\`,
}
349
// jsBqStrReplacementTable has every entry of jsStrReplacementTable plus
// entries for the characters that are special in JS template literals:
// $, {, and }. It is indexed by code point; an index with no entry holds the
// empty string.
var jsBqStrReplacementTable = []string{
	// Control characters.
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Written as \u escapes. HTML specials are encoded this way so the
	// output can be embedded in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'`':  `\u0060`,
	// Template literal specials, also written as \u escapes.
	'$': `\u0024`,
	'{': `\u007b`,
	'}': `\u007d`,
	// Written with a leading backslash.
	'/':  `\/`,
	'\\': `\\`,
}
374
// jsStrNormReplacementTable matches jsStrReplacementTable except that it has
// no entry for `\`, so escapes already present in the input are not
// overencoded. It is indexed by code point; an index with no entry holds the
// empty string.
var jsStrNormReplacementTable = []string{
	// Control characters.
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Written as \u escapes. HTML specials are encoded this way so the
	// output can be embedded in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'`':  `\u0060`,
	// Written with a leading backslash.
	'/': `\/`,
}
// jsRegexpReplacementTable is the replacement table, indexed by code point,
// that jsRegexpEscaper passes to replace. An index with no entry holds the
// empty string.
var jsRegexpReplacementTable = []string{
	// Control characters.
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Written as \u escapes. HTML specials are encoded this way so the
	// output can be embedded in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	// Written with a leading backslash.
	'$':  `\$`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'-':  `\-`,
	'.':  `\.`,
	'/':  `\/`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
426
// isJSIdentPart reports whether r may appear inside a JS identifier.
// Only ASCII letters, ASCII digits, '$' and '_' are recognized: non-Latin
// letters, joiners, and combining marks are not handled, but every code
// point that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	case '0' <= r && r <= '9':
		return true
	}
	return r == '$' || r == '_'
}
446
// isJSType reports whether mimeType should be considered JavaScript.
//
// It is used to decide whether a script tag that carries a type attribute is
// a JavaScript container. Any parameters after the first ';' are dropped,
// and the remainder is compared without regard to letter case or
// surrounding white space.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	essence, _, _ := strings.Cut(mimeType, ";") // discard parameters
	switch strings.TrimSpace(strings.ToLower(essence)) {
	// The bare keyword.
	case "module":
		return true
	// The application/* family.
	case "application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript":
		return true
	// The text/* family.
	case "text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}
486