1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8	"strings"
9	"testing"
10)
11
// unescapeTest is a single table entry for TestUnescape: an HTML input paired
// with the text that unescaping it is expected to produce.
//
// The entries of unescapeTests are unkeyed composite literals, so the field
// order below (desc, html, unescaped) has to stay as it is.
type unescapeTest struct {
	desc      string // A short description of the test case.
	html      string // The HTML text.
	unescaped string // The unescaped text.
}
20
21var unescapeTests = []unescapeTest{
22	// Handle no entities.
23	{
24		"copy",
25		"A\ttext\nstring",
26		"A\ttext\nstring",
27	},
28	// Handle simple named entities.
29	{
30		"simple",
31		"& > <",
32		"& > <",
33	},
34	// Handle hitting the end of the string.
35	{
36		"stringEnd",
37		"&amp &amp",
38		"& &",
39	},
40	// Handle entities with two codepoints.
41	{
42		"multiCodepoint",
43		"text &gesl; blah",
44		"text \u22db\ufe00 blah",
45	},
46	// Handle decimal numeric entities.
47	{
48		"decimalEntity",
49		"Delta = &#916; ",
50		"Delta = Δ ",
51	},
52	// Handle hexadecimal numeric entities.
53	{
54		"hexadecimalEntity",
55		"Lambda = &#x3bb; = &#X3Bb ",
56		"Lambda = λ = λ ",
57	},
58	// Handle numeric early termination.
59	{
60		"numericEnds",
61		"&# &#x &#128;43 &copy = &#169f = &#xa9",
62		"&# &#x €43 © = ©f = ©",
63	},
64	// Handle numeric ISO-8859-1 entity replacements.
65	{
66		"numericReplacements",
67		"Footnote&#x87;",
68		"Footnote‡",
69	},
70	// Handle single ampersand.
71	{
72		"copySingleAmpersand",
73		"&",
74		"&",
75	},
76	// Handle ampersand followed by non-entity.
77	{
78		"copyAmpersandNonEntity",
79		"text &test",
80		"text &test",
81	},
82	// Handle "&#".
83	{
84		"copyAmpersandHash",
85		"text &#",
86		"text &#",
87	},
88}
89
90func TestUnescape(t *testing.T) {
91	for _, tt := range unescapeTests {
92		unescaped := UnescapeString(tt.html)
93		if unescaped != tt.unescaped {
94			t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
95		}
96	}
97}
98
99func TestUnescapeEscape(t *testing.T) {
100	ss := []string{
101		``,
102		`abc def`,
103		`a & b`,
104		`a&amp;b`,
105		`a &amp b`,
106		`&quot;`,
107		`"`,
108		`"<&>"`,
109		`&quot;&lt;&amp;&gt;&quot;`,
110		`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
111		`The special characters are: <, >, &, ' and "`,
112	}
113	for _, s := range ss {
114		if got := UnescapeString(EscapeString(s)); got != s {
115			t.Errorf("got %q want %q", got, s)
116		}
117	}
118}
119
// benchEscapeData is 100 repetitions of a 40-byte fragment that contains the
// characters <, >, &, ' and ".
var benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)

// benchEscapeNone is 100 repetitions of a 40-byte fragment made only of
// letters and spaces.
var benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)

// benchUnescapeSparse is 10 repetitions of a chunk made of 400 bytes of
// letters and spaces followed by a single "&amp;".
var benchUnescapeSparse = strings.Repeat(
	strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&amp;",
	10,
)

// benchUnescapeDense is 100 repetitions of a 20-byte fragment that consists
// almost entirely of "&amp;" and "&lt;" sequences.
var benchUnescapeDense = strings.Repeat("&amp;&lt; &amp; &lt;", 100)
126
127func BenchmarkEscape(b *testing.B) {
128	n := 0
129	for i := 0; i < b.N; i++ {
130		n += len(EscapeString(benchEscapeData))
131	}
132}
133
134func BenchmarkEscapeNone(b *testing.B) {
135	n := 0
136	for i := 0; i < b.N; i++ {
137		n += len(EscapeString(benchEscapeNone))
138	}
139}
140
141func BenchmarkUnescape(b *testing.B) {
142	s := EscapeString(benchEscapeData)
143	n := 0
144	for i := 0; i < b.N; i++ {
145		n += len(UnescapeString(s))
146	}
147}
148
149func BenchmarkUnescapeNone(b *testing.B) {
150	s := EscapeString(benchEscapeNone)
151	n := 0
152	for i := 0; i < b.N; i++ {
153		n += len(UnescapeString(s))
154	}
155}
156
157func BenchmarkUnescapeSparse(b *testing.B) {
158	n := 0
159	for i := 0; i < b.N; i++ {
160		n += len(UnescapeString(benchUnescapeSparse))
161	}
162}
163
164func BenchmarkUnescapeDense(b *testing.B) {
165	n := 0
166	for i := 0; i < b.N; i++ {
167		n += len(UnescapeString(benchUnescapeDense))
168	}
169}
170