1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package utf16_test 6 7import ( 8 "internal/testenv" 9 "reflect" 10 "testing" 11 "unicode" 12 . "unicode/utf16" 13) 14 15// Validate the constants redefined from unicode. 16func TestConstants(t *testing.T) { 17 if MaxRune != unicode.MaxRune { 18 t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune) 19 } 20 if ReplacementChar != unicode.ReplacementChar { 21 t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar) 22 } 23} 24 25func TestRuneLen(t *testing.T) { 26 for _, tt := range []struct { 27 r rune 28 length int 29 }{ 30 {0, 1}, 31 {Surr1 - 1, 1}, 32 {Surr3, 1}, 33 {SurrSelf - 1, 1}, 34 {SurrSelf, 2}, 35 {MaxRune, 2}, 36 {MaxRune + 1, -1}, 37 {-1, -1}, 38 } { 39 if length := RuneLen(tt.r); length != tt.length { 40 t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, length, tt.length) 41 } 42 } 43} 44 45type encodeTest struct { 46 in []rune 47 out []uint16 48} 49 50var encodeTests = []encodeTest{ 51 {[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}}, 52 {[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, 53 []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}}, 54 {[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1}, 55 []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}}, 56} 57 58func TestEncode(t *testing.T) { 59 for _, tt := range encodeTests { 60 out := Encode(tt.in) 61 if !reflect.DeepEqual(out, tt.out) { 62 t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out) 63 } 64 } 65} 66 67func TestAppendRune(t *testing.T) { 68 for _, tt := range encodeTests { 69 var out []uint16 70 for _, u := range tt.in { 71 out = AppendRune(out, u) 72 } 73 if !reflect.DeepEqual(out, tt.out) { 74 t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out) 75 } 76 } 77} 78 79func TestEncodeRune(t *testing.T) { 80 for i, tt := range encodeTests { 81 j := 0 82 for _, r := range tt.in { 83 r1, r2 := EncodeRune(r) 84 if r < 0x10000 || r > unicode.MaxRune { 85 if j >= len(tt.out) { 86 t.Errorf("#%d: ran out of tt.out", i) 87 break 88 } 89 if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { 90 t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2) 91 } 92 j++ 93 } else { 94 if j+1 >= len(tt.out) { 95 t.Errorf("#%d: ran out of tt.out", i) 96 break 97 } 98 if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) { 99 t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1]) 100 } 101 j += 2 102 dec := DecodeRune(r1, r2) 103 if dec != r { 104 t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r) 105 } 106 } 107 } 108 if j != len(tt.out) { 109 t.Errorf("#%d: EncodeRune didn't generate enough output", i) 110 } 111 } 112} 113 114type decodeTest struct { 115 in []uint16 116 out []rune 117} 118 119var decodeTests = []decodeTest{ 120 {[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}}, 121 {[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, 122 []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}}, 123 {[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}}, 124 {[]uint16{0xdfff}, []rune{0xfffd}}, 125} 126 127func TestAllocationsDecode(t *testing.T) { 128 testenv.SkipIfOptimizationOff(t) 129 130 for _, tt := range decodeTests { 131 allocs := testing.AllocsPerRun(10, func() { 132 out := Decode(tt.in) 133 if out == nil { 134 t.Errorf("Decode(%x) = nil", tt.in) 135 } 136 }) 137 if allocs > 0 { 138 t.Errorf("Decode allocated %v times", allocs) 139 } 140 } 141} 142 143func TestDecode(t *testing.T) { 144 for _, tt := range decodeTests { 145 out := Decode(tt.in) 146 if !reflect.DeepEqual(out, tt.out) { 147 t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out) 148 } 149 } 150} 151 152var decodeRuneTests = []struct { 153 r1, r2 rune 154 want rune 155}{ 156 {0xd800, 0xdc00, 0x10000}, 157 {0xd800, 0xdc01, 0x10001}, 158 {0xd808, 0xdf45, 0x12345}, 159 {0xdbff, 0xdfff, 0x10ffff}, 160 {0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted 161} 162 163func TestDecodeRune(t *testing.T) { 164 for i, tt := range decodeRuneTests { 165 got := DecodeRune(tt.r1, tt.r2) 166 if got != tt.want { 167 t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want) 168 } 169 } 170} 171 172var surrogateTests = []struct { 173 r rune 174 want bool 175}{ 176 // from https://en.wikipedia.org/wiki/UTF-16 177 {'\u007A', false}, // LATIN SMALL LETTER Z 178 {'\u6C34', false}, // CJK UNIFIED IDEOGRAPH-6C34 (water) 179 {'\uFEFF', false}, // Byte Order Mark 180 {'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point) 181 {'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF 182 {'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point) 183 184 {rune(0xd7ff), false}, // surr1-1 185 {rune(0xd800), true}, // surr1 186 {rune(0xdc00), true}, // surr2 187 {rune(0xe000), false}, // surr3 188 {rune(0xdfff), true}, // surr3-1 189} 190 191func TestIsSurrogate(t *testing.T) { 192 for i, tt := range surrogateTests { 193 got := IsSurrogate(tt.r) 194 if got != tt.want { 195 t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want) 196 } 197 } 198} 199 200func BenchmarkDecodeValidASCII(b *testing.B) { 201 // "hello world" 202 data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100} 203 for i := 0; i < b.N; i++ { 204 Decode(data) 205 } 206} 207 208func BenchmarkDecodeValidJapaneseChars(b *testing.B) { 209 // "日本語日本語日本語" 210 data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486} 211 for i := 0; i < b.N; i++ { 212 Decode(data) 213 } 214} 215 216func BenchmarkDecodeRune(b *testing.B) { 217 rs := make([]rune, 10) 218 // U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS 219 for i, u := range []rune{'', '', '', '', ''} { 220 rs[2*i], rs[2*i+1] = EncodeRune(u) 221 } 222 223 b.ResetTimer() 224 for i := 0; i < b.N; i++ { 225 for j := 0; j < 5; j++ { 226 DecodeRune(rs[2*j], rs[2*j+1]) 227 } 228 } 229} 230 231func BenchmarkEncodeValidASCII(b *testing.B) { 232 data := []rune{'h', 'e', 'l', 'l', 'o'} 233 for i := 0; i < b.N; i++ { 234 Encode(data) 235 } 236} 237 238func BenchmarkEncodeValidJapaneseChars(b *testing.B) { 239 data := []rune{'日', '本', '語'} 240 for i := 0; i < b.N; i++ { 241 Encode(data) 242 } 243} 244 245func BenchmarkAppendRuneValidASCII(b *testing.B) { 246 data := []rune{'h', 'e', 'l', 'l', 'o'} 247 a := make([]uint16, 0, len(data)*2) 248 for i := 0; i < b.N; i++ { 249 for _, u := range data { 250 a = AppendRune(a, u) 251 } 252 a = a[:0] 253 } 254} 255 256func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) { 257 data := []rune{'日', '本', '語'} 258 a := make([]uint16, 0, len(data)*2) 259 for i := 0; i < b.N; i++ { 260 for _, u := range data { 261 a = AppendRune(a, u) 262 } 263 a = a[:0] 264 } 265} 266 267func BenchmarkEncodeRune(b *testing.B) { 268 for i := 0; i < b.N; i++ { 269 for _, u := range []rune{'', '', '', '', ''} { 270 EncodeRune(u) 271 } 272 } 273} 274