Lines Matching +full:utf +full:- +full:8
8 * http://www.apache.org/licenses/LICENSE-2.0
29 // Modern versions of UTF-8 (https://datatracker.ietf.org/doc/html/rfc3629 and
30 // newer) explicitly disallow code points beyond U+10FFFF, which exclude all 5-
31 // and 6-byte sequences. Earlier versions of UTF-8 allowed the wider range:
65 // ever do this (what does it mean to begin decoding a UTF-32 character while in TEST()
66 // still in the middle of a UTF-8 sequence?), considering that a decoding in TEST()
70 // UTF-32 character. in TEST()
72 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
79 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "\xc2", 1, &ps)); in TEST()
81 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps)); in TEST()
85 // ignore the state entirely for the UTF-32 functions rather than reset it. in TEST()
90 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "\xc2", 1, &ps)); in TEST()
95 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "\xf0\xa4", 1, &ps)); in TEST()
102 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
110 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
124 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
127 // 1-byte UTF-8. in TEST()
131 // 2-byte UTF-8. in TEST()
136 // 3-byte UTF-8. in TEST()
142 // 4-byte UTF-8 from a surrogate pair... in TEST()
153 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
159 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdfcd, nullptr)); in TEST()
162 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdbea, nullptr)); in TEST()
166 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
173 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
179 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "hello", 0, nullptr)); in TEST()
182 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "hello", 0, nullptr)); in TEST()
183 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "", 0, nullptr)); in TEST()
191 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
194 // 1-byte UTF-8. in TEST()
197 // 2-byte UTF-8. in TEST()
200 // 3-byte UTF-8. in TEST()
203 // 4-byte UTF-8 will be returned as a surrogate pair... in TEST()
206 ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, in TEST()
214 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
221 EXPECT_EQ(static_cast<size_t>(-1), result); in TEST()
232 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
237 EXPECT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\xf0\x80\xbf\xbf", 6, nullptr)); in TEST()
243 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
250 EXPECT_EQ(static_cast<size_t>(-1), result); in TEST()
261 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in test_mbrtoc16_incomplete()
265 // 2-byte UTF-8. in test_mbrtoc16_incomplete()
266 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); in test_mbrtoc16_incomplete()
270 // 3-byte UTF-8. in test_mbrtoc16_incomplete()
271 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps)); in test_mbrtoc16_incomplete()
272 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps)); in test_mbrtoc16_incomplete()
276 // 4-byte UTF-8. in test_mbrtoc16_incomplete()
277 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps)); in test_mbrtoc16_incomplete()
278 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps)); in test_mbrtoc16_incomplete()
284 ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, in test_mbrtoc16_incomplete()
291 // Invalid 2-byte in test_mbrtoc16_incomplete()
292 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); in test_mbrtoc16_incomplete()
294 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps)); in test_mbrtoc16_incomplete()
299 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
324 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
327 // 1-byte UTF-8. in TEST()
331 // 2-byte UTF-8. in TEST()
336 // 3-byte UTF-8. in TEST()
342 // 4-byte UTF-8. in TEST()
351 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(bytes, 0xffffffff, nullptr)); in TEST()
356 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
359 char32_t out[8] = {}; in TEST()
367 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
374 EXPECT_EQ(static_cast<size_t>(-1), result); in TEST()
385 char32_t out[8]; in TEST()
388 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(out, "hello", 0, nullptr)); in TEST()
391 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(out, "hello", 0, nullptr)); in TEST()
392 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(out, "", 0, nullptr)); in TEST()
396 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "hello", 0, nullptr)); in TEST()
397 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "", 0, nullptr)); in TEST()
402 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()
405 // 1-byte UTF-8. in TEST()
408 // 2-byte UTF-8. in TEST()
414 // 3-byte UTF-8. in TEST()
420 // 4-byte UTF-8. in TEST()
427 // Illegal 5-byte UTF-8. in TEST()
429 EXPECT_EQ(static_cast<size_t>(-1), mbrtoc32(out, in TEST()
435 // Illegal over-long sequence. in TEST()
437 EXPECT_EQ(static_cast<size_t>(-1), mbrtoc32(out, in TEST()
445 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in test_mbrtoc32_incomplete()
449 // 2-byte UTF-8. in test_mbrtoc32_incomplete()
450 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); in test_mbrtoc32_incomplete()
454 // 3-byte UTF-8. in test_mbrtoc32_incomplete()
455 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps)); in test_mbrtoc32_incomplete()
456 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps)); in test_mbrtoc32_incomplete()
460 // 4-byte UTF-8. in test_mbrtoc32_incomplete()
461 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps)); in test_mbrtoc32_incomplete()
462 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps)); in test_mbrtoc32_incomplete()
467 // Invalid 2-byte in test_mbrtoc32_incomplete()
468 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); in test_mbrtoc32_incomplete()
470 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps)); in test_mbrtoc32_incomplete()
475 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); in TEST()