1*8f0ba417SAndroid Build Coastguard Worker /*
2*8f0ba417SAndroid Build Coastguard Worker * Copyright (C) 2015 The Android Open Source Project
3*8f0ba417SAndroid Build Coastguard Worker *
4*8f0ba417SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*8f0ba417SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*8f0ba417SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*8f0ba417SAndroid Build Coastguard Worker *
8*8f0ba417SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*8f0ba417SAndroid Build Coastguard Worker *
10*8f0ba417SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*8f0ba417SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*8f0ba417SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*8f0ba417SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*8f0ba417SAndroid Build Coastguard Worker * limitations under the License.
15*8f0ba417SAndroid Build Coastguard Worker */
16*8f0ba417SAndroid Build Coastguard Worker
17*8f0ba417SAndroid Build Coastguard Worker #include "android-base/utf8.h"
18*8f0ba417SAndroid Build Coastguard Worker
19*8f0ba417SAndroid Build Coastguard Worker #include <gtest/gtest.h>
20*8f0ba417SAndroid Build Coastguard Worker
21*8f0ba417SAndroid Build Coastguard Worker #include <fcntl.h>
22*8f0ba417SAndroid Build Coastguard Worker #include <stdlib.h>
23*8f0ba417SAndroid Build Coastguard Worker
24*8f0ba417SAndroid Build Coastguard Worker #include "android-base/file.h"
25*8f0ba417SAndroid Build Coastguard Worker #include "android-base/macros.h"
26*8f0ba417SAndroid Build Coastguard Worker #include "android-base/unique_fd.h"
27*8f0ba417SAndroid Build Coastguard Worker
28*8f0ba417SAndroid Build Coastguard Worker namespace android {
29*8f0ba417SAndroid Build Coastguard Worker namespace base {
30*8f0ba417SAndroid Build Coastguard Worker
// Checks that UTF8ToWide() reports failure with errno == EILSEQ when the input
// contains an invalid byte, yet still converts the valid text around it.
TEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
  // A lone \xa2 byte is not a valid UTF-8 sequence. The input is written as
  // two adjacent string literals so that the compiler does not parse "\xa2af"
  // as a single hex escape and complain that it is out of range.
  const char* const malformed_utf8 = "before\xa2" "after";

  std::wstring converted;
  errno = 0;
  EXPECT_FALSE(android::base::UTF8ToWide(malformed_utf8, &converted));
  EXPECT_EQ(EILSEQ, errno);

  // UTF8ToWide() is expected to keep going past the bad byte and convert
  // whatever it can; sysdeps_win32.cpp: _console_write_utf8() relies on that.
  //
  // Only the valid text on each side is checked. Whatever the implementation
  // substitutes for the invalid byte is deliberately left unverified so that
  // it can change without breaking this test.
  const std::wstring prefix = L"before";
  const std::wstring suffix = L"after";
  EXPECT_EQ(0U, converted.find(prefix));
  EXPECT_EQ(converted.length() - suffix.length(), converted.find(suffix));
}
56*8f0ba417SAndroid Build Coastguard Worker
57*8f0ba417SAndroid Build Coastguard Worker // Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/utf_string_conversions_unittest.cc
58*8f0ba417SAndroid Build Coastguard Worker
59*8f0ba417SAndroid Build Coastguard Worker // Copyright (c) 2010 The Chromium Authors. All rights reserved.
60*8f0ba417SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
61*8f0ba417SAndroid Build Coastguard Worker // found in the NOTICE file.
62*8f0ba417SAndroid Build Coastguard Worker
63*8f0ba417SAndroid Build Coastguard Worker // The tests below from utf_string_conversions_unittest.cc check for this
64*8f0ba417SAndroid Build Coastguard Worker // preprocessor symbol, so define it, as it is appropriate for Windows.
65*8f0ba417SAndroid Build Coastguard Worker #define WCHAR_T_IS_UTF16
66*8f0ba417SAndroid Build Coastguard Worker static_assert(sizeof(wchar_t) == 2, "wchar_t is not 2 bytes");
67*8f0ba417SAndroid Build Coastguard Worker
68*8f0ba417SAndroid Build Coastguard Worker // The tests below from utf_string_conversions_unittest.cc call versions of
69*8f0ba417SAndroid Build Coastguard Worker // UTF8ToWide() and WideToUTF8() that don't return success/failure, so these are
70*8f0ba417SAndroid Build Coastguard Worker // stub implementations with that signature. These are just for testing and
71*8f0ba417SAndroid Build Coastguard Worker // should not be moved to base because they assert/expect no errors which is
72*8f0ba417SAndroid Build Coastguard Worker // probably not a good idea (or at least it is something that should be left
73*8f0ba417SAndroid Build Coastguard Worker // up to the caller, not a base library).
74*8f0ba417SAndroid Build Coastguard Worker
UTF8ToWide(const std::string & utf8)75*8f0ba417SAndroid Build Coastguard Worker static std::wstring UTF8ToWide(const std::string& utf8) {
76*8f0ba417SAndroid Build Coastguard Worker std::wstring utf16;
77*8f0ba417SAndroid Build Coastguard Worker EXPECT_TRUE(UTF8ToWide(utf8, &utf16));
78*8f0ba417SAndroid Build Coastguard Worker return utf16;
79*8f0ba417SAndroid Build Coastguard Worker }
80*8f0ba417SAndroid Build Coastguard Worker
WideToUTF8(const std::wstring & utf16)81*8f0ba417SAndroid Build Coastguard Worker static std::string WideToUTF8(const std::wstring& utf16) {
82*8f0ba417SAndroid Build Coastguard Worker std::string utf8;
83*8f0ba417SAndroid Build Coastguard Worker EXPECT_TRUE(WideToUTF8(utf16, &utf8));
84*8f0ba417SAndroid Build Coastguard Worker return utf8;
85*8f0ba417SAndroid Build Coastguard Worker }
86*8f0ba417SAndroid Build Coastguard Worker
namespace {

// Wide strings that are expected to come back unchanged after a
// wide -> UTF-8 -> wide round trip. Every entry is spelled with hex escapes;
// the comment above an entry shows the text it encodes. Long entries are
// split into adjacent literals at word boundaries.
const wchar_t* const kConvertRoundtripCases[] = {
    // Plain ASCII.
    L"Google Video",

    // "网页 图片 资讯更多 »"
    L"\x7f51\x9875\x0020\x56fe\x7247\x0020"
    L"\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

    // "Παγκόσμιος Ιστός"
    L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9\x03bf\x03c2\x0020"
    L"\x0399\x03c3\x03c4\x03cc\x03c2",

    // "Поиск страниц на русском"
    L"\x041f\x043e\x0438\x0441\x043a\x0020"
    L"\x0441\x0442\x0440\x0430\x043d\x0438\x0446\x0020"
    L"\x043d\x0430\x0020"
    L"\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

    // "전체서비스"
    L"\xc804\xccb4\xc11c\xbe44\xc2a4",

// Code points above U+FFFF. How they are written depends on whether wchar_t
// holds UTF-16 code units (surrogate pairs) or whole UTF-32 code points.
#if defined(WCHAR_T_IS_UTF16)
    // U+10300 as a surrogate pair.
    L"\xd800\xdf00",
    // U+11D40 through U+11D44 as five surrogate pairs.
    // NOTE(review): the comment this replaces called these "Mathematical
    // Alphanumeric Symbols ... A,B,C,D,E", but that block appears to start at
    // U+1D400, not U+11D40 -- confirm which code points were intended.
    L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
    // U+10300 as a single code point.
    L"\x10300",
    // U+11D40 through U+11D44 (see the review note on the UTF-16 spelling of
    // the same entry about the label these code points used to carry).
    L"\x11d40\x11d41\x11d42\x11d43\x11d44",
#endif
};

}  // namespace
117*8f0ba417SAndroid Build Coastguard Worker
// Round-trips every entry of kConvertRoundtripCases through UTF-8 and back and
// expects to get the original wide string again.
TEST(UTFStringConversionsTest, ConvertUTF8AndWide) {
  for (const wchar_t* const original : kConvertRoundtripCases) {
    // Each converted string is passed through a string stream before being
    // used, so the stream insertion operators are exercised along the way.
    std::ostringstream utf8_stream;
    utf8_stream << WideToUTF8(original);

    std::wostringstream wide_stream;
    wide_stream << UTF8ToWide(utf8_stream.str());

    EXPECT_EQ(original, wide_stream.str());
  }
}
131*8f0ba417SAndroid Build Coastguard Worker
// Converting an empty string in either direction must yield an empty string.
TEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) {
  const std::string no_utf8;
  const std::wstring no_wide;

  // wide -> UTF-8
  EXPECT_EQ(no_utf8, WideToUTF8(no_wide));
  // UTF-8 -> wide
  EXPECT_EQ(no_wide, UTF8ToWide(no_utf8));
}
140*8f0ba417SAndroid Build Coastguard Worker
// Table-driven check of the (pointer, length, output) UTF8ToWide() overload,
// followed by two direct checks: embedded NULs survive, and the output string
// is overwritten rather than appended to.
TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
  struct Utf8Case {
    const char* utf8;     // Input bytes.
    const wchar_t* wide;  // Expected output; only checked when |success|.
    bool success;         // Expected return value.
  };
  const Utf8Case kCases[] = {
      // Well-formed UTF-8.
      {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
      // A non-character is passed through untouched.
      {"\xef\xbf\xbfHello", L"\xffffHello", true},
      // A sequence that is cut short in the middle of the input.
      {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
      // A sequence that is cut short by the end of the input.
      {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false},
      // Non-shortest-form encoding.
      {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
      // Decodes to a UTF-16 surrogate, which is illegal in UTF-8.
      // Note that for whatever reason, this test fails on Windows XP.
      {"\xed\xb0\x80", L"\xfffd", false},
  // Characters outside the BMP; the second one is a non-character that is
  // nevertheless treated as valid. The expected spelling depends on whether
  // wchar_t is UTF-16 or UTF-32.
#if defined(WCHAR_T_IS_UTF16)
      {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
      {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
#elif defined(WCHAR_T_IS_UTF32)
      {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
      {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
#endif
  };

  for (const Utf8Case& test_case : kCases) {
    std::wstring actual;
    errno = 0;
    const bool ok =
        UTF8ToWide(test_case.utf8, strlen(test_case.utf8), &actual);
    EXPECT_EQ(test_case.success, ok);

    if (!ok) {
      // The upstream test compared the output in every case. That is skipped
      // on failure because this UTF8ToWide() makes no promise about what it
      // produces for bad input; only the error code is checked.
      EXPECT_EQ(EILSEQ, errno);
      continue;
    }
    EXPECT_EQ(std::wstring(test_case.wide), actual);
  }

  // An embedded NUL must be converted like any other character.
  std::wstring reused;
  EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &reused));
  ASSERT_EQ(3U, reused.length());
  EXPECT_EQ(L'\0', reused[0]);
  EXPECT_EQ('Z', reused[1]);
  EXPECT_EQ('\t', reused[2]);

  // Converting into a non-empty string must replace its contents, not append.
  EXPECT_TRUE(UTF8ToWide("B", 1, &reused));
  ASSERT_EQ(1U, reused.length());
  EXPECT_EQ('B', reused[0]);
}
202*8f0ba417SAndroid Build Coastguard Worker
203*8f0ba417SAndroid Build Coastguard Worker #if defined(WCHAR_T_IS_UTF16)
204*8f0ba417SAndroid Build Coastguard Worker // This test is only valid when wchar_t == UTF-16.
// Table-driven check of the (pointer, length, output) WideToUTF8() overload
// for UTF-16 input.
TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
  // Expected result for input containing an unpaired surrogate. Only Vista and
  // later has a new API/flag that correctly returns false for such input.
#if (WINVER >= 0x0600)
  const bool lone_surrogate_succeeds = false;
#else
  const bool lone_surrogate_succeeds = true;
#endif

  struct Utf16Case {
    const wchar_t* utf16;  // Input code units.
    const char* utf8;      // Expected output; only checked when |success|.
    bool success;          // Expected return value.
  };
  const Utf16Case kCases[] = {
      // Well-formed UTF-16.
      {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
      // A character outside the BMP.
      {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
      // Non-characters are passed through untouched.
      {L"\xffffHello", "\xEF\xBF\xBFHello", true},
      {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
      // Unpaired high surrogate at the start.
      // Note that for whatever reason, this test fails on Windows XP.
      {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", lone_surrogate_succeeds},
      // Unpaired high surrogate at the end.
      // Note that for whatever reason, this test fails on Windows XP.
      {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", lone_surrogate_succeeds},
  };

  for (const Utf16Case& test_case : kCases) {
    std::string actual;
    errno = 0;
    const bool ok =
        WideToUTF8(test_case.utf16, wcslen(test_case.utf16), &actual);
    EXPECT_EQ(test_case.success, ok);

    if (!ok) {
      // The upstream test compared the output in every case. That is skipped
      // on failure because this WideToUTF8() makes no promise about what it
      // produces for bad input; only the error code is checked.
      EXPECT_EQ(EILSEQ, errno);
      continue;
    }
    EXPECT_EQ(std::string(test_case.utf8), actual);
  }
}
258*8f0ba417SAndroid Build Coastguard Worker
259*8f0ba417SAndroid Build Coastguard Worker #elif defined(WCHAR_T_IS_UTF32)
260*8f0ba417SAndroid Build Coastguard Worker // This test is only valid when wchar_t == UTF-32.
// Table-driven check of the (pointer, length, output) WideToUTF8() overload
// for UTF-32 input.
// NOTE(review): unlike ConvertUTF16ToUTF8, this test compares the output even
// for cases that are expected to fail and never inspects errno -- confirm
// whether that difference is intended before enabling this branch.
TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {
  struct Utf32Case {
    const wchar_t* utf32;  // Input code points.
    const char* utf8;      // Expected output.
    bool success;          // Expected return value.
  };
  const Utf32Case kCases[] = {
      // Input that fits in 16 bits.
      {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
      // A character outside the BMP.
      {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
      // Non-characters are passed through untouched.
      {L"\xffffHello", "\xEF\xBF\xBFHello", true},
      {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
      // Not a valid Unicode code point.
      {L"\xfffffffHello", "\xEF\xBF\xBDHello", false},
      // Surrogate values are not valid on their own.
      {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
      {L"\xdc01Hello", "\xef\xbf\xbdHello", false},
  };

  for (const Utf32Case& test_case : kCases) {
    std::string actual;
    const bool ok =
        WideToUTF8(test_case.utf32, wcslen(test_case.utf32), &actual);
    EXPECT_EQ(test_case.success, ok);
    EXPECT_EQ(std::string(test_case.utf8), actual);
  }
}
291*8f0ba417SAndroid Build Coastguard Worker #endif // defined(WCHAR_T_IS_UTF32)
292*8f0ba417SAndroid Build Coastguard Worker
293*8f0ba417SAndroid Build Coastguard Worker // The test below uses these types and functions, so just do enough to get the
294*8f0ba417SAndroid Build Coastguard Worker // test running.
// Stand-ins for the 16-bit character and string type names used by the test
// code adapted below; here they are simply the wide-character types.
using char16 = wchar_t;
using string16 = std::wstring;
297*8f0ba417SAndroid Build Coastguard Worker
// Resizes |*t| so that it can be filled in place and returns a pointer to its
// first character.
//
// |size| is the number of characters the caller intends to write INCLUDING a
// trailing NUL, so the string itself is resized to |size| - 1 characters (the
// string already provides storage for its own terminator). A |size| of 0 is
// treated like 1 and leaves the string empty, instead of letting the unsigned
// subtraction wrap around and request an enormous allocation.
//
// The returned pointer is valid until |*t| is next modified.
template <typename T>
static void* WriteInto(T* t, size_t size) {
  const size_t length = (size > 0) ? size - 1 : 0;
  t->resize(length);
  return &(*t)[0];
}
304*8f0ba417SAndroid Build Coastguard Worker
305*8f0ba417SAndroid Build Coastguard Worker // A stub implementation that calls a helper from above, just to get the test
306*8f0ba417SAndroid Build Coastguard Worker // below working. This is just for testing and should not be moved to base
307*8f0ba417SAndroid Build Coastguard Worker // because this ignores errors which is probably not a good idea, plus it takes
308*8f0ba417SAndroid Build Coastguard Worker // a string16 type which we don't really have.
UTF16ToUTF8(const string16 & utf16)309*8f0ba417SAndroid Build Coastguard Worker static std::string UTF16ToUTF8(const string16& utf16) {
310*8f0ba417SAndroid Build Coastguard Worker return WideToUTF8(utf16);
311*8f0ba417SAndroid Build Coastguard Worker }
312*8f0ba417SAndroid Build Coastguard Worker
// Converts a "multi-string" -- several NUL-terminated strings packed back to
// back and closed by one extra NUL -- and expects every embedded NUL to be
// preserved in the output.
TEST(UTFStringConversionsTest, ConvertMultiString) {
  // Each array holds "foo", "bar" and "baz", each followed by a NUL, plus the
  // terminating NUL the literal adds itself: 13 elements in total.
  static char16 wide_source[] = L"foo\0" L"bar\0" L"baz\0";
  static char narrow_source[] = "foo\0" "bar\0" "baz\0";
  const size_t wide_count = arraysize(wide_source);
  const size_t narrow_count = arraysize(narrow_source);

  // Build the wide input by copying the raw array into the string's buffer.
  string16 wide_multi;
  void* const wide_buffer = WriteInto(&wide_multi, wide_count);
  memcpy(wide_buffer, wide_source, sizeof(wide_source));
  EXPECT_EQ(wide_count - 1, wide_multi.length());

  // Build the expected UTF-8 output the same way.
  std::string expected;
  void* const narrow_buffer = WriteInto(&expected, narrow_count);
  memcpy(narrow_buffer, narrow_source, sizeof(narrow_source));
  EXPECT_EQ(narrow_count - 1, expected.length());

  const std::string& converted = UTF16ToUTF8(wide_multi);
  EXPECT_EQ(narrow_count - 1, converted.length());
  EXPECT_EQ(expected, converted);
}
337*8f0ba417SAndroid Build Coastguard Worker
338*8f0ba417SAndroid Build Coastguard Worker // The tests below from sys_string_conversions_unittest.cc call SysWideToUTF8()
339*8f0ba417SAndroid Build Coastguard Worker // and SysUTF8ToWide(), so these are stub implementations that call the helpers
340*8f0ba417SAndroid Build Coastguard Worker // above. These are just for testing and should not be moved to base because
341*8f0ba417SAndroid Build Coastguard Worker // they ignore errors which is probably not a good idea.
342*8f0ba417SAndroid Build Coastguard Worker
SysWideToUTF8(const std::wstring & utf16)343*8f0ba417SAndroid Build Coastguard Worker static std::string SysWideToUTF8(const std::wstring& utf16) {
344*8f0ba417SAndroid Build Coastguard Worker return WideToUTF8(utf16);
345*8f0ba417SAndroid Build Coastguard Worker }
346*8f0ba417SAndroid Build Coastguard Worker
SysUTF8ToWide(const std::string & utf8)347*8f0ba417SAndroid Build Coastguard Worker static std::wstring SysUTF8ToWide(const std::string& utf8) {
348*8f0ba417SAndroid Build Coastguard Worker return UTF8ToWide(utf8);
349*8f0ba417SAndroid Build Coastguard Worker }
350*8f0ba417SAndroid Build Coastguard Worker
351*8f0ba417SAndroid Build Coastguard Worker // Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/sys_string_conversions_unittest.cc
352*8f0ba417SAndroid Build Coastguard Worker
353*8f0ba417SAndroid Build Coastguard Worker // Copyright (c) 2011 The Chromium Authors. All rights reserved.
354*8f0ba417SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
355*8f0ba417SAndroid Build Coastguard Worker // found in the LICENSE file.
356*8f0ba417SAndroid Build Coastguard Worker
// U+10300 as a wide string: a surrogate pair unless wchar_t is declared to be
// UTF-32, in which case it is a single code point.
#ifndef WCHAR_T_IS_UTF32
static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00";
#else
static const std::wstring kSysWideOldItalicLetterA = L"\x10300";
#endif
362*8f0ba417SAndroid Build Coastguard Worker
// Exercises SysWideToUTF8() on ASCII, BMP and non-BMP text and on a string
// with an embedded NUL.
TEST(SysStrings, SysWideToUTF8) {
  EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));
  EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));

  // A character that needs more than 16 bits.
  EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));

  // Error case, deliberately not verified. When Windows XP finds a UTF-16
  // character running off the end of a string it converts the literal value
  // to UTF-8 even though that is invalid; Vista behaves differently:
  //   EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
  //             SysWideToUTF8(L"\x4f60\xd800zyxw"));

  // An embedded NUL must be carried over to the output.
  const std::wstring wide_with_nul{L'a', L'\0', L'b'};
  const std::string utf8_with_nul{'a', '\0', 'b'};
  EXPECT_EQ(utf8_with_nul, SysWideToUTF8(wide_with_nul));
}
390*8f0ba417SAndroid Build Coastguard Worker
// Exercises SysUTF8ToWide() on ASCII, BMP and non-BMP text and on a string
// with an embedded NUL.
TEST(SysStrings, SysUTF8ToWide) {
  EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));
  EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));

  // A character that needs more than 16 bits.
  EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));

  // Error case, deliberately not verified. Windows XP simply skips an invalid
  // UTF-8 character (oddly inconsistent with the reverse conversion); Vista
  // behaves differently:
  //   EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));

  // An embedded NUL must be carried over to the output.
  const std::string utf8_with_nul{'a', '\0', 'b'};
  const std::wstring wide_with_nul{L'a', L'\0', L'b'};
  EXPECT_EQ(wide_with_nul, SysUTF8ToWide(utf8_with_nul));
}
415*8f0ba417SAndroid Build Coastguard Worker
// A short path must convert successfully and must not contain the \\?\
// long-path prefix anywhere in the result.
TEST(UTF8PathToWindowsLongPathTest, DontAddPrefixIfShorterThanMaxPath) {
  const std::string short_path = "c:\\mypath\\myfile.txt";

  std::wstring converted;
  EXPECT_TRUE(UTF8PathToWindowsLongPath(short_path.c_str(), &converted));

  const auto prefix_pos = converted.find(LR"(\\?\)");
  EXPECT_EQ(std::wstring::npos, prefix_pos);
}
424*8f0ba417SAndroid Build Coastguard Worker
// A backslash-separated path of at least 300 characters must come back starting
// with the \\?\ prefix and containing no forward slashes.
TEST(UTF8PathToWindowsLongPathTest, AddPrefixIfLongerThanMaxPath) {
  // Keep appending segments until the path is comfortably past MAX_PATH.
  std::string long_path = "c:\\mypath";
  while (long_path.size() < 300 /* MAX_PATH is 260 */) {
    long_path.append("\\mypathsegment");
  }

  std::wstring converted;
  EXPECT_TRUE(UTF8PathToWindowsLongPath(long_path.c_str(), &converted));

  // The prefix has to sit at the very beginning of the converted path.
  const auto prefix_pos = converted.find(LR"(\\?\)");
  EXPECT_EQ(0U, prefix_pos);

  const auto slash_pos = converted.find(L"/");
  EXPECT_EQ(std::wstring::npos, slash_pos);
}
437*8f0ba417SAndroid Build Coastguard Worker
// Same as the long-path case above, but the input uses forward slashes: the
// result must start with \\?\ and must contain no forward slash.
TEST(UTF8PathToWindowsLongPathTest, AddPrefixAndFixSeparatorsIfLongerThanMaxPath) {
  // Keep appending segments until the path is comfortably past MAX_PATH.
  std::string long_path = "c:/mypath";
  while (long_path.size() < 300 /* MAX_PATH is 260 */) {
    long_path.append("/mypathsegment");
  }

  std::wstring converted;
  EXPECT_TRUE(UTF8PathToWindowsLongPath(long_path.c_str(), &converted));

  // The prefix has to sit at the very beginning of the converted path.
  const auto prefix_pos = converted.find(LR"(\\?\)");
  EXPECT_EQ(0U, prefix_pos);

  // Every '/' from the input must be gone from the output.
  const auto slash_pos = converted.find(L"/");
  EXPECT_EQ(std::wstring::npos, slash_pos);
}
450*8f0ba417SAndroid Build Coastguard Worker
451*8f0ba417SAndroid Build Coastguard Worker namespace utf8 {
452*8f0ba417SAndroid Build Coastguard Worker
TEST(Utf8FilesTest,CanCreateOpenAndDeleteFileWithLongPath)453*8f0ba417SAndroid Build Coastguard Worker TEST(Utf8FilesTest, CanCreateOpenAndDeleteFileWithLongPath) {
454*8f0ba417SAndroid Build Coastguard Worker TemporaryDir td;
455*8f0ba417SAndroid Build Coastguard Worker
456*8f0ba417SAndroid Build Coastguard Worker // Create long directory path
457*8f0ba417SAndroid Build Coastguard Worker std::string utf8 = td.path;
458*8f0ba417SAndroid Build Coastguard Worker while (utf8.length() < 300 /* MAX_PATH is 260 */) {
459*8f0ba417SAndroid Build Coastguard Worker utf8 += "\\mypathsegment";
460*8f0ba417SAndroid Build Coastguard Worker EXPECT_EQ(0, mkdir(utf8.c_str(), 0));
461*8f0ba417SAndroid Build Coastguard Worker }
462*8f0ba417SAndroid Build Coastguard Worker
463*8f0ba417SAndroid Build Coastguard Worker // Create file
464*8f0ba417SAndroid Build Coastguard Worker utf8 += "\\test-file.bin";
465*8f0ba417SAndroid Build Coastguard Worker int flags = O_WRONLY | O_CREAT | O_TRUNC | O_BINARY;
466*8f0ba417SAndroid Build Coastguard Worker int mode = 0666;
467*8f0ba417SAndroid Build Coastguard Worker android::base::unique_fd fd(open(utf8.c_str(), flags, mode));
468*8f0ba417SAndroid Build Coastguard Worker EXPECT_NE(-1, fd.get());
469*8f0ba417SAndroid Build Coastguard Worker
470*8f0ba417SAndroid Build Coastguard Worker // Close file
471*8f0ba417SAndroid Build Coastguard Worker fd.reset();
472*8f0ba417SAndroid Build Coastguard Worker EXPECT_EQ(-1, fd.get());
473*8f0ba417SAndroid Build Coastguard Worker
474*8f0ba417SAndroid Build Coastguard Worker // Open file with fopen
475*8f0ba417SAndroid Build Coastguard Worker FILE* file = fopen(utf8.c_str(), "rb");
476*8f0ba417SAndroid Build Coastguard Worker EXPECT_NE(nullptr, file);
477*8f0ba417SAndroid Build Coastguard Worker
478*8f0ba417SAndroid Build Coastguard Worker if (file) {
479*8f0ba417SAndroid Build Coastguard Worker fclose(file);
480*8f0ba417SAndroid Build Coastguard Worker }
481*8f0ba417SAndroid Build Coastguard Worker
482*8f0ba417SAndroid Build Coastguard Worker // Delete file
483*8f0ba417SAndroid Build Coastguard Worker EXPECT_EQ(0, unlink(utf8.c_str()));
484*8f0ba417SAndroid Build Coastguard Worker }
485*8f0ba417SAndroid Build Coastguard Worker
486*8f0ba417SAndroid Build Coastguard Worker } // namespace utf8
487*8f0ba417SAndroid Build Coastguard Worker } // namespace base
488*8f0ba417SAndroid Build Coastguard Worker } // namespace android
489