// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_icu.h"
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <stddef.h>
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "base/logging.h"
10*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ptr.h"
11*6777b538SAndroid Build Coastguard Worker #include "testing/gtest/include/gtest/gtest.h"
12*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/common/unicode/ucnv.h"
13*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
14*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_icu_test_helpers.h"
15*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_stdstring.h"
16*6777b538SAndroid Build Coastguard Worker #include "url/url_test_utils.h"
17*6777b538SAndroid Build Coastguard Worker
18*6777b538SAndroid Build Coastguard Worker namespace url {
19*6777b538SAndroid Build Coastguard Worker
20*6777b538SAndroid Build Coastguard Worker namespace {
21*6777b538SAndroid Build Coastguard Worker
// Checks ICUCharsetConverter::ConvertFromUTF16 in two ways:
//  1. A table of UTF-16 inputs converted into a named target encoding and
//     compared byte-for-byte against the expected output.
//  2. ASCII inputs whose lengths sit just below, at, and just above the fixed
//     size of a RawCanonOutput, to make sure the converter resizes the output
//     as needed.
TEST(URLCanonIcuTest, ICUCharsetConverter) {
  struct ICUCase {
    const wchar_t* input;   // Input text; truncated to UTF-16 before use.
    const char* encoding;   // Charset name handed to test::UConvScoper.
    const char* expected;   // Expected raw bytes written to the output.
  } icu_cases[] = {
      // UTF-8.
      {L"Hello, world", "utf-8", "Hello, world"},
      {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
      // Non-BMP UTF-8.
      {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
      // Big5
      {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
      // Unrepresentable character in the destination set.
      {L"hello\x4f60\x06de\x597dworld", "big5",
       "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
  };

  for (const ICUCase& test_case : icu_cases) {
    // Name the encoding in any failure reported from this iteration.
    SCOPED_TRACE(test_case.encoding);

    test::UConvScoper conv(test_case.encoding);
    ASSERT_TRUE(conv.converter() != nullptr);
    ICUCharsetConverter converter(conv.converter());

    std::string str;
    StdStringCanonOutput output(&str);

    const std::u16string input_str(
        test_utils::TruncateWStringToUTF16(test_case.input));
    const int input_len = static_cast<int>(input_str.length());
    converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
    output.Complete();

    EXPECT_STREQ(test_case.expected, str.c_str());
  }

  // Test string sizes around the resize boundary for the output to make sure
  // the converter resizes as needed.
  constexpr int kStaticSize = 16;
  test::UConvScoper conv("utf-8");
  ASSERT_TRUE(conv.converter());
  ICUCharsetConverter converter(conv.converter());
  for (int len = kStaticSize - 2; len <= kStaticSize + 2; ++len) {
    // Make a string of |len| 'a' characters.
    const std::u16string input(static_cast<size_t>(len), u'a');

    RawCanonOutput<kStaticSize> output;
    converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
                               &output);
    // The input is pure ASCII and the target is UTF-8, so the output length
    // is expected to equal the input length.
    EXPECT_EQ(input.length(), output.length());
  }
}
75*6777b538SAndroid Build Coastguard Worker
// Checks CanonicalizeQuery when a charset converter is supplied. Each table
// entry is run twice, once from the 8-bit input and once from the wide input
// (truncated to UTF-16), and both runs must produce the same expected query
// string. A final case feeds an input containing an embedded NUL character
// with no converter at all.
TEST(URLCanonIcuTest, QueryWithConverter) {
  struct QueryCase {
    const char* input8;      // 8-bit form of the query, or null to skip.
    const wchar_t* input16;  // Wide form of the same query, or null to skip.
    const char* encoding;    // Charset name handed to test::UConvScoper.
    const char* expected;    // Expected canonical output, including '?'.
  } query_cases[] = {
      // Regular ASCII case in some different encodings.
      {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
      {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
      {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
      // Chinese input/output
      {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",
       "?q=%C4%E3%BA%C3"},
      {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
      // Unencodable character in the destination character set should be
      // escaped. The escape sequence unescapes to the numeric character
      // reference for U+FF27: "?q=Chinese&#65319;"
      {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
       "?q=Chinese%26%2365319%3B"},
  };

  for (const QueryCase& query : query_cases) {
    // Name the encoding in any failure reported from this iteration.
    SCOPED_TRACE(query.encoding);

    Component out_comp;

    test::UConvScoper conv(query.encoding);
    ASSERT_TRUE(!query.encoding || conv.converter());
    ICUCharsetConverter converter(conv.converter());

    if (query.input8) {
      const int len = static_cast<int>(strlen(query.input8));
      const Component in_comp(0, len);
      std::string out_str;

      StdStringCanonOutput output(&out_str);
      CanonicalizeQuery(query.input8, in_comp, &converter, &output, &out_comp);
      output.Complete();

      EXPECT_EQ(query.expected, out_str);
    }

    if (query.input16) {
      const std::u16string input16(
          test_utils::TruncateWStringToUTF16(query.input16));
      const int len = static_cast<int>(input16.length());
      const Component in_comp(0, len);
      std::string out_str;

      StdStringCanonOutput output(&out_str);
      CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output,
                        &out_comp);
      output.Complete();

      EXPECT_EQ(query.expected, out_str);
    }
  }

  // Extra test for input with an embedded NUL character. The component length
  // is passed explicitly (5) because the literal contains a '\0' in the
  // middle; no converter is supplied for this case.
  std::string out_str;
  StdStringCanonOutput output(&out_str);
  Component out_comp;
  CanonicalizeQuery("a \x00z\x01", Component(0, 5), nullptr, &output,
                    &out_comp);
  output.Complete();
  EXPECT_EQ("?a%20%00z%01", out_str);
}
142*6777b538SAndroid Build Coastguard Worker
143*6777b538SAndroid Build Coastguard Worker } // namespace
144*6777b538SAndroid Build Coastguard Worker
145*6777b538SAndroid Build Coastguard Worker } // namespace url
146