1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker // ICU-based character set converter.
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <stdint.h>
8*6777b538SAndroid Build Coastguard Worker #include <stdlib.h>
9*6777b538SAndroid Build Coastguard Worker #include <string.h>
10*6777b538SAndroid Build Coastguard Worker
11*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ptr.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ptr_exclusion.h"
14*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/common/unicode/ucnv.h"
15*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/common/unicode/ucnv_cb.h"
16*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/common/unicode/utypes.h"
17*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_icu.h"
18*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h" // for _itoa_s
19*6777b538SAndroid Build Coastguard Worker
20*6777b538SAndroid Build Coastguard Worker namespace url {
21*6777b538SAndroid Build Coastguard Worker
22*6777b538SAndroid Build Coastguard Worker namespace {
23*6777b538SAndroid Build Coastguard Worker
24*6777b538SAndroid Build Coastguard Worker // Called when converting a character that can not be represented, this will
25*6777b538SAndroid Build Coastguard Worker // append an escaped version of the numerical character reference for that code
26*6777b538SAndroid Build Coastguard Worker // point. It is of the form "Ӓ" and we will escape the non-digits to
27*6777b538SAndroid Build Coastguard Worker // "%26%231234%3B". Why? This is what Netscape did back in the olden days.
appendURLEscapedChar(const void * context,UConverterFromUnicodeArgs * from_args,const UChar * code_units,int32_t length,UChar32 code_point,UConverterCallbackReason reason,UErrorCode * err)28*6777b538SAndroid Build Coastguard Worker void appendURLEscapedChar(const void* context,
29*6777b538SAndroid Build Coastguard Worker UConverterFromUnicodeArgs* from_args,
30*6777b538SAndroid Build Coastguard Worker const UChar* code_units,
31*6777b538SAndroid Build Coastguard Worker int32_t length,
32*6777b538SAndroid Build Coastguard Worker UChar32 code_point,
33*6777b538SAndroid Build Coastguard Worker UConverterCallbackReason reason,
34*6777b538SAndroid Build Coastguard Worker UErrorCode* err) {
35*6777b538SAndroid Build Coastguard Worker if (reason == UCNV_UNASSIGNED) {
36*6777b538SAndroid Build Coastguard Worker *err = U_ZERO_ERROR;
37*6777b538SAndroid Build Coastguard Worker
38*6777b538SAndroid Build Coastguard Worker const static int prefix_len = 6;
39*6777b538SAndroid Build Coastguard Worker const static char prefix[prefix_len + 1] = "%26%23"; // "&#" percent-escaped
40*6777b538SAndroid Build Coastguard Worker ucnv_cbFromUWriteBytes(from_args, prefix, prefix_len, 0, err);
41*6777b538SAndroid Build Coastguard Worker
42*6777b538SAndroid Build Coastguard Worker DCHECK(code_point < 0x110000);
43*6777b538SAndroid Build Coastguard Worker char number[8]; // Max Unicode code point is 7 digits.
44*6777b538SAndroid Build Coastguard Worker _itoa_s(code_point, number, 10);
45*6777b538SAndroid Build Coastguard Worker int number_len = static_cast<int>(strlen(number));
46*6777b538SAndroid Build Coastguard Worker ucnv_cbFromUWriteBytes(from_args, number, number_len, 0, err);
47*6777b538SAndroid Build Coastguard Worker
48*6777b538SAndroid Build Coastguard Worker const static int postfix_len = 3;
49*6777b538SAndroid Build Coastguard Worker const static char postfix[postfix_len + 1] = "%3B"; // ";" percent-escaped
50*6777b538SAndroid Build Coastguard Worker ucnv_cbFromUWriteBytes(from_args, postfix, postfix_len, 0, err);
51*6777b538SAndroid Build Coastguard Worker }
52*6777b538SAndroid Build Coastguard Worker }
53*6777b538SAndroid Build Coastguard Worker
54*6777b538SAndroid Build Coastguard Worker // A class for scoping the installation of the invalid character callback.
55*6777b538SAndroid Build Coastguard Worker class AppendHandlerInstaller {
56*6777b538SAndroid Build Coastguard Worker public:
57*6777b538SAndroid Build Coastguard Worker // The owner of this object must ensure that the converter is alive for the
58*6777b538SAndroid Build Coastguard Worker // duration of this object's lifetime.
AppendHandlerInstaller(UConverter * converter)59*6777b538SAndroid Build Coastguard Worker AppendHandlerInstaller(UConverter* converter) : converter_(converter) {
60*6777b538SAndroid Build Coastguard Worker UErrorCode err = U_ZERO_ERROR;
61*6777b538SAndroid Build Coastguard Worker ucnv_setFromUCallBack(converter_, appendURLEscapedChar, 0,
62*6777b538SAndroid Build Coastguard Worker &old_callback_, &old_context_, &err);
63*6777b538SAndroid Build Coastguard Worker }
64*6777b538SAndroid Build Coastguard Worker
~AppendHandlerInstaller()65*6777b538SAndroid Build Coastguard Worker ~AppendHandlerInstaller() {
66*6777b538SAndroid Build Coastguard Worker UErrorCode err = U_ZERO_ERROR;
67*6777b538SAndroid Build Coastguard Worker ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);
68*6777b538SAndroid Build Coastguard Worker }
69*6777b538SAndroid Build Coastguard Worker
70*6777b538SAndroid Build Coastguard Worker private:
71*6777b538SAndroid Build Coastguard Worker raw_ptr<UConverter> converter_;
72*6777b538SAndroid Build Coastguard Worker
73*6777b538SAndroid Build Coastguard Worker UConverterFromUCallback old_callback_;
74*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
75*6777b538SAndroid Build Coastguard Worker // #addr-of
76*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const void* old_context_;
77*6777b538SAndroid Build Coastguard Worker };
78*6777b538SAndroid Build Coastguard Worker
79*6777b538SAndroid Build Coastguard Worker } // namespace
80*6777b538SAndroid Build Coastguard Worker
ICUCharsetConverter(UConverter * converter)81*6777b538SAndroid Build Coastguard Worker ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
82*6777b538SAndroid Build Coastguard Worker : converter_(converter) {
83*6777b538SAndroid Build Coastguard Worker }
84*6777b538SAndroid Build Coastguard Worker
85*6777b538SAndroid Build Coastguard Worker ICUCharsetConverter::~ICUCharsetConverter() = default;
86*6777b538SAndroid Build Coastguard Worker
ConvertFromUTF16(const char16_t * input,int input_len,CanonOutput * output)87*6777b538SAndroid Build Coastguard Worker void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input,
88*6777b538SAndroid Build Coastguard Worker int input_len,
89*6777b538SAndroid Build Coastguard Worker CanonOutput* output) {
90*6777b538SAndroid Build Coastguard Worker // Install our error handler. It will be called for character that can not
91*6777b538SAndroid Build Coastguard Worker // be represented in the destination character set.
92*6777b538SAndroid Build Coastguard Worker AppendHandlerInstaller handler(converter_);
93*6777b538SAndroid Build Coastguard Worker
94*6777b538SAndroid Build Coastguard Worker int begin_offset = output->length();
95*6777b538SAndroid Build Coastguard Worker int dest_capacity = output->capacity() - begin_offset;
96*6777b538SAndroid Build Coastguard Worker output->set_length(output->length());
97*6777b538SAndroid Build Coastguard Worker
98*6777b538SAndroid Build Coastguard Worker do {
99*6777b538SAndroid Build Coastguard Worker UErrorCode err = U_ZERO_ERROR;
100*6777b538SAndroid Build Coastguard Worker char* dest = &output->data()[begin_offset];
101*6777b538SAndroid Build Coastguard Worker int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
102*6777b538SAndroid Build Coastguard Worker input, input_len, &err);
103*6777b538SAndroid Build Coastguard Worker if (err != U_BUFFER_OVERFLOW_ERROR) {
104*6777b538SAndroid Build Coastguard Worker output->set_length(begin_offset + required_capacity);
105*6777b538SAndroid Build Coastguard Worker return;
106*6777b538SAndroid Build Coastguard Worker }
107*6777b538SAndroid Build Coastguard Worker
108*6777b538SAndroid Build Coastguard Worker // Output didn't fit, expand
109*6777b538SAndroid Build Coastguard Worker dest_capacity = required_capacity;
110*6777b538SAndroid Build Coastguard Worker output->Resize(begin_offset + dest_capacity);
111*6777b538SAndroid Build Coastguard Worker } while (true);
112*6777b538SAndroid Build Coastguard Worker }
113*6777b538SAndroid Build Coastguard Worker
114*6777b538SAndroid Build Coastguard Worker } // namespace url
115