1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker // Canonicalizers for random bits that aren't big enough for their own files.
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <string.h>
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
10*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
11*6777b538SAndroid Build Coastguard Worker
12*6777b538SAndroid Build Coastguard Worker namespace url {
13*6777b538SAndroid Build Coastguard Worker
14*6777b538SAndroid Build Coastguard Worker namespace {
15*6777b538SAndroid Build Coastguard Worker
// Reports whether |ch| is one of the characters that get stripped out of the
// middle of a URL: tab, line feed, or carriage return.
inline bool IsRemovableURLWhitespace(int ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
21*6777b538SAndroid Build Coastguard Worker
22*6777b538SAndroid Build Coastguard Worker // Backend for RemoveURLWhitespace (see declaration in url_canon.h).
23*6777b538SAndroid Build Coastguard Worker // It sucks that we have to do this, since this takes about 13% of the total URL
24*6777b538SAndroid Build Coastguard Worker // canonicalization time.
25*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
DoRemoveURLWhitespace(const CHAR * input,int input_len,CanonOutputT<CHAR> * buffer,int * output_len,bool * potentially_dangling_markup)26*6777b538SAndroid Build Coastguard Worker const CHAR* DoRemoveURLWhitespace(const CHAR* input,
27*6777b538SAndroid Build Coastguard Worker int input_len,
28*6777b538SAndroid Build Coastguard Worker CanonOutputT<CHAR>* buffer,
29*6777b538SAndroid Build Coastguard Worker int* output_len,
30*6777b538SAndroid Build Coastguard Worker bool* potentially_dangling_markup) {
31*6777b538SAndroid Build Coastguard Worker // Fast verification that there's nothing that needs removal. This is the 99%
32*6777b538SAndroid Build Coastguard Worker // case, so we want it to be fast and don't care about impacting the speed
33*6777b538SAndroid Build Coastguard Worker // when we do find whitespace.
34*6777b538SAndroid Build Coastguard Worker bool found_whitespace = false;
35*6777b538SAndroid Build Coastguard Worker if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
36*6777b538SAndroid Build Coastguard Worker // For large strings, memchr is much faster than any scalar code we can
37*6777b538SAndroid Build Coastguard Worker // write, even if we need to run it three times. (If this turns out to still
38*6777b538SAndroid Build Coastguard Worker // be a bottleneck, we could write our own vector code, but given that
39*6777b538SAndroid Build Coastguard Worker // memchr is so fast, it's unlikely to be relevant.)
40*6777b538SAndroid Build Coastguard Worker found_whitespace = memchr(input, '\n', input_len) != nullptr ||
41*6777b538SAndroid Build Coastguard Worker memchr(input, '\r', input_len) != nullptr ||
42*6777b538SAndroid Build Coastguard Worker memchr(input, '\t', input_len) != nullptr;
43*6777b538SAndroid Build Coastguard Worker } else {
44*6777b538SAndroid Build Coastguard Worker for (int i = 0; i < input_len; i++) {
45*6777b538SAndroid Build Coastguard Worker if (!IsRemovableURLWhitespace(input[i]))
46*6777b538SAndroid Build Coastguard Worker continue;
47*6777b538SAndroid Build Coastguard Worker found_whitespace = true;
48*6777b538SAndroid Build Coastguard Worker break;
49*6777b538SAndroid Build Coastguard Worker }
50*6777b538SAndroid Build Coastguard Worker }
51*6777b538SAndroid Build Coastguard Worker
52*6777b538SAndroid Build Coastguard Worker if (!found_whitespace) {
53*6777b538SAndroid Build Coastguard Worker // Didn't find any whitespace, we don't need to do anything. We can just
54*6777b538SAndroid Build Coastguard Worker // return the input as the output.
55*6777b538SAndroid Build Coastguard Worker *output_len = input_len;
56*6777b538SAndroid Build Coastguard Worker return input;
57*6777b538SAndroid Build Coastguard Worker }
58*6777b538SAndroid Build Coastguard Worker
59*6777b538SAndroid Build Coastguard Worker // Skip whitespace removal for `data:` URLs.
60*6777b538SAndroid Build Coastguard Worker //
61*6777b538SAndroid Build Coastguard Worker // TODO(mkwst): Ideally, this would use something like `base::StartsWith`, but
62*6777b538SAndroid Build Coastguard Worker // that turns out to be difficult to do correctly given this function's
63*6777b538SAndroid Build Coastguard Worker // character type templating.
64*6777b538SAndroid Build Coastguard Worker if (input_len > 5 && input[0] == 'd' && input[1] == 'a' && input[2] == 't' &&
65*6777b538SAndroid Build Coastguard Worker input[3] == 'a' && input[4] == ':') {
66*6777b538SAndroid Build Coastguard Worker *output_len = input_len;
67*6777b538SAndroid Build Coastguard Worker return input;
68*6777b538SAndroid Build Coastguard Worker }
69*6777b538SAndroid Build Coastguard Worker
70*6777b538SAndroid Build Coastguard Worker // Remove the whitespace into the new buffer and return it.
71*6777b538SAndroid Build Coastguard Worker for (int i = 0; i < input_len; i++) {
72*6777b538SAndroid Build Coastguard Worker if (!IsRemovableURLWhitespace(input[i])) {
73*6777b538SAndroid Build Coastguard Worker if (potentially_dangling_markup && input[i] == 0x3C)
74*6777b538SAndroid Build Coastguard Worker *potentially_dangling_markup = true;
75*6777b538SAndroid Build Coastguard Worker buffer->push_back(input[i]);
76*6777b538SAndroid Build Coastguard Worker }
77*6777b538SAndroid Build Coastguard Worker }
78*6777b538SAndroid Build Coastguard Worker *output_len = buffer->length();
79*6777b538SAndroid Build Coastguard Worker return buffer->data();
80*6777b538SAndroid Build Coastguard Worker }
81*6777b538SAndroid Build Coastguard Worker
// Lookup table, indexed by a 7-bit ASCII code, that yields the canonical form
// of a scheme character: letters map to their lower-case form, while digits,
// '+', '-' and '.' map to themselves. An entry of 0 means the character is not
// allowed in a scheme.
// clang-format off
const char kSchemeCanonical[0x80] = {
    // 0x00-0x1f: control characters, all invalid.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20: ' '  !    "    #    $    %    &    '
    0,   0,   0,   0,   0,   0,   0,   0,
    // 0x28: (    )    *    +    ,    -    .    /
    0,   0,   0,   '+', 0,   '-', '.', 0,
    // 0x30: 0    1    2    3    4    5    6    7
    '0', '1', '2', '3', '4', '5', '6', '7',
    // 0x38: 8    9    :    ;    <    =    >    ?
    '8', '9', 0,   0,   0,   0,   0,   0,
    // 0x40: @    A    B    C    D    E    F    G
    0,   'a', 'b', 'c', 'd', 'e', 'f', 'g',
    // 0x48: H    I    J    K    L    M    N    O
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    // 0x50: P    Q    R    S    T    U    V    W
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    // 0x58: X    Y    Z    [  bslash  ]    ^    _
    'x', 'y', 'z', 0,   0,   0,   0,   0,
    // 0x60: `    a    b    c    d    e    f    g
    0,   'a', 'b', 'c', 'd', 'e', 'f', 'g',
    // 0x68: h    i    j    k    l    m    n    o
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    // 0x70: p    q    r    s    t    u    v    w
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    // 0x78: x    y    z    {    |    }    ~   DEL
    'x', 'y', 'z', 0,   0,   0,   0,   0,
};
// clang-format on
103*6777b538SAndroid Build Coastguard Worker
// Returns true when |c| is an ASCII letter (either case). This is the extra
// test DoScheme applies to the first character of a scheme.
//
// The check could instead be folded into kSchemeCanonical by reserving the
// high bit of each entry, but it only runs once per URL and keeping it
// separate leaves the lookup table easier to read.
inline bool IsSchemeFirstChar(unsigned char c) {
  const bool is_lower_alpha = c >= 'a' && c <= 'z';
  const bool is_upper_alpha = c >= 'A' && c <= 'Z';
  return is_lower_alpha || is_upper_alpha;
}
110*6777b538SAndroid Build Coastguard Worker
111*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoScheme(const CHAR * spec,const Component & scheme,CanonOutput * output,Component * out_scheme)112*6777b538SAndroid Build Coastguard Worker bool DoScheme(const CHAR* spec,
113*6777b538SAndroid Build Coastguard Worker const Component& scheme,
114*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
115*6777b538SAndroid Build Coastguard Worker Component* out_scheme) {
116*6777b538SAndroid Build Coastguard Worker if (scheme.is_empty()) {
117*6777b538SAndroid Build Coastguard Worker // Scheme is unspecified or empty, convert to empty by appending a colon.
118*6777b538SAndroid Build Coastguard Worker *out_scheme = Component(output->length(), 0);
119*6777b538SAndroid Build Coastguard Worker output->push_back(':');
120*6777b538SAndroid Build Coastguard Worker return false;
121*6777b538SAndroid Build Coastguard Worker }
122*6777b538SAndroid Build Coastguard Worker
123*6777b538SAndroid Build Coastguard Worker // The output scheme starts from the current position.
124*6777b538SAndroid Build Coastguard Worker out_scheme->begin = output->length();
125*6777b538SAndroid Build Coastguard Worker
126*6777b538SAndroid Build Coastguard Worker // Danger: it's important that this code does not strip any characters;
127*6777b538SAndroid Build Coastguard Worker // it only emits the canonical version (be it valid or escaped) for each
128*6777b538SAndroid Build Coastguard Worker // of the input characters. Stripping would put it out of sync with
129*6777b538SAndroid Build Coastguard Worker // FindAndCompareScheme, which could cause some security checks on
130*6777b538SAndroid Build Coastguard Worker // schemes to be incorrect.
131*6777b538SAndroid Build Coastguard Worker bool success = true;
132*6777b538SAndroid Build Coastguard Worker size_t begin = static_cast<size_t>(scheme.begin);
133*6777b538SAndroid Build Coastguard Worker size_t end = static_cast<size_t>(scheme.end());
134*6777b538SAndroid Build Coastguard Worker for (size_t i = begin; i < end; i++) {
135*6777b538SAndroid Build Coastguard Worker UCHAR ch = static_cast<UCHAR>(spec[i]);
136*6777b538SAndroid Build Coastguard Worker char replacement = 0;
137*6777b538SAndroid Build Coastguard Worker if (ch < 0x80) {
138*6777b538SAndroid Build Coastguard Worker if (i == begin) {
139*6777b538SAndroid Build Coastguard Worker // Need to do a special check for the first letter of the scheme.
140*6777b538SAndroid Build Coastguard Worker if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
141*6777b538SAndroid Build Coastguard Worker replacement = kSchemeCanonical[ch];
142*6777b538SAndroid Build Coastguard Worker } else {
143*6777b538SAndroid Build Coastguard Worker replacement = kSchemeCanonical[ch];
144*6777b538SAndroid Build Coastguard Worker }
145*6777b538SAndroid Build Coastguard Worker }
146*6777b538SAndroid Build Coastguard Worker
147*6777b538SAndroid Build Coastguard Worker if (replacement) {
148*6777b538SAndroid Build Coastguard Worker output->push_back(replacement);
149*6777b538SAndroid Build Coastguard Worker } else if (ch == '%') {
150*6777b538SAndroid Build Coastguard Worker // Canonicalizing the scheme multiple times should lead to the same
151*6777b538SAndroid Build Coastguard Worker // result. Since invalid characters will be escaped, we need to preserve
152*6777b538SAndroid Build Coastguard Worker // the percent to avoid multiple escaping. The scheme will be invalid.
153*6777b538SAndroid Build Coastguard Worker success = false;
154*6777b538SAndroid Build Coastguard Worker output->push_back('%');
155*6777b538SAndroid Build Coastguard Worker } else {
156*6777b538SAndroid Build Coastguard Worker // Invalid character, store it but mark this scheme as invalid.
157*6777b538SAndroid Build Coastguard Worker success = false;
158*6777b538SAndroid Build Coastguard Worker
159*6777b538SAndroid Build Coastguard Worker // This will escape the output and also handle encoding issues.
160*6777b538SAndroid Build Coastguard Worker // Ignore the return value since we already failed.
161*6777b538SAndroid Build Coastguard Worker AppendUTF8EscapedChar(spec, &i, end, output);
162*6777b538SAndroid Build Coastguard Worker }
163*6777b538SAndroid Build Coastguard Worker }
164*6777b538SAndroid Build Coastguard Worker
165*6777b538SAndroid Build Coastguard Worker // The output scheme ends with the the current position, before appending
166*6777b538SAndroid Build Coastguard Worker // the colon.
167*6777b538SAndroid Build Coastguard Worker out_scheme->len = output->length() - out_scheme->begin;
168*6777b538SAndroid Build Coastguard Worker output->push_back(':');
169*6777b538SAndroid Build Coastguard Worker return success;
170*6777b538SAndroid Build Coastguard Worker }
171*6777b538SAndroid Build Coastguard Worker
172*6777b538SAndroid Build Coastguard Worker // The username and password components reference ranges in the corresponding
173*6777b538SAndroid Build Coastguard Worker // *_spec strings. Typically, these specs will be the same (we're
174*6777b538SAndroid Build Coastguard Worker // canonicalizing a single source string), but may be different when
175*6777b538SAndroid Build Coastguard Worker // replacing components.
176*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoUserInfo(const CHAR * username_spec,const Component & username,const CHAR * password_spec,const Component & password,CanonOutput * output,Component * out_username,Component * out_password)177*6777b538SAndroid Build Coastguard Worker bool DoUserInfo(const CHAR* username_spec,
178*6777b538SAndroid Build Coastguard Worker const Component& username,
179*6777b538SAndroid Build Coastguard Worker const CHAR* password_spec,
180*6777b538SAndroid Build Coastguard Worker const Component& password,
181*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
182*6777b538SAndroid Build Coastguard Worker Component* out_username,
183*6777b538SAndroid Build Coastguard Worker Component* out_password) {
184*6777b538SAndroid Build Coastguard Worker if (username.is_empty() && password.is_empty()) {
185*6777b538SAndroid Build Coastguard Worker // Common case: no user info. We strip empty username/passwords.
186*6777b538SAndroid Build Coastguard Worker *out_username = Component();
187*6777b538SAndroid Build Coastguard Worker *out_password = Component();
188*6777b538SAndroid Build Coastguard Worker return true;
189*6777b538SAndroid Build Coastguard Worker }
190*6777b538SAndroid Build Coastguard Worker
191*6777b538SAndroid Build Coastguard Worker // Write the username.
192*6777b538SAndroid Build Coastguard Worker out_username->begin = output->length();
193*6777b538SAndroid Build Coastguard Worker if (username.is_nonempty()) {
194*6777b538SAndroid Build Coastguard Worker // This will escape characters not valid for the username.
195*6777b538SAndroid Build Coastguard Worker AppendStringOfType(&username_spec[username.begin],
196*6777b538SAndroid Build Coastguard Worker static_cast<size_t>(username.len), CHAR_USERINFO,
197*6777b538SAndroid Build Coastguard Worker output);
198*6777b538SAndroid Build Coastguard Worker }
199*6777b538SAndroid Build Coastguard Worker out_username->len = output->length() - out_username->begin;
200*6777b538SAndroid Build Coastguard Worker
201*6777b538SAndroid Build Coastguard Worker // When there is a password, we need the separator. Note that we strip
202*6777b538SAndroid Build Coastguard Worker // empty but specified passwords.
203*6777b538SAndroid Build Coastguard Worker if (password.is_nonempty()) {
204*6777b538SAndroid Build Coastguard Worker output->push_back(':');
205*6777b538SAndroid Build Coastguard Worker out_password->begin = output->length();
206*6777b538SAndroid Build Coastguard Worker AppendStringOfType(&password_spec[password.begin],
207*6777b538SAndroid Build Coastguard Worker static_cast<size_t>(password.len), CHAR_USERINFO,
208*6777b538SAndroid Build Coastguard Worker output);
209*6777b538SAndroid Build Coastguard Worker out_password->len = output->length() - out_password->begin;
210*6777b538SAndroid Build Coastguard Worker } else {
211*6777b538SAndroid Build Coastguard Worker *out_password = Component();
212*6777b538SAndroid Build Coastguard Worker }
213*6777b538SAndroid Build Coastguard Worker
214*6777b538SAndroid Build Coastguard Worker output->push_back('@');
215*6777b538SAndroid Build Coastguard Worker return true;
216*6777b538SAndroid Build Coastguard Worker }
217*6777b538SAndroid Build Coastguard Worker
218*6777b538SAndroid Build Coastguard Worker // Helper functions for converting port integers to strings.
WritePortInt(char * output,int output_len,int port)219*6777b538SAndroid Build Coastguard Worker inline void WritePortInt(char* output, int output_len, int port) {
220*6777b538SAndroid Build Coastguard Worker _itoa_s(port, output, output_len, 10);
221*6777b538SAndroid Build Coastguard Worker }
222*6777b538SAndroid Build Coastguard Worker
223*6777b538SAndroid Build Coastguard Worker // This function will prepend the colon if there will be a port.
224*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoPort(const CHAR * spec,const Component & port,int default_port_for_scheme,CanonOutput * output,Component * out_port)225*6777b538SAndroid Build Coastguard Worker bool DoPort(const CHAR* spec,
226*6777b538SAndroid Build Coastguard Worker const Component& port,
227*6777b538SAndroid Build Coastguard Worker int default_port_for_scheme,
228*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
229*6777b538SAndroid Build Coastguard Worker Component* out_port) {
230*6777b538SAndroid Build Coastguard Worker int port_num = ParsePort(spec, port);
231*6777b538SAndroid Build Coastguard Worker if (port_num == PORT_UNSPECIFIED || port_num == default_port_for_scheme) {
232*6777b538SAndroid Build Coastguard Worker *out_port = Component();
233*6777b538SAndroid Build Coastguard Worker return true; // Leave port empty.
234*6777b538SAndroid Build Coastguard Worker }
235*6777b538SAndroid Build Coastguard Worker
236*6777b538SAndroid Build Coastguard Worker if (port_num == PORT_INVALID) {
237*6777b538SAndroid Build Coastguard Worker // Invalid port: We'll copy the text from the input so the user can see
238*6777b538SAndroid Build Coastguard Worker // what the error was, and mark the URL as invalid by returning false.
239*6777b538SAndroid Build Coastguard Worker output->push_back(':');
240*6777b538SAndroid Build Coastguard Worker out_port->begin = output->length();
241*6777b538SAndroid Build Coastguard Worker AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
242*6777b538SAndroid Build Coastguard Worker static_cast<size_t>(port.end()), output);
243*6777b538SAndroid Build Coastguard Worker out_port->len = output->length() - out_port->begin;
244*6777b538SAndroid Build Coastguard Worker return false;
245*6777b538SAndroid Build Coastguard Worker }
246*6777b538SAndroid Build Coastguard Worker
247*6777b538SAndroid Build Coastguard Worker // Convert port number back to an integer. Max port value is 5 digits, and
248*6777b538SAndroid Build Coastguard Worker // the Parsed::ExtractPort will have made sure the integer is in range.
249*6777b538SAndroid Build Coastguard Worker const int buf_size = 6;
250*6777b538SAndroid Build Coastguard Worker char buf[buf_size];
251*6777b538SAndroid Build Coastguard Worker WritePortInt(buf, buf_size, port_num);
252*6777b538SAndroid Build Coastguard Worker
253*6777b538SAndroid Build Coastguard Worker // Append the port number to the output, preceded by a colon.
254*6777b538SAndroid Build Coastguard Worker output->push_back(':');
255*6777b538SAndroid Build Coastguard Worker out_port->begin = output->length();
256*6777b538SAndroid Build Coastguard Worker for (int i = 0; i < buf_size && buf[i]; i++)
257*6777b538SAndroid Build Coastguard Worker output->push_back(buf[i]);
258*6777b538SAndroid Build Coastguard Worker
259*6777b538SAndroid Build Coastguard Worker out_port->len = output->length() - out_port->begin;
260*6777b538SAndroid Build Coastguard Worker return true;
261*6777b538SAndroid Build Coastguard Worker }
262*6777b538SAndroid Build Coastguard Worker
// clang-format off
// Lookup table, indexed by a 7-bit ASCII code, telling whether that character
// must be percent-escaped when it appears in a fragment. The `true` entries
// are the ASCII members of the fragment percent-encode set:
// https://url.spec.whatwg.org/#fragment-percent-encode-set
const bool kShouldEscapeCharInFragment[0x80] = {
    // 0x00-0x1f: control characters, always escaped.
    true,  true,  true,  true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,  true,  true,  true,
    // 0x20: ' '   !      "      #      $      %      &      '
    true,  false, true,  false, false, false, false, false,
    // 0x28: (     )      *      +      ,      -      .      /
    false, false, false, false, false, false, false, false,
    // 0x30: 0     1      2      3      4      5      6      7
    false, false, false, false, false, false, false, false,
    // 0x38: 8     9      :      ;      <      =      >      ?
    false, false, false, false, true,  false, true,  false,
    // 0x40: @     A      B      C      D      E      F      G
    false, false, false, false, false, false, false, false,
    // 0x48: H     I      J      K      L      M      N      O
    false, false, false, false, false, false, false, false,
    // 0x50: P     Q      R      S      T      U      V      W
    false, false, false, false, false, false, false, false,
    // 0x58: X     Y      Z      [    bslash   ]      ^      _
    false, false, false, false, false, false, false, false,
    // 0x60: `     a      b      c      d      e      f      g
    true,  false, false, false, false, false, false, false,
    // 0x68: h     i      j      k      l      m      n      o
    false, false, false, false, false, false, false, false,
    // 0x70: p     q      r      s      t      u      v      w
    false, false, false, false, false, false, false, false,
    // 0x78: x     y      z      {      |      }      ~     DEL
    false, false, false, false, false, false, false, true,
};
// clang-format on
298*6777b538SAndroid Build Coastguard Worker
299*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoCanonicalizeRef(const CHAR * spec,const Component & ref,CanonOutput * output,Component * out_ref)300*6777b538SAndroid Build Coastguard Worker void DoCanonicalizeRef(const CHAR* spec,
301*6777b538SAndroid Build Coastguard Worker const Component& ref,
302*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
303*6777b538SAndroid Build Coastguard Worker Component* out_ref) {
304*6777b538SAndroid Build Coastguard Worker if (!ref.is_valid()) {
305*6777b538SAndroid Build Coastguard Worker // Common case of no ref.
306*6777b538SAndroid Build Coastguard Worker *out_ref = Component();
307*6777b538SAndroid Build Coastguard Worker return;
308*6777b538SAndroid Build Coastguard Worker }
309*6777b538SAndroid Build Coastguard Worker
310*6777b538SAndroid Build Coastguard Worker // Append the ref separator. Note that we need to do this even when the ref
311*6777b538SAndroid Build Coastguard Worker // is empty but present.
312*6777b538SAndroid Build Coastguard Worker output->push_back('#');
313*6777b538SAndroid Build Coastguard Worker out_ref->begin = output->length();
314*6777b538SAndroid Build Coastguard Worker
315*6777b538SAndroid Build Coastguard Worker // Now iterate through all the characters, converting to UTF-8 and validating.
316*6777b538SAndroid Build Coastguard Worker size_t end = static_cast<size_t>(ref.end());
317*6777b538SAndroid Build Coastguard Worker for (size_t i = static_cast<size_t>(ref.begin); i < end; i++) {
318*6777b538SAndroid Build Coastguard Worker UCHAR current_char = static_cast<UCHAR>(spec[i]);
319*6777b538SAndroid Build Coastguard Worker if (current_char < 0x80) {
320*6777b538SAndroid Build Coastguard Worker if (kShouldEscapeCharInFragment[current_char])
321*6777b538SAndroid Build Coastguard Worker AppendEscapedChar(static_cast<unsigned char>(spec[i]), output);
322*6777b538SAndroid Build Coastguard Worker else
323*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(spec[i]));
324*6777b538SAndroid Build Coastguard Worker } else {
325*6777b538SAndroid Build Coastguard Worker AppendUTF8EscapedChar(spec, &i, end, output);
326*6777b538SAndroid Build Coastguard Worker }
327*6777b538SAndroid Build Coastguard Worker }
328*6777b538SAndroid Build Coastguard Worker
329*6777b538SAndroid Build Coastguard Worker out_ref->len = output->length() - out_ref->begin;
330*6777b538SAndroid Build Coastguard Worker }
331*6777b538SAndroid Build Coastguard Worker
332*6777b538SAndroid Build Coastguard Worker } // namespace
333*6777b538SAndroid Build Coastguard Worker
RemoveURLWhitespace(const char * input,int input_len,CanonOutputT<char> * buffer,int * output_len,bool * potentially_dangling_markup)334*6777b538SAndroid Build Coastguard Worker const char* RemoveURLWhitespace(const char* input,
335*6777b538SAndroid Build Coastguard Worker int input_len,
336*6777b538SAndroid Build Coastguard Worker CanonOutputT<char>* buffer,
337*6777b538SAndroid Build Coastguard Worker int* output_len,
338*6777b538SAndroid Build Coastguard Worker bool* potentially_dangling_markup) {
339*6777b538SAndroid Build Coastguard Worker return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
340*6777b538SAndroid Build Coastguard Worker potentially_dangling_markup);
341*6777b538SAndroid Build Coastguard Worker }
342*6777b538SAndroid Build Coastguard Worker
RemoveURLWhitespace(const char16_t * input,int input_len,CanonOutputT<char16_t> * buffer,int * output_len,bool * potentially_dangling_markup)343*6777b538SAndroid Build Coastguard Worker const char16_t* RemoveURLWhitespace(const char16_t* input,
344*6777b538SAndroid Build Coastguard Worker int input_len,
345*6777b538SAndroid Build Coastguard Worker CanonOutputT<char16_t>* buffer,
346*6777b538SAndroid Build Coastguard Worker int* output_len,
347*6777b538SAndroid Build Coastguard Worker bool* potentially_dangling_markup) {
348*6777b538SAndroid Build Coastguard Worker return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
349*6777b538SAndroid Build Coastguard Worker potentially_dangling_markup);
350*6777b538SAndroid Build Coastguard Worker }
351*6777b538SAndroid Build Coastguard Worker
CanonicalSchemeChar(char16_t ch)352*6777b538SAndroid Build Coastguard Worker char CanonicalSchemeChar(char16_t ch) {
353*6777b538SAndroid Build Coastguard Worker if (ch >= 0x80)
354*6777b538SAndroid Build Coastguard Worker return 0; // Non-ASCII is not supported by schemes.
355*6777b538SAndroid Build Coastguard Worker return kSchemeCanonical[ch];
356*6777b538SAndroid Build Coastguard Worker }
357*6777b538SAndroid Build Coastguard Worker
CanonicalizeScheme(const char * spec,const Component & scheme,CanonOutput * output,Component * out_scheme)358*6777b538SAndroid Build Coastguard Worker bool CanonicalizeScheme(const char* spec,
359*6777b538SAndroid Build Coastguard Worker const Component& scheme,
360*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
361*6777b538SAndroid Build Coastguard Worker Component* out_scheme) {
362*6777b538SAndroid Build Coastguard Worker return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
363*6777b538SAndroid Build Coastguard Worker }
364*6777b538SAndroid Build Coastguard Worker
CanonicalizeScheme(const char16_t * spec,const Component & scheme,CanonOutput * output,Component * out_scheme)365*6777b538SAndroid Build Coastguard Worker bool CanonicalizeScheme(const char16_t* spec,
366*6777b538SAndroid Build Coastguard Worker const Component& scheme,
367*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
368*6777b538SAndroid Build Coastguard Worker Component* out_scheme) {
369*6777b538SAndroid Build Coastguard Worker return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme);
370*6777b538SAndroid Build Coastguard Worker }
371*6777b538SAndroid Build Coastguard Worker
CanonicalizeUserInfo(const char * username_source,const Component & username,const char * password_source,const Component & password,CanonOutput * output,Component * out_username,Component * out_password)372*6777b538SAndroid Build Coastguard Worker bool CanonicalizeUserInfo(const char* username_source,
373*6777b538SAndroid Build Coastguard Worker const Component& username,
374*6777b538SAndroid Build Coastguard Worker const char* password_source,
375*6777b538SAndroid Build Coastguard Worker const Component& password,
376*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
377*6777b538SAndroid Build Coastguard Worker Component* out_username,
378*6777b538SAndroid Build Coastguard Worker Component* out_password) {
379*6777b538SAndroid Build Coastguard Worker return DoUserInfo<char, unsigned char>(username_source, username,
380*6777b538SAndroid Build Coastguard Worker password_source, password, output,
381*6777b538SAndroid Build Coastguard Worker out_username, out_password);
382*6777b538SAndroid Build Coastguard Worker }
383*6777b538SAndroid Build Coastguard Worker
CanonicalizeUserInfo(const char16_t * username_source,const Component & username,const char16_t * password_source,const Component & password,CanonOutput * output,Component * out_username,Component * out_password)384*6777b538SAndroid Build Coastguard Worker bool CanonicalizeUserInfo(const char16_t* username_source,
385*6777b538SAndroid Build Coastguard Worker const Component& username,
386*6777b538SAndroid Build Coastguard Worker const char16_t* password_source,
387*6777b538SAndroid Build Coastguard Worker const Component& password,
388*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
389*6777b538SAndroid Build Coastguard Worker Component* out_username,
390*6777b538SAndroid Build Coastguard Worker Component* out_password) {
391*6777b538SAndroid Build Coastguard Worker return DoUserInfo<char16_t, char16_t>(username_source, username,
392*6777b538SAndroid Build Coastguard Worker password_source, password, output,
393*6777b538SAndroid Build Coastguard Worker out_username, out_password);
394*6777b538SAndroid Build Coastguard Worker }
395*6777b538SAndroid Build Coastguard Worker
CanonicalizePort(const char * spec,const Component & port,int default_port_for_scheme,CanonOutput * output,Component * out_port)396*6777b538SAndroid Build Coastguard Worker bool CanonicalizePort(const char* spec,
397*6777b538SAndroid Build Coastguard Worker const Component& port,
398*6777b538SAndroid Build Coastguard Worker int default_port_for_scheme,
399*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
400*6777b538SAndroid Build Coastguard Worker Component* out_port) {
401*6777b538SAndroid Build Coastguard Worker return DoPort<char, unsigned char>(spec, port, default_port_for_scheme,
402*6777b538SAndroid Build Coastguard Worker output, out_port);
403*6777b538SAndroid Build Coastguard Worker }
404*6777b538SAndroid Build Coastguard Worker
CanonicalizePort(const char16_t * spec,const Component & port,int default_port_for_scheme,CanonOutput * output,Component * out_port)405*6777b538SAndroid Build Coastguard Worker bool CanonicalizePort(const char16_t* spec,
406*6777b538SAndroid Build Coastguard Worker const Component& port,
407*6777b538SAndroid Build Coastguard Worker int default_port_for_scheme,
408*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
409*6777b538SAndroid Build Coastguard Worker Component* out_port) {
410*6777b538SAndroid Build Coastguard Worker return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output,
411*6777b538SAndroid Build Coastguard Worker out_port);
412*6777b538SAndroid Build Coastguard Worker }
413*6777b538SAndroid Build Coastguard Worker
CanonicalizeRef(const char * spec,const Component & ref,CanonOutput * output,Component * out_ref)414*6777b538SAndroid Build Coastguard Worker void CanonicalizeRef(const char* spec,
415*6777b538SAndroid Build Coastguard Worker const Component& ref,
416*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
417*6777b538SAndroid Build Coastguard Worker Component* out_ref) {
418*6777b538SAndroid Build Coastguard Worker DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
419*6777b538SAndroid Build Coastguard Worker }
420*6777b538SAndroid Build Coastguard Worker
CanonicalizeRef(const char16_t * spec,const Component & ref,CanonOutput * output,Component * out_ref)421*6777b538SAndroid Build Coastguard Worker void CanonicalizeRef(const char16_t* spec,
422*6777b538SAndroid Build Coastguard Worker const Component& ref,
423*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
424*6777b538SAndroid Build Coastguard Worker Component* out_ref) {
425*6777b538SAndroid Build Coastguard Worker DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref);
426*6777b538SAndroid Build Coastguard Worker }
427*6777b538SAndroid Build Coastguard Worker
428*6777b538SAndroid Build Coastguard Worker } // namespace url
429