xref: /aosp_15_r20/external/cronet/url/url_canon_host.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/check.h"
6 #include "base/cpu_reduction_experiment.h"
7 #include "url/url_canon.h"
8 #include "url/url_canon_internal.h"
9 #include "url/url_features.h"
10 
11 namespace url {
12 
13 namespace {
14 
15 // This table lists the canonical version of all characters we allow in the
16 // input, with 0 indicating it is disallowed. We use the magic kEsc value to
17 // indicate that this character should be escaped. At present, ' ' (SPACE) and
18 // '*' (asterisk) are still non-compliant to the URL Standard. See
19 // https://crbug.com/1416013 for details.
//
// Upper-case ASCII letters map to their lower-case form, so a lookup also
// performs case folding. '%' maps to 0: DoSimpleHost() decodes percent escapes
// before consulting this table, so a '%' only reaches the lookup when it was
// itself produced by a successfully decoded escape sequence.
20 const unsigned char kEsc = 0xff;
21 // clang-format off
// Indexed by 7-bit ASCII value; callers must check `value < 0x80` first.
22 const unsigned char kHostCharLookup[0x80] = {
23 // 00-1f: all are invalid
24      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
25      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
26 //  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
27     kEsc,'!', '"',  0,  '$',  0,  '&', '\'','(', ')', kEsc, '+', ',', '-', '.',  0,
28 //   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
29     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0,  '=',  0,   0,
30 //   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
31      0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
32 //   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
33     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  0,  ']',  0,  '_',
34 //   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
35     '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
36 //   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~   DEL
37     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{',  0, '}',  '~',  0 };
38 // clang-format on
39 
// Flag bit marking a forbidden host code point, as defined by
// https://url.spec.whatwg.org/#forbidden-host-code-point
const uint8_t kForbiddenHost = 0x1;

// TODO(crbug.com/1416006): Merge other lookup tables into this table. That can
// be probably done after https://crbug.com/1416013 is resolved.
//
// Per-character flags for the 128 ASCII code points, indexed by code point.
// This table is currently only used for an opaque-host in non-special URLs.
// clang-format off
const uint8_t kHostCharacterTable[128] = {
    kForbiddenHost,  // 0x00 (NUL)
    0,               // 0x01
    0,               // 0x02
    0,               // 0x03
    0,               // 0x04
    0,               // 0x05
    0,               // 0x06
    0,               // 0x07
    0,               // 0x08
    kForbiddenHost,  // 0x09 (TAB)
    kForbiddenHost,  // 0x0A (LF)
    0,               // 0x0B
    0,               // 0x0C
    kForbiddenHost,  // 0x0D (CR)
    0,               // 0x0E
    0,               // 0x0F
    0,               // 0x10
    0,               // 0x11
    0,               // 0x12
    0,               // 0x13
    0,               // 0x14
    0,               // 0x15
    0,               // 0x16
    0,               // 0x17
    0,               // 0x18
    0,               // 0x19
    0,               // 0x1A
    0,               // 0x1B
    0,               // 0x1C
    0,               // 0x1D
    0,               // 0x1E
    0,               // 0x1F
    kForbiddenHost,  // ' '
    0,               // '!'
    0,               // '"'
    kForbiddenHost,  // '#'
    0,               // '$'
    0,               // '%'
    0,               // '&'
    0,               // '\''
    0,               // '('
    0,               // ')'
    0,               // '*'
    0,               // '+'
    0,               // ','
    0,               // '-'
    0,               // '.'
    kForbiddenHost,  // '/'
    0,               // '0'
    0,               // '1'
    0,               // '2'
    0,               // '3'
    0,               // '4'
    0,               // '5'
    0,               // '6'
    0,               // '7'
    0,               // '8'
    0,               // '9'
    kForbiddenHost,  // ':'
    0,               // ';'
    kForbiddenHost,  // '<'
    0,               // '='
    kForbiddenHost,  // '>'
    kForbiddenHost,  // '?'
    kForbiddenHost,  // '@'
    0,               // 'A'
    0,               // 'B'
    0,               // 'C'
    0,               // 'D'
    0,               // 'E'
    0,               // 'F'
    0,               // 'G'
    0,               // 'H'
    0,               // 'I'
    0,               // 'J'
    0,               // 'K'
    0,               // 'L'
    0,               // 'M'
    0,               // 'N'
    0,               // 'O'
    0,               // 'P'
    0,               // 'Q'
    0,               // 'R'
    0,               // 'S'
    0,               // 'T'
    0,               // 'U'
    0,               // 'V'
    0,               // 'W'
    0,               // 'X'
    0,               // 'Y'
    0,               // 'Z'
    kForbiddenHost,  // '['
    kForbiddenHost,  // '\\'
    kForbiddenHost,  // ']'
    kForbiddenHost,  // '^'
    0,               // '_'
    0,               // '`'
    0,               // 'a'
    0,               // 'b'
    0,               // 'c'
    0,               // 'd'
    0,               // 'e'
    0,               // 'f'
    0,               // 'g'
    0,               // 'h'
    0,               // 'i'
    0,               // 'j'
    0,               // 'k'
    0,               // 'l'
    0,               // 'm'
    0,               // 'n'
    0,               // 'o'
    0,               // 'p'
    0,               // 'q'
    0,               // 'r'
    0,               // 's'
    0,               // 't'
    0,               // 'u'
    0,               // 'v'
    0,               // 'w'
    0,               // 'x'
    0,               // 'y'
    0,               // 'z'
    0,               // '{'
    kForbiddenHost,  // '|'
    0,               // '}'
    0,               // '~'
    0,               // 0x7F (DEL)
};
// clang-format on

// Returns true if |ch| is a forbidden host code point. Only ASCII code points
// can be forbidden; anything above 0x7F is reported as allowed.
//
// The parameter is a full UTF-16 code unit rather than a byte: callers hand in
// char16_t values, and narrowing those to 8 bits before the range check would
// make e.g. U+0120 look like U+0020 (SPACE) and be rejected by mistake.
bool IsForbiddenHostCodePoint(char16_t ch) {
  return ch <= 0x7F && (kHostCharacterTable[ch] & kForbiddenHost);
}
182 
183 // RFC1034 maximum FQDN length.
184 constexpr size_t kMaxHostLength = 253;
185 
186 // Generous padding to account for the fact that UTS#46 normalization can cause
187 // a long string to actually shrink and fit within the 253 character RFC1034
188 // FQDN length limit. Note that this can still be too short for pathological
189 // cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
190 // removed from the input by UTS#46 processing. However, this should be
191 // sufficient for all normally-encountered, non-abusive hostname strings.
//
// DoIDNHost() rejects an unescaped host longer than this before attempting
// the IDN conversion.
192 constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
193 
// Size parameter shared by the temporary 8-bit and 16-bit output buffers used
// while canonicalizing a host (presumably their inline capacity in code
// units -- confirm against RawCanonOutputT).
194 constexpr size_t kTempHostBufferLen = 1024;
195 using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
196 using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
197 
198 // Scans a host name and fills in the output flags according to what we find.
199 // |has_non_ascii| will be true if there are any non-7-bit characters, and
200 // |has_escaped| will be true if there is a percent sign.
201 template<typename CHAR, typename UCHAR>
ScanHostname(const CHAR * spec,const Component & host,bool * has_non_ascii,bool * has_escaped)202 void ScanHostname(const CHAR* spec,
203                   const Component& host,
204                   bool* has_non_ascii,
205                   bool* has_escaped) {
206   int end = host.end();
207   *has_non_ascii = false;
208   *has_escaped = false;
209   for (int i = host.begin; i < end; i++) {
210     if (static_cast<UCHAR>(spec[i]) >= 0x80)
211       *has_non_ascii = true;
212     else if (spec[i] == '%')
213       *has_escaped = true;
214   }
215 }
216 
217 // Canonicalizes a host name that is entirely 8-bit characters (even though
218 // the type holding them may be 16 bits. Escaped characters will be unescaped.
219 // Non-7-bit characters (for example, UTF-8) will be passed unchanged.
220 //
221 // The |*has_non_ascii| flag will be true if there are non-7-bit characters in
222 // the output.
223 //
224 // This function is used in two situations:
225 //
226 //  * When the caller knows there is no non-ASCII or percent escaped
227 //    characters. This is what DoHost does. The result will be a completely
228 //    canonicalized host since we know nothing weird can happen (escaped
229 //    characters could be unescaped to non-7-bit, so they have to be treated
230 //    with suspicion at this point). It does not use the |has_non_ascii| flag.
231 //
232 //  * When the caller has an 8-bit string that may need unescaping.
233 //    DoComplexHost calls us this situation to do unescaping and validation.
234 //    After this, it may do other IDN operations depending on the value of the
235 //    |*has_non_ascii| flag.
236 //
237 // The return value indicates if the output is a potentially valid host name.
238 template <typename INCHAR, typename OUTCHAR>
DoSimpleHost(const INCHAR * host,size_t host_len,CanonOutputT<OUTCHAR> * output,bool * has_non_ascii)239 bool DoSimpleHost(const INCHAR* host,
240                   size_t host_len,
241                   CanonOutputT<OUTCHAR>* output,
242                   bool* has_non_ascii) {
243   *has_non_ascii = false;
244 
245   bool success = true;
246   for (size_t i = 0; i < host_len; ++i) {
247     unsigned int source = host[i];
248     if (source == '%') {
249       // Unescape first, if possible.
250       // Source will be used only if decode operation was successful.
251       if (!DecodeEscaped(host, &i, host_len,
252                          reinterpret_cast<unsigned char*>(&source))) {
253         // Invalid escaped character. There is nothing that can make this
254         // host valid. We append an escaped percent so the URL looks reasonable
255         // and mark as failed.
256         AppendEscapedChar('%', output);
257         success = false;
258         continue;
259       }
260     }
261 
262     if (source < 0x80) {
263       // We have ASCII input, we can use our lookup table.
264       unsigned char replacement = kHostCharLookup[source];
265       if (!replacement) {
266         // Invalid character, add it as percent-escaped and mark as failed.
267         AppendEscapedChar(source, output);
268         success = false;
269       } else if (replacement == kEsc) {
270         // This character is valid but should be escaped.
271         AppendEscapedChar(source, output);
272       } else {
273         // Common case, the given character is valid in a hostname, the lookup
274         // table tells us the canonical representation of that character (lower
275         // cased).
276         output->push_back(replacement);
277       }
278     } else {
279       // It's a non-ascii char. Just push it to the output.
280       // In case where we have char16 input, and char output it's safe to
281       // cast char16->char only if input string was converted to ASCII.
282       output->push_back(static_cast<OUTCHAR>(source));
283       *has_non_ascii = true;
284     }
285   }
286   return success;
287 }
288 
289 // Canonicalizes a host that requires IDN conversion. Returns true on success
DoIDNHost(const char16_t * src,size_t src_len,CanonOutput * output)290 bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
291   int original_output_len = output->length();  // So we can rewind below.
292 
293   // We need to escape URL before doing IDN conversion, since punicode strings
294   // cannot be escaped after they are created.
295   RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
296   bool has_non_ascii;
297   DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii);
298   if (url_escaped_host.length() > kMaxHostBufferLength) {
299     AppendInvalidNarrowString(src, 0, src_len, output);
300     return false;
301   }
302 
303   StackBufferW wide_output;
304   if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
305     // Some error, give up. This will write some reasonable looking
306     // representation of the string to the output.
307     AppendInvalidNarrowString(src, 0, src_len, output);
308     return false;
309   }
310 
311   // Now we check the ASCII output like a normal host. It will also handle
312   // unescaping. Although we unescaped everything before this function call, if
313   // somebody does %00 as fullwidth, ICU will convert this to ASCII.
314   bool success = DoSimpleHost(wide_output.data(), wide_output.length(), output,
315                               &has_non_ascii);
316   if (has_non_ascii) {
317     // ICU generated something that DoSimpleHost didn't think looked like
318     // ASCII. This is quite rare, but ICU might convert some characters to
319     // percent signs which might generate new escape sequences which might in
320     // turn be invalid. An example is U+FE6A "small percent" which ICU will
321     // name prep into an ASCII percent and then we can interpret the following
322     // characters as escaped characters.
323     //
324     // If DoSimpleHost didn't think the output was ASCII, just escape the
325     // thing we gave ICU and give up. DoSimpleHost will have handled a further
326     // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
327     // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
328     // do more (like handle escaped non-ASCII sequences). Handling the escaped
329     // ASCII isn't strictly necessary, but DoSimpleHost handles this case
330     // anyway so we handle it/
331     output->set_length(original_output_len);
332     AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
333                               output);
334     return false;
335   }
336   return success;
337 }
338 
339 // 8-bit convert host to its ASCII version: this converts the UTF-8 input to
340 // UTF-16. The has_escaped flag should be set if the input string requires
341 // unescaping.
DoComplexHost(const char * host,size_t host_len,bool has_non_ascii,bool has_escaped,CanonOutput * output)342 bool DoComplexHost(const char* host,
343                    size_t host_len,
344                    bool has_non_ascii,
345                    bool has_escaped,
346                    CanonOutput* output) {
347   // Save the current position in the output. We may write stuff and rewind it
348   // below, so we need to know where to rewind to.
349   size_t begin_length = output->length();
350 
351   // Points to the UTF-8 data we want to convert. This will either be the
352   // input or the unescaped version written to |*output| if necessary.
353   const char* utf8_source;
354   size_t utf8_source_len;
355   bool are_all_escaped_valid = true;
356   if (has_escaped) {
357     // Unescape before converting to UTF-16 for IDN. We write this into the
358     // output because it most likely does not require IDNization, and we can
359     // save another huge stack buffer. It will be replaced below if it requires
360     // IDN. This will also update our non-ASCII flag so we know whether the
361     // unescaped input requires IDN.
362     if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) {
363       // Error with some escape sequence. We'll call the current output
364       // complete. DoSimpleHost will have written some "reasonable" output
365       // for the invalid escapes, but the output could be non-ASCII and
366       // needs to go through re-encoding below.
367       are_all_escaped_valid = false;
368     }
369 
370     // Unescaping may have left us with ASCII input, in which case the
371     // unescaped version we wrote to output is complete.
372     if (!has_non_ascii) {
373       return are_all_escaped_valid;
374     }
375 
376     // Save the pointer into the data was just converted (it may be appended to
377     // other data in the output buffer).
378     utf8_source = &output->data()[begin_length];
379     utf8_source_len = output->length() - begin_length;
380   } else {
381     // We don't need to unescape, use input for IDNization later. (We know the
382     // input has non-ASCII, or the simple version would have been called
383     // instead of us.)
384     utf8_source = host;
385     utf8_source_len = host_len;
386   }
387 
388   // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion.
389   // Above, we may have used the output to write the unescaped values to, so
390   // we have to rewind it to where we started after we convert it to UTF-16.
391   StackBufferW utf16;
392   if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
393     // In this error case, the input may or may not be the output.
394     StackBuffer utf8;
395     for (size_t i = 0; i < utf8_source_len; i++)
396       utf8.push_back(utf8_source[i]);
397     output->set_length(begin_length);
398     AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
399     return false;
400   }
401   output->set_length(begin_length);
402 
403   // This will call DoSimpleHost which will do normal ASCII canonicalization
404   // and also check for IP addresses in the outpt.
405   return DoIDNHost(utf16.data(), utf16.length(), output) &&
406          are_all_escaped_valid;
407 }
408 
409 // UTF-16 convert host to its ASCII version. The set up is already ready for
410 // the backend, so we just pass through. The has_escaped flag should be set if
411 // the input string requires unescaping.
DoComplexHost(const char16_t * host,size_t host_len,bool has_non_ascii,bool has_escaped,CanonOutput * output)412 bool DoComplexHost(const char16_t* host,
413                    size_t host_len,
414                    bool has_non_ascii,
415                    bool has_escaped,
416                    CanonOutput* output) {
417   if (has_escaped) {
418     // Yikes, we have escaped characters with wide input. The escaped
419     // characters should be interpreted as UTF-8. To solve this problem,
420     // we convert to UTF-8, unescape, then convert back to UTF-16 for IDN.
421     //
422     // We don't bother to optimize the conversion in the ASCII case (which
423     // *could* just be a copy) and use the UTF-8 path, because it should be
424     // very rare that host names have escaped characters, and it is relatively
425     // fast to do the conversion anyway.
426     StackBuffer utf8;
427     if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) {
428       AppendInvalidNarrowString(host, 0, host_len, output);
429       return false;
430     }
431 
432     // Once we convert to UTF-8, we can use the 8-bit version of the complex
433     // host handling code above.
434     return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii, has_escaped,
435                          output);
436   }
437 
438   // No unescaping necessary, we can safely pass the input to ICU. This
439   // function will only get called if we either have escaped or non-ascii
440   // input, so it's safe to just use ICU now. Even if the input is ASCII,
441   // this function will do the right thing (just slower than we could).
442   return DoIDNHost(host, host_len, output);
443 }
444 
445 template <typename CHAR, typename UCHAR>
DoHostSubstring(const CHAR * spec,const Component & host,CanonOutput * output)446 bool DoHostSubstring(const CHAR* spec,
447                      const Component& host,
448                      CanonOutput* output) {
449   DCHECK(host.is_valid());
450 
451   bool has_non_ascii, has_escaped;
452   ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
453 
454   if (has_non_ascii || has_escaped) {
455     return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
456                          has_non_ascii, has_escaped, output);
457   }
458 
459   const bool success = DoSimpleHost(
460       &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
461   DCHECK(!has_non_ascii);
462   return success;
463 }
464 
465 template <typename CharT>
DoOpaqueHost(const std::basic_string_view<CharT> host,CanonOutput & output)466 bool DoOpaqueHost(const std::basic_string_view<CharT> host,
467                   CanonOutput& output) {
468   // URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
469 
470   size_t host_len = host.size();
471 
472   for (size_t i = 0; i < host_len; ++i) {
473     char16_t ch = host[i];
474     // The characters '[', ':', and ']', are checked later in
475     // `CanonicalizeIPv6Address` function.
476     if (ch != '[' && ch != ']' && ch != ':' && IsForbiddenHostCodePoint(ch)) {
477       return false;
478     }
479 
480     // Implementation note:
481     //
482     // URL Standard: Step 3 in
483     // https://url.spec.whatwg.org/#concept-opaque-host-parser
484     //
485     // > 3. If input contains a U+0025 (%) and the two code points following
486     // > it are not ASCII hex digits, invalid-URL-unit validation error.
487     //
488     // `invalid-URL-unit` is NOT marked as failure. We don't need to consider
489     // step 3 here.
490 
491     // URL Standard: Step 4 in
492     // https://url.spec.whatwg.org/#concept-opaque-host-parser
493     //
494     // > 4. Return the result of running UTF-8 percent-encode on input using
495     // > the C0 control percent-encode set.
496     if (IsInC0ControlPercentEncodeSet(ch)) {
497       AppendUTF8EscapedChar(host.data(), &i, host_len, &output);
498     } else {
499       output.push_back(ch);
500     }
501   }
502   return true;
503 }
504 
505 template <typename CHAR, typename UCHAR, CanonMode canon_mode>
DoHost(const CHAR * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)506 void DoHost(const CHAR* spec,
507             const Component& host,
508             CanonOutput& output,
509             CanonHostInfo& host_info) {
510   // URL Standard: https://url.spec.whatwg.org/#host-parsing
511 
512   // Keep track of output's initial length, so we can rewind later.
513   const int output_begin = output.length();
514 
515   if (host.is_empty()) {
516     // Empty hosts don't need anything.
517     host_info.family = CanonHostInfo::NEUTRAL;
518     // Carry over the valid empty host for non-special URLs.
519     //
520     // Component(0, 0) should be considered invalid here for historical reasons.
521     //
522     // TODO(crbug.com/1416006): Update the callers so that they don't pass
523     // Component(0, 0) as an invalid `host`.
524     if (host.begin != 0 && host.len == 0) {
525       host_info.out_host = Component(output_begin, 0);
526     } else {
527       host_info.out_host = Component();
528     }
529     return;
530   }
531 
532   bool success;
533   if constexpr (canon_mode == CanonMode::kSpecialURL) {
534     success = DoHostSubstring<CHAR, UCHAR>(spec, host, &output);
535   } else {
536     // URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
537     success = DoOpaqueHost(host.as_string_view_on(spec), output);
538   }
539 
540   if (success) {
541     // After all the other canonicalization, check if we ended up with an IP
542     // address. IP addresses are small, so writing into this temporary buffer
543     // should not cause an allocation.
544     RawCanonOutput<64> canon_ip;
545 
546     if constexpr (canon_mode == CanonMode::kSpecialURL) {
547       CanonicalizeIPAddress(output.data(),
548                             MakeRange(output_begin, output.length()), &canon_ip,
549                             &host_info);
550     } else {
551       // Non-special URLs support only IPv6.
552       CanonicalizeIPv6Address(output.data(),
553                               MakeRange(output_begin, output.length()),
554                               canon_ip, host_info);
555     }
556 
557     // If we got an IPv4/IPv6 address, copy the canonical form back to the
558     // real buffer. Otherwise, it's a hostname or broken IP, in which case
559     // we just leave it in place.
560     if (host_info.IsIPAddress()) {
561       output.set_length(output_begin);
562       output.Append(canon_ip.view());
563     }
564   } else {
565     // Canonicalization failed. Set BROKEN to notify the caller.
566     host_info.family = CanonHostInfo::BROKEN;
567   }
568   host_info.out_host = MakeRange(output_begin, output.length());
569 }
570 
571 }  // namespace
572 
CanonicalizeHost(const char * spec,const Component & host,CanonOutput * output,Component * out_host)573 bool CanonicalizeHost(const char* spec,
574                       const Component& host,
575                       CanonOutput* output,
576                       Component* out_host) {
577   DCHECK(output);
578   DCHECK(out_host);
579   return CanonicalizeSpecialHost(spec, host, *output, *out_host);
580 }
581 
CanonicalizeHost(const char16_t * spec,const Component & host,CanonOutput * output,Component * out_host)582 bool CanonicalizeHost(const char16_t* spec,
583                       const Component& host,
584                       CanonOutput* output,
585                       Component* out_host) {
586   DCHECK(output);
587   DCHECK(out_host);
588   return CanonicalizeSpecialHost(spec, host, *output, *out_host);
589 }
590 
CanonicalizeSpecialHost(const char * spec,const Component & host,CanonOutput & output,Component & out_host)591 bool CanonicalizeSpecialHost(const char* spec,
592                              const Component& host,
593                              CanonOutput& output,
594                              Component& out_host) {
595   CanonHostInfo host_info;
596   DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
597                                                       host_info);
598   out_host = host_info.out_host;
599   return (host_info.family != CanonHostInfo::BROKEN);
600 }
601 
CanonicalizeSpecialHost(const char16_t * spec,const Component & host,CanonOutput & output,Component & out_host)602 bool CanonicalizeSpecialHost(const char16_t* spec,
603                              const Component& host,
604                              CanonOutput& output,
605                              Component& out_host) {
606   CanonHostInfo host_info;
607   DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
608                                                      host_info);
609   out_host = host_info.out_host;
610   return (host_info.family != CanonHostInfo::BROKEN);
611 }
612 
CanonicalizeNonSpecialHost(const char * spec,const Component & host,CanonOutput & output,Component & out_host)613 bool CanonicalizeNonSpecialHost(const char* spec,
614                                 const Component& host,
615                                 CanonOutput& output,
616                                 Component& out_host) {
617   CanonHostInfo host_info;
618   DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
619                                                          host_info);
620   out_host = host_info.out_host;
621   return (host_info.family != CanonHostInfo::BROKEN);
622 }
623 
CanonicalizeNonSpecialHost(const char16_t * spec,const Component & host,CanonOutput & output,Component & out_host)624 bool CanonicalizeNonSpecialHost(const char16_t* spec,
625                                 const Component& host,
626                                 CanonOutput& output,
627                                 Component& out_host) {
628   CanonHostInfo host_info;
629   DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
630                                                         host_info);
631   out_host = host_info.out_host;
632   return (host_info.family != CanonHostInfo::BROKEN);
633 }
634 
CanonicalizeHostVerbose(const char * spec,const Component & host,CanonOutput * output,CanonHostInfo * host_info)635 void CanonicalizeHostVerbose(const char* spec,
636                              const Component& host,
637                              CanonOutput* output,
638                              CanonHostInfo* host_info) {
639   DCHECK(output);
640   DCHECK(host_info);
641   CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
642 }
643 
CanonicalizeHostVerbose(const char16_t * spec,const Component & host,CanonOutput * output,CanonHostInfo * host_info)644 void CanonicalizeHostVerbose(const char16_t* spec,
645                              const Component& host,
646                              CanonOutput* output,
647                              CanonHostInfo* host_info) {
648   DCHECK(output);
649   DCHECK(host_info);
650   CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
651 }
652 
CanonicalizeSpecialHostVerbose(const char * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)653 void CanonicalizeSpecialHostVerbose(const char* spec,
654                                     const Component& host,
655                                     CanonOutput& output,
656                                     CanonHostInfo& host_info) {
657   DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
658                                                       host_info);
659 }
660 
CanonicalizeSpecialHostVerbose(const char16_t * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)661 void CanonicalizeSpecialHostVerbose(const char16_t* spec,
662                                     const Component& host,
663                                     CanonOutput& output,
664                                     CanonHostInfo& host_info) {
665   DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
666                                                      host_info);
667 }
668 
CanonicalizeHostSubstring(const char * spec,const Component & host,CanonOutput * output)669 bool CanonicalizeHostSubstring(const char* spec,
670                                const Component& host,
671                                CanonOutput* output) {
672   return DoHostSubstring<char, unsigned char>(spec, host, output);
673 }
674 
CanonicalizeHostSubstring(const char16_t * spec,const Component & host,CanonOutput * output)675 bool CanonicalizeHostSubstring(const char16_t* spec,
676                                const Component& host,
677                                CanonOutput* output) {
678   return DoHostSubstring<char16_t, char16_t>(spec, host, output);
679 }
680 
CanonicalizeNonSpecialHostVerbose(const char * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)681 void CanonicalizeNonSpecialHostVerbose(const char* spec,
682                                        const Component& host,
683                                        CanonOutput& output,
684                                        CanonHostInfo& host_info) {
685   DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
686                                                          host_info);
687 }
688 
CanonicalizeNonSpecialHostVerbose(const char16_t * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)689 void CanonicalizeNonSpecialHostVerbose(const char16_t* spec,
690                                        const Component& host,
691                                        CanonOutput& output,
692                                        CanonHostInfo& host_info) {
693   DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
694                                                         host_info);
695 }
696 
697 }  // namespace url
698