xref: /aosp_15_r20/external/cronet/net/third_party/quiche/src/quiche/common/platform/api/quiche_hostname_utils.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "quiche/common/platform/api/quiche_hostname_utils.h"
6 
7 #include <string>
8 
9 #include "absl/strings/string_view.h"
10 #include "quiche/common/platform/api/quiche_googleurl.h"
11 #include "quiche/common/platform/api/quiche_logging.h"
12 
13 namespace quiche {
14 
15 // TODO(vasilvv): the functions below are forked from Chromium's
16 // net/base/url_util.h; those should be moved to googleurl.
17 namespace {
18 
CanonicalizeHost(absl::string_view host,url::CanonHostInfo * host_info)19 std::string CanonicalizeHost(absl::string_view host,
20                              url::CanonHostInfo* host_info) {
21   // Try to canonicalize the host.
22   const url::Component raw_host_component(0, static_cast<int>(host.length()));
23   std::string canon_host;
24   url::StdStringCanonOutput canon_host_output(&canon_host);
25   url::CanonicalizeHostVerbose(host.data(), raw_host_component,
26                                &canon_host_output, host_info);
27 
28   if (host_info->out_host.is_nonempty() &&
29       host_info->family != url::CanonHostInfo::BROKEN) {
30     // Success!  Assert that there's no extra garbage.
31     canon_host_output.Complete();
32     QUICHE_DCHECK_EQ(host_info->out_host.len,
33                      static_cast<int>(canon_host.length()));
34   } else {
35     // Empty host, or canonicalization failed.  We'll return empty.
36     canon_host.clear();
37   }
38 
39   return canon_host;
40 }
41 
// Returns true if |c| is an ASCII lowercase letter or an ASCII digit.
// Uppercase letters are deliberately not accepted: callers pass characters
// from a host that has already been normalized to lowercase.
bool IsHostCharAlphanumeric(char c) {
  const bool is_lowercase_letter = ('a' <= c) && (c <= 'z');
  const bool is_digit = ('0' <= c) && (c <= '9');
  return is_lowercase_letter || is_digit;
}
47 
IsCanonicalizedHostCompliant(const std::string & host)48 bool IsCanonicalizedHostCompliant(const std::string& host) {
49   if (host.empty()) {
50     return false;
51   }
52 
53   bool in_component = false;
54   bool most_recent_component_started_alphanumeric = false;
55 
56   for (char c : host) {
57     if (!in_component) {
58       most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c);
59       if (!most_recent_component_started_alphanumeric && (c != '-') &&
60           (c != '_')) {
61         return false;
62       }
63       in_component = true;
64     } else if (c == '.') {
65       in_component = false;
66     } else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) {
67       return false;
68     }
69   }
70 
71   return most_recent_component_started_alphanumeric;
72 }
73 
74 }  // namespace
75 
76 // static
IsValidSNI(absl::string_view sni)77 bool QuicheHostnameUtils::IsValidSNI(absl::string_view sni) {
78   // TODO(rtenneti): Support RFC2396 hostname.
79   // NOTE: Microsoft does NOT enforce this spec, so if we throw away hostnames
80   // based on the above spec, we may be losing some hostnames that windows
81   // would consider valid. By far the most common hostname character NOT
82   // accepted by the above spec is '_'.
83   url::CanonHostInfo host_info;
84   std::string canonicalized_host = CanonicalizeHost(sni, &host_info);
85   return !host_info.IsIPAddress() &&
86          IsCanonicalizedHostCompliant(canonicalized_host);
87 }
88 
89 // static
NormalizeHostname(absl::string_view hostname)90 std::string QuicheHostnameUtils::NormalizeHostname(absl::string_view hostname) {
91   url::CanonHostInfo host_info;
92   std::string host = CanonicalizeHost(hostname, &host_info);
93 
94   // Walk backwards over the string, stopping at the first trailing dot.
95   size_t host_end = host.length();
96   while (host_end != 0 && host[host_end - 1] == '.') {
97     host_end--;
98   }
99 
100   // Erase the trailing dots.
101   if (host_end != host.length()) {
102     host.erase(host_end, host.length() - host_end);
103   }
104 
105   return host;
106 }
107 
108 }  // namespace quiche
109