1 // Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "quiche/common/platform/api/quiche_hostname_utils.h"
6
7 #include <string>
8
9 #include "absl/strings/string_view.h"
10 #include "quiche/common/platform/api/quiche_googleurl.h"
11 #include "quiche/common/platform/api/quiche_logging.h"
12
13 namespace quiche {
14
15 // TODO(vasilvv): the functions below are forked from Chromium's
16 // net/base/url_util.h; those should be moved to googleurl.
17 namespace {
18
CanonicalizeHost(absl::string_view host,url::CanonHostInfo * host_info)19 std::string CanonicalizeHost(absl::string_view host,
20 url::CanonHostInfo* host_info) {
21 // Try to canonicalize the host.
22 const url::Component raw_host_component(0, static_cast<int>(host.length()));
23 std::string canon_host;
24 url::StdStringCanonOutput canon_host_output(&canon_host);
25 url::CanonicalizeHostVerbose(host.data(), raw_host_component,
26 &canon_host_output, host_info);
27
28 if (host_info->out_host.is_nonempty() &&
29 host_info->family != url::CanonHostInfo::BROKEN) {
30 // Success! Assert that there's no extra garbage.
31 canon_host_output.Complete();
32 QUICHE_DCHECK_EQ(host_info->out_host.len,
33 static_cast<int>(canon_host.length()));
34 } else {
35 // Empty host, or canonicalization failed. We'll return empty.
36 canon_host.clear();
37 }
38
39 return canon_host;
40 }
41
// Returns true if |c| is an ASCII digit or a lowercase ASCII letter.
// Uppercase letters are deliberately not accepted: callers pass canonicalized
// hosts, in which uppercase characters have already been normalized.
bool IsHostCharAlphanumeric(char c) {
  const bool is_lowercase_letter = 'a' <= c && c <= 'z';
  const bool is_digit = '0' <= c && c <= '9';
  return is_lowercase_letter || is_digit;
}
47
IsCanonicalizedHostCompliant(const std::string & host)48 bool IsCanonicalizedHostCompliant(const std::string& host) {
49 if (host.empty()) {
50 return false;
51 }
52
53 bool in_component = false;
54 bool most_recent_component_started_alphanumeric = false;
55
56 for (char c : host) {
57 if (!in_component) {
58 most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c);
59 if (!most_recent_component_started_alphanumeric && (c != '-') &&
60 (c != '_')) {
61 return false;
62 }
63 in_component = true;
64 } else if (c == '.') {
65 in_component = false;
66 } else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) {
67 return false;
68 }
69 }
70
71 return most_recent_component_started_alphanumeric;
72 }
73
74 } // namespace
75
76 // static
IsValidSNI(absl::string_view sni)77 bool QuicheHostnameUtils::IsValidSNI(absl::string_view sni) {
78 // TODO(rtenneti): Support RFC2396 hostname.
79 // NOTE: Microsoft does NOT enforce this spec, so if we throw away hostnames
80 // based on the above spec, we may be losing some hostnames that windows
81 // would consider valid. By far the most common hostname character NOT
82 // accepted by the above spec is '_'.
83 url::CanonHostInfo host_info;
84 std::string canonicalized_host = CanonicalizeHost(sni, &host_info);
85 return !host_info.IsIPAddress() &&
86 IsCanonicalizedHostCompliant(canonicalized_host);
87 }
88
89 // static
NormalizeHostname(absl::string_view hostname)90 std::string QuicheHostnameUtils::NormalizeHostname(absl::string_view hostname) {
91 url::CanonHostInfo host_info;
92 std::string host = CanonicalizeHost(hostname, &host_info);
93
94 // Walk backwards over the string, stopping at the first trailing dot.
95 size_t host_end = host.length();
96 while (host_end != 0 && host[host_end - 1] == '.') {
97 host_end--;
98 }
99
100 // Erase the trailing dots.
101 if (host_end != host.length()) {
102 host.erase(host_end, host.length() - host_end);
103 }
104
105 return host;
106 }
107
108 } // namespace quiche
109