xref: /aosp_15_r20/external/cronet/net/base/url_util.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "net/base/url_util.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include "build/build_config.h"
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_POSIX)
10*6777b538SAndroid Build Coastguard Worker #include <netinet/in.h>
11*6777b538SAndroid Build Coastguard Worker #elif BUILDFLAG(IS_WIN)
12*6777b538SAndroid Build Coastguard Worker #include <ws2tcpip.h>
13*6777b538SAndroid Build Coastguard Worker #endif
14*6777b538SAndroid Build Coastguard Worker 
15*6777b538SAndroid Build Coastguard Worker #include <optional>
16*6777b538SAndroid Build Coastguard Worker #include <string_view>
17*6777b538SAndroid Build Coastguard Worker 
18*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
19*6777b538SAndroid Build Coastguard Worker #include "base/containers/fixed_flat_set.h"
20*6777b538SAndroid Build Coastguard Worker #include "base/strings/escape.h"
21*6777b538SAndroid Build Coastguard Worker #include "base/strings/strcat.h"
22*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
23*6777b538SAndroid Build Coastguard Worker #include "base/strings/stringprintf.h"
24*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
25*6777b538SAndroid Build Coastguard Worker #include "net/base/ip_address.h"
26*6777b538SAndroid Build Coastguard Worker #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
27*6777b538SAndroid Build Coastguard Worker #include "url/gurl.h"
28*6777b538SAndroid Build Coastguard Worker #include "url/scheme_host_port.h"
29*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
30*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
31*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_ip.h"
32*6777b538SAndroid Build Coastguard Worker #include "url/url_constants.h"
33*6777b538SAndroid Build Coastguard Worker #include "url/url_util.h"
34*6777b538SAndroid Build Coastguard Worker 
35*6777b538SAndroid Build Coastguard Worker namespace net {
36*6777b538SAndroid Build Coastguard Worker 
37*6777b538SAndroid Build Coastguard Worker namespace {
38*6777b538SAndroid Build Coastguard Worker 
// Returns true if |c| is a lowercase ASCII letter ('a'-'z') or an ASCII digit
// ('0'-'9'). Only lowercase is checked because uppercase characters have
// already been normalized by the time this is called.
bool IsHostCharAlphanumeric(char c) {
  const bool is_lowercase_letter = (c >= 'a') && (c <= 'z');
  const bool is_digit = (c >= '0') && (c <= '9');
  return is_lowercase_letter || is_digit;
}
44*6777b538SAndroid Build Coastguard Worker 
IsNormalizedLocalhostTLD(std::string_view host)45*6777b538SAndroid Build Coastguard Worker bool IsNormalizedLocalhostTLD(std::string_view host) {
46*6777b538SAndroid Build Coastguard Worker   return base::EndsWith(host, ".localhost",
47*6777b538SAndroid Build Coastguard Worker                         base::CompareCase::INSENSITIVE_ASCII);
48*6777b538SAndroid Build Coastguard Worker }
49*6777b538SAndroid Build Coastguard Worker 
50*6777b538SAndroid Build Coastguard Worker // Helper function used by GetIdentityFromURL. If |escaped_text| can be "safely
51*6777b538SAndroid Build Coastguard Worker // unescaped" to a valid UTF-8 string, return that string, as UTF-16. Otherwise,
52*6777b538SAndroid Build Coastguard Worker // convert it as-is to UTF-16. "Safely unescaped" is defined as having no
53*6777b538SAndroid Build Coastguard Worker // escaped character between '0x00' and '0x1F', inclusive.
UnescapeIdentityString(std::string_view escaped_text)54*6777b538SAndroid Build Coastguard Worker std::u16string UnescapeIdentityString(std::string_view escaped_text) {
55*6777b538SAndroid Build Coastguard Worker   std::string unescaped_text;
56*6777b538SAndroid Build Coastguard Worker   if (base::UnescapeBinaryURLComponentSafe(
57*6777b538SAndroid Build Coastguard Worker           escaped_text, false /* fail_on_path_separators */, &unescaped_text)) {
58*6777b538SAndroid Build Coastguard Worker     std::u16string result;
59*6777b538SAndroid Build Coastguard Worker     if (base::UTF8ToUTF16(unescaped_text.data(), unescaped_text.length(),
60*6777b538SAndroid Build Coastguard Worker                           &result)) {
61*6777b538SAndroid Build Coastguard Worker       return result;
62*6777b538SAndroid Build Coastguard Worker     }
63*6777b538SAndroid Build Coastguard Worker   }
64*6777b538SAndroid Build Coastguard Worker   return base::UTF8ToUTF16(escaped_text);
65*6777b538SAndroid Build Coastguard Worker }
66*6777b538SAndroid Build Coastguard Worker 
67*6777b538SAndroid Build Coastguard Worker }  // namespace
68*6777b538SAndroid Build Coastguard Worker 
// Returns a copy of |url| whose query string has "name=value" appended. Both
// |name| and |value| are escaped with base::EscapeQueryParamValue() before
// being appended. If |url| already has a non-empty query, the new pair is
// separated from it with '&'.
GURL AppendQueryParameter(const GURL& url,
                          std::string_view name,
                          std::string_view value) {
  std::string query(url.query());

  if (!query.empty()) {
    query += "&";
  }

  // Append piece by piece rather than building a temporary concatenation.
  query += base::EscapeQueryParamValue(name, true);
  query += "=";
  query += base::EscapeQueryParamValue(value, true);

  // |query| must stay alive until ReplaceComponents() has run.
  GURL::Replacements replacements;
  replacements.SetQueryStr(query);
  return url.ReplaceComponents(replacements);
}
83*6777b538SAndroid Build Coastguard Worker 
AppendOrReplaceQueryParameter(const GURL & url,std::string_view name,std::optional<std::string_view> value)84*6777b538SAndroid Build Coastguard Worker GURL AppendOrReplaceQueryParameter(const GURL& url,
85*6777b538SAndroid Build Coastguard Worker                                    std::string_view name,
86*6777b538SAndroid Build Coastguard Worker                                    std::optional<std::string_view> value) {
87*6777b538SAndroid Build Coastguard Worker   bool replaced = false;
88*6777b538SAndroid Build Coastguard Worker   std::string param_name = base::EscapeQueryParamValue(name, true);
89*6777b538SAndroid Build Coastguard Worker   bool should_keep_param = value.has_value();
90*6777b538SAndroid Build Coastguard Worker 
91*6777b538SAndroid Build Coastguard Worker   std::string param_value;
92*6777b538SAndroid Build Coastguard Worker   if (should_keep_param)
93*6777b538SAndroid Build Coastguard Worker     param_value = base::EscapeQueryParamValue(value.value(), true);
94*6777b538SAndroid Build Coastguard Worker 
95*6777b538SAndroid Build Coastguard Worker   const std::string_view input = url.query_piece();
96*6777b538SAndroid Build Coastguard Worker   url::Component cursor(0, input.size());
97*6777b538SAndroid Build Coastguard Worker   std::string output;
98*6777b538SAndroid Build Coastguard Worker   url::Component key_range, value_range;
99*6777b538SAndroid Build Coastguard Worker   while (url::ExtractQueryKeyValue(input, &cursor, &key_range, &value_range)) {
100*6777b538SAndroid Build Coastguard Worker     const std::string_view key = input.substr(key_range.begin, key_range.len);
101*6777b538SAndroid Build Coastguard Worker     std::string key_value_pair;
102*6777b538SAndroid Build Coastguard Worker     // Check |replaced| as only the first pair should be replaced.
103*6777b538SAndroid Build Coastguard Worker     if (!replaced && key == param_name) {
104*6777b538SAndroid Build Coastguard Worker       replaced = true;
105*6777b538SAndroid Build Coastguard Worker       if (!should_keep_param)
106*6777b538SAndroid Build Coastguard Worker         continue;
107*6777b538SAndroid Build Coastguard Worker 
108*6777b538SAndroid Build Coastguard Worker       key_value_pair = param_name + "=" + param_value;
109*6777b538SAndroid Build Coastguard Worker     } else {
110*6777b538SAndroid Build Coastguard Worker       key_value_pair = std::string(
111*6777b538SAndroid Build Coastguard Worker           input.substr(key_range.begin, value_range.end() - key_range.begin));
112*6777b538SAndroid Build Coastguard Worker     }
113*6777b538SAndroid Build Coastguard Worker     if (!output.empty())
114*6777b538SAndroid Build Coastguard Worker       output += "&";
115*6777b538SAndroid Build Coastguard Worker 
116*6777b538SAndroid Build Coastguard Worker     output += key_value_pair;
117*6777b538SAndroid Build Coastguard Worker   }
118*6777b538SAndroid Build Coastguard Worker   if (!replaced && should_keep_param) {
119*6777b538SAndroid Build Coastguard Worker     if (!output.empty())
120*6777b538SAndroid Build Coastguard Worker       output += "&";
121*6777b538SAndroid Build Coastguard Worker 
122*6777b538SAndroid Build Coastguard Worker     output += (param_name + "=" + param_value);
123*6777b538SAndroid Build Coastguard Worker   }
124*6777b538SAndroid Build Coastguard Worker   GURL::Replacements replacements;
125*6777b538SAndroid Build Coastguard Worker   replacements.SetQueryStr(output);
126*6777b538SAndroid Build Coastguard Worker   return url.ReplaceComponents(replacements);
127*6777b538SAndroid Build Coastguard Worker }
128*6777b538SAndroid Build Coastguard Worker 
// Returns a copy of |url| with its ref (fragment) component set to |ref|.
GURL AppendOrReplaceRef(const GURL& url, const std::string_view& ref) {
  GURL::Replacements ref_replacement;
  ref_replacement.SetRefStr(ref);
  return url.ReplaceComponents(ref_replacement);
}
134*6777b538SAndroid Build Coastguard Worker 
QueryIterator(const GURL & url)135*6777b538SAndroid Build Coastguard Worker QueryIterator::QueryIterator(const GURL& url)
136*6777b538SAndroid Build Coastguard Worker     : url_(url), at_end_(!url.is_valid()) {
137*6777b538SAndroid Build Coastguard Worker   if (!at_end_) {
138*6777b538SAndroid Build Coastguard Worker     query_ = url.parsed_for_possibly_invalid_spec().query;
139*6777b538SAndroid Build Coastguard Worker     Advance();
140*6777b538SAndroid Build Coastguard Worker   }
141*6777b538SAndroid Build Coastguard Worker }
142*6777b538SAndroid Build Coastguard Worker 
143*6777b538SAndroid Build Coastguard Worker QueryIterator::~QueryIterator() = default;
144*6777b538SAndroid Build Coastguard Worker 
GetKey() const145*6777b538SAndroid Build Coastguard Worker std::string_view QueryIterator::GetKey() const {
146*6777b538SAndroid Build Coastguard Worker   DCHECK(!at_end_);
147*6777b538SAndroid Build Coastguard Worker   if (key_.is_nonempty())
148*6777b538SAndroid Build Coastguard Worker     return std::string_view(url_->spec()).substr(key_.begin, key_.len);
149*6777b538SAndroid Build Coastguard Worker   return std::string_view();
150*6777b538SAndroid Build Coastguard Worker }
151*6777b538SAndroid Build Coastguard Worker 
GetValue() const152*6777b538SAndroid Build Coastguard Worker std::string_view QueryIterator::GetValue() const {
153*6777b538SAndroid Build Coastguard Worker   DCHECK(!at_end_);
154*6777b538SAndroid Build Coastguard Worker   if (value_.is_nonempty())
155*6777b538SAndroid Build Coastguard Worker     return std::string_view(url_->spec()).substr(value_.begin, value_.len);
156*6777b538SAndroid Build Coastguard Worker   return std::string_view();
157*6777b538SAndroid Build Coastguard Worker }
158*6777b538SAndroid Build Coastguard Worker 
GetUnescapedValue()159*6777b538SAndroid Build Coastguard Worker const std::string& QueryIterator::GetUnescapedValue() {
160*6777b538SAndroid Build Coastguard Worker   DCHECK(!at_end_);
161*6777b538SAndroid Build Coastguard Worker   if (value_.is_nonempty() && unescaped_value_.empty()) {
162*6777b538SAndroid Build Coastguard Worker     unescaped_value_ = base::UnescapeURLComponent(
163*6777b538SAndroid Build Coastguard Worker         GetValue(),
164*6777b538SAndroid Build Coastguard Worker         base::UnescapeRule::SPACES | base::UnescapeRule::PATH_SEPARATORS |
165*6777b538SAndroid Build Coastguard Worker             base::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS |
166*6777b538SAndroid Build Coastguard Worker             base::UnescapeRule::REPLACE_PLUS_WITH_SPACE);
167*6777b538SAndroid Build Coastguard Worker   }
168*6777b538SAndroid Build Coastguard Worker   return unescaped_value_;
169*6777b538SAndroid Build Coastguard Worker }
170*6777b538SAndroid Build Coastguard Worker 
IsAtEnd() const171*6777b538SAndroid Build Coastguard Worker bool QueryIterator::IsAtEnd() const {
172*6777b538SAndroid Build Coastguard Worker   return at_end_;
173*6777b538SAndroid Build Coastguard Worker }
174*6777b538SAndroid Build Coastguard Worker 
Advance()175*6777b538SAndroid Build Coastguard Worker void QueryIterator::Advance() {
176*6777b538SAndroid Build Coastguard Worker   DCHECK(!at_end_);
177*6777b538SAndroid Build Coastguard Worker   key_.reset();
178*6777b538SAndroid Build Coastguard Worker   value_.reset();
179*6777b538SAndroid Build Coastguard Worker   unescaped_value_.clear();
180*6777b538SAndroid Build Coastguard Worker   at_end_ = !url::ExtractQueryKeyValue(url_->spec(), &query_, &key_, &value_);
181*6777b538SAndroid Build Coastguard Worker }
182*6777b538SAndroid Build Coastguard Worker 
GetValueForKeyInQuery(const GURL & url,std::string_view search_key,std::string * out_value)183*6777b538SAndroid Build Coastguard Worker bool GetValueForKeyInQuery(const GURL& url,
184*6777b538SAndroid Build Coastguard Worker                            std::string_view search_key,
185*6777b538SAndroid Build Coastguard Worker                            std::string* out_value) {
186*6777b538SAndroid Build Coastguard Worker   for (QueryIterator it(url); !it.IsAtEnd(); it.Advance()) {
187*6777b538SAndroid Build Coastguard Worker     if (it.GetKey() == search_key) {
188*6777b538SAndroid Build Coastguard Worker       *out_value = it.GetUnescapedValue();
189*6777b538SAndroid Build Coastguard Worker       return true;
190*6777b538SAndroid Build Coastguard Worker     }
191*6777b538SAndroid Build Coastguard Worker   }
192*6777b538SAndroid Build Coastguard Worker   return false;
193*6777b538SAndroid Build Coastguard Worker }
194*6777b538SAndroid Build Coastguard Worker 
ParseHostAndPort(std::string_view input,std::string * host,int * port)195*6777b538SAndroid Build Coastguard Worker bool ParseHostAndPort(std::string_view input, std::string* host, int* port) {
196*6777b538SAndroid Build Coastguard Worker   if (input.empty())
197*6777b538SAndroid Build Coastguard Worker     return false;
198*6777b538SAndroid Build Coastguard Worker 
199*6777b538SAndroid Build Coastguard Worker   url::Component auth_component(0, input.size());
200*6777b538SAndroid Build Coastguard Worker   url::Component username_component;
201*6777b538SAndroid Build Coastguard Worker   url::Component password_component;
202*6777b538SAndroid Build Coastguard Worker   url::Component hostname_component;
203*6777b538SAndroid Build Coastguard Worker   url::Component port_component;
204*6777b538SAndroid Build Coastguard Worker 
205*6777b538SAndroid Build Coastguard Worker   // `input` is not NUL-terminated, so `input.data()` must be accompanied by a
206*6777b538SAndroid Build Coastguard Worker   // length. In these calls, `url::Component` provides an offset and length.
207*6777b538SAndroid Build Coastguard Worker   url::ParseAuthority(input.data(), auth_component, &username_component,
208*6777b538SAndroid Build Coastguard Worker                       &password_component, &hostname_component,
209*6777b538SAndroid Build Coastguard Worker                       &port_component);
210*6777b538SAndroid Build Coastguard Worker 
211*6777b538SAndroid Build Coastguard Worker   // There shouldn't be a username/password.
212*6777b538SAndroid Build Coastguard Worker   if (username_component.is_valid() || password_component.is_valid())
213*6777b538SAndroid Build Coastguard Worker     return false;
214*6777b538SAndroid Build Coastguard Worker 
215*6777b538SAndroid Build Coastguard Worker   if (hostname_component.is_empty())
216*6777b538SAndroid Build Coastguard Worker     return false;  // Failed parsing.
217*6777b538SAndroid Build Coastguard Worker 
218*6777b538SAndroid Build Coastguard Worker   int parsed_port_number = -1;
219*6777b538SAndroid Build Coastguard Worker   if (port_component.is_nonempty()) {
220*6777b538SAndroid Build Coastguard Worker     parsed_port_number = url::ParsePort(input.data(), port_component);
221*6777b538SAndroid Build Coastguard Worker 
222*6777b538SAndroid Build Coastguard Worker     // If parsing failed, port_number will be either PORT_INVALID or
223*6777b538SAndroid Build Coastguard Worker     // PORT_UNSPECIFIED, both of which are negative.
224*6777b538SAndroid Build Coastguard Worker     if (parsed_port_number < 0)
225*6777b538SAndroid Build Coastguard Worker       return false;  // Failed parsing the port number.
226*6777b538SAndroid Build Coastguard Worker   }
227*6777b538SAndroid Build Coastguard Worker 
228*6777b538SAndroid Build Coastguard Worker   if (port_component.len == 0)
229*6777b538SAndroid Build Coastguard Worker     return false;  // Reject inputs like "foo:"
230*6777b538SAndroid Build Coastguard Worker 
231*6777b538SAndroid Build Coastguard Worker   unsigned char tmp_ipv6_addr[16];
232*6777b538SAndroid Build Coastguard Worker 
233*6777b538SAndroid Build Coastguard Worker   // If the hostname starts with a bracket, it is either an IPv6 literal or
234*6777b538SAndroid Build Coastguard Worker   // invalid. If it is an IPv6 literal then strip the brackets.
235*6777b538SAndroid Build Coastguard Worker   if (hostname_component.len > 0 && input[hostname_component.begin] == '[') {
236*6777b538SAndroid Build Coastguard Worker     if (input[hostname_component.end() - 1] == ']' &&
237*6777b538SAndroid Build Coastguard Worker         url::IPv6AddressToNumber(input.data(), hostname_component,
238*6777b538SAndroid Build Coastguard Worker                                  tmp_ipv6_addr)) {
239*6777b538SAndroid Build Coastguard Worker       // Strip the brackets.
240*6777b538SAndroid Build Coastguard Worker       hostname_component.begin++;
241*6777b538SAndroid Build Coastguard Worker       hostname_component.len -= 2;
242*6777b538SAndroid Build Coastguard Worker     } else {
243*6777b538SAndroid Build Coastguard Worker       return false;
244*6777b538SAndroid Build Coastguard Worker     }
245*6777b538SAndroid Build Coastguard Worker   }
246*6777b538SAndroid Build Coastguard Worker 
247*6777b538SAndroid Build Coastguard Worker   // Pass results back to caller.
248*6777b538SAndroid Build Coastguard Worker   *host = std::string(
249*6777b538SAndroid Build Coastguard Worker       input.substr(hostname_component.begin, hostname_component.len));
250*6777b538SAndroid Build Coastguard Worker   *port = parsed_port_number;
251*6777b538SAndroid Build Coastguard Worker 
252*6777b538SAndroid Build Coastguard Worker   return true;  // Success.
253*6777b538SAndroid Build Coastguard Worker }
254*6777b538SAndroid Build Coastguard Worker 
GetHostAndPort(const GURL & url)255*6777b538SAndroid Build Coastguard Worker std::string GetHostAndPort(const GURL& url) {
256*6777b538SAndroid Build Coastguard Worker   // For IPv6 literals, GURL::host() already includes the brackets so it is
257*6777b538SAndroid Build Coastguard Worker   // safe to just append a colon.
258*6777b538SAndroid Build Coastguard Worker   return base::StringPrintf("%s:%d", url.host().c_str(),
259*6777b538SAndroid Build Coastguard Worker                             url.EffectiveIntPort());
260*6777b538SAndroid Build Coastguard Worker }
261*6777b538SAndroid Build Coastguard Worker 
GetHostAndOptionalPort(const GURL & url)262*6777b538SAndroid Build Coastguard Worker std::string GetHostAndOptionalPort(const GURL& url) {
263*6777b538SAndroid Build Coastguard Worker   // For IPv6 literals, GURL::host() already includes the brackets
264*6777b538SAndroid Build Coastguard Worker   // so it is safe to just append a colon.
265*6777b538SAndroid Build Coastguard Worker   if (url.has_port())
266*6777b538SAndroid Build Coastguard Worker     return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str());
267*6777b538SAndroid Build Coastguard Worker   return url.host();
268*6777b538SAndroid Build Coastguard Worker }
269*6777b538SAndroid Build Coastguard Worker 
GetHostAndOptionalPort(const url::SchemeHostPort & scheme_host_port)270*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndOptionalPort(
271*6777b538SAndroid Build Coastguard Worker     const url::SchemeHostPort& scheme_host_port) {
272*6777b538SAndroid Build Coastguard Worker   int default_port = url::DefaultPortForScheme(
273*6777b538SAndroid Build Coastguard Worker       scheme_host_port.scheme().data(),
274*6777b538SAndroid Build Coastguard Worker       static_cast<int>(scheme_host_port.scheme().length()));
275*6777b538SAndroid Build Coastguard Worker   if (default_port != scheme_host_port.port()) {
276*6777b538SAndroid Build Coastguard Worker     return base::StringPrintf("%s:%i", scheme_host_port.host().c_str(),
277*6777b538SAndroid Build Coastguard Worker                               scheme_host_port.port());
278*6777b538SAndroid Build Coastguard Worker   }
279*6777b538SAndroid Build Coastguard Worker   return scheme_host_port.host();
280*6777b538SAndroid Build Coastguard Worker }
281*6777b538SAndroid Build Coastguard Worker 
// Returns |host| with a single trailing '.' removed, if present. A host that
// consists solely of "." (or is empty) is returned unchanged, and at most one
// dot is stripped.
std::string TrimEndingDot(std::string_view host) {
  // |host| is a by-value view, so shrinking it does not affect the caller.
  if (host.length() > 1 && host.back() == '.') {
    host.remove_suffix(1);
  }
  return std::string(host);
}
290*6777b538SAndroid Build Coastguard Worker 
GetHostOrSpecFromURL(const GURL & url)291*6777b538SAndroid Build Coastguard Worker std::string GetHostOrSpecFromURL(const GURL& url) {
292*6777b538SAndroid Build Coastguard Worker   return url.has_host() ? TrimEndingDot(url.host_piece()) : url.spec();
293*6777b538SAndroid Build Coastguard Worker }
294*6777b538SAndroid Build Coastguard Worker 
// Returns everything after the first '.' in |domain| (i.e. |domain| with its
// leftmost label removed), or an empty string if |domain| contains no '.'.
std::string GetSuperdomain(std::string_view domain) {
  const size_t first_dot = domain.find('.');
  if (first_dot == std::string_view::npos) {
    return std::string();
  }
  return std::string(domain.substr(first_dot + 1));
}
301*6777b538SAndroid Build Coastguard Worker 
// Returns true if |subdomain| is identical to |superdomain|, or if it ends
// with |superdomain| and the character immediately before that suffix is a
// '.'. The comparison is exact (case-sensitive).
bool IsSubdomainOf(std::string_view subdomain, std::string_view superdomain) {
  // Subdomain must be identical or have strictly more labels than the
  // superdomain.
  if (subdomain.length() <= superdomain.length()) {
    return subdomain == superdomain;
  }

  // Superdomain must be suffix of subdomain, and the last character not
  // included in the matching substring must be a dot. |prefix_length| is at
  // least 1 here because |subdomain| is strictly longer.
  const size_t prefix_length = subdomain.length() - superdomain.length();
  if (subdomain.substr(prefix_length) != superdomain) {
    return false;
  }
  return subdomain[prefix_length - 1] == '.';
}
316*6777b538SAndroid Build Coastguard Worker 
CanonicalizeHost(std::string_view host,url::CanonHostInfo * host_info)317*6777b538SAndroid Build Coastguard Worker std::string CanonicalizeHost(std::string_view host,
318*6777b538SAndroid Build Coastguard Worker                              url::CanonHostInfo* host_info) {
319*6777b538SAndroid Build Coastguard Worker   // Try to canonicalize the host.
320*6777b538SAndroid Build Coastguard Worker   const url::Component raw_host_component(0, static_cast<int>(host.length()));
321*6777b538SAndroid Build Coastguard Worker   std::string canon_host;
322*6777b538SAndroid Build Coastguard Worker   url::StdStringCanonOutput canon_host_output(&canon_host);
323*6777b538SAndroid Build Coastguard Worker   // A url::StdStringCanonOutput starts off with a zero length buffer. The
324*6777b538SAndroid Build Coastguard Worker   // first time through Grow() immediately resizes it to 32 bytes, incurring
325*6777b538SAndroid Build Coastguard Worker   // a malloc. With libcxx a 22 byte or smaller request can be accommodated
326*6777b538SAndroid Build Coastguard Worker   // within the std::string itself (i.e. no malloc occurs). Start the buffer
327*6777b538SAndroid Build Coastguard Worker   // off at the max size to avoid a malloc on short strings.
328*6777b538SAndroid Build Coastguard Worker   // NOTE: To ensure the final size is correctly reflected, it's necessary
329*6777b538SAndroid Build Coastguard Worker   // to call Complete() which will adjust the size to the actual bytes written.
330*6777b538SAndroid Build Coastguard Worker   // This is handled below for success cases, while failure cases discard all
331*6777b538SAndroid Build Coastguard Worker   // the output.
332*6777b538SAndroid Build Coastguard Worker   const int kCxxMaxStringBufferSizeWithoutMalloc = 22;
333*6777b538SAndroid Build Coastguard Worker   canon_host_output.Resize(kCxxMaxStringBufferSizeWithoutMalloc);
334*6777b538SAndroid Build Coastguard Worker   url::CanonicalizeHostVerbose(host.data(), raw_host_component,
335*6777b538SAndroid Build Coastguard Worker                                &canon_host_output, host_info);
336*6777b538SAndroid Build Coastguard Worker 
337*6777b538SAndroid Build Coastguard Worker   if (host_info->out_host.is_nonempty() &&
338*6777b538SAndroid Build Coastguard Worker       host_info->family != url::CanonHostInfo::BROKEN) {
339*6777b538SAndroid Build Coastguard Worker     // Success!  Assert that there's no extra garbage.
340*6777b538SAndroid Build Coastguard Worker     canon_host_output.Complete();
341*6777b538SAndroid Build Coastguard Worker     DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));
342*6777b538SAndroid Build Coastguard Worker   } else {
343*6777b538SAndroid Build Coastguard Worker     // Empty host, or canonicalization failed.  We'll return empty.
344*6777b538SAndroid Build Coastguard Worker     canon_host.clear();
345*6777b538SAndroid Build Coastguard Worker   }
346*6777b538SAndroid Build Coastguard Worker 
347*6777b538SAndroid Build Coastguard Worker   return canon_host;
348*6777b538SAndroid Build Coastguard Worker }
349*6777b538SAndroid Build Coastguard Worker 
IsCanonicalizedHostCompliant(std::string_view host)350*6777b538SAndroid Build Coastguard Worker bool IsCanonicalizedHostCompliant(std::string_view host) {
351*6777b538SAndroid Build Coastguard Worker   if (host.empty() || host.size() > 254 ||
352*6777b538SAndroid Build Coastguard Worker       (host.back() != '.' && host.size() == 254)) {
353*6777b538SAndroid Build Coastguard Worker     return false;
354*6777b538SAndroid Build Coastguard Worker   }
355*6777b538SAndroid Build Coastguard Worker 
356*6777b538SAndroid Build Coastguard Worker   bool in_component = false;
357*6777b538SAndroid Build Coastguard Worker   bool most_recent_component_started_alphanumeric = false;
358*6777b538SAndroid Build Coastguard Worker   size_t label_size = 0;
359*6777b538SAndroid Build Coastguard Worker 
360*6777b538SAndroid Build Coastguard Worker   for (char c : host) {
361*6777b538SAndroid Build Coastguard Worker     ++label_size;
362*6777b538SAndroid Build Coastguard Worker     if (!in_component) {
363*6777b538SAndroid Build Coastguard Worker       most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c);
364*6777b538SAndroid Build Coastguard Worker       if (!most_recent_component_started_alphanumeric && (c != '-') &&
365*6777b538SAndroid Build Coastguard Worker           (c != '_')) {
366*6777b538SAndroid Build Coastguard Worker         return false;
367*6777b538SAndroid Build Coastguard Worker       }
368*6777b538SAndroid Build Coastguard Worker       in_component = true;
369*6777b538SAndroid Build Coastguard Worker     } else if (c == '.') {
370*6777b538SAndroid Build Coastguard Worker       in_component = false;
371*6777b538SAndroid Build Coastguard Worker       if (label_size > 64 || label_size == 1) {
372*6777b538SAndroid Build Coastguard Worker         // Label should not be empty or longer than 63 characters (+1 for '.'
373*6777b538SAndroid Build Coastguard Worker         // character included in `label_size`).
374*6777b538SAndroid Build Coastguard Worker         return false;
375*6777b538SAndroid Build Coastguard Worker       } else {
376*6777b538SAndroid Build Coastguard Worker         label_size = 0;
377*6777b538SAndroid Build Coastguard Worker       }
378*6777b538SAndroid Build Coastguard Worker     } else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) {
379*6777b538SAndroid Build Coastguard Worker       return false;
380*6777b538SAndroid Build Coastguard Worker     }
381*6777b538SAndroid Build Coastguard Worker   }
382*6777b538SAndroid Build Coastguard Worker 
383*6777b538SAndroid Build Coastguard Worker   // Check for too-long label when not ended with final '.'.
384*6777b538SAndroid Build Coastguard Worker   if (label_size > 63)
385*6777b538SAndroid Build Coastguard Worker     return false;
386*6777b538SAndroid Build Coastguard Worker 
387*6777b538SAndroid Build Coastguard Worker   return most_recent_component_started_alphanumeric;
388*6777b538SAndroid Build Coastguard Worker }
389*6777b538SAndroid Build Coastguard Worker 
IsHostnameNonUnique(std::string_view hostname)390*6777b538SAndroid Build Coastguard Worker bool IsHostnameNonUnique(std::string_view hostname) {
391*6777b538SAndroid Build Coastguard Worker   // CanonicalizeHost requires surrounding brackets to parse an IPv6 address.
392*6777b538SAndroid Build Coastguard Worker   const std::string host_or_ip = hostname.find(':') != std::string::npos
393*6777b538SAndroid Build Coastguard Worker                                      ? base::StrCat({"[", hostname, "]"})
394*6777b538SAndroid Build Coastguard Worker                                      : std::string(hostname);
395*6777b538SAndroid Build Coastguard Worker   url::CanonHostInfo host_info;
396*6777b538SAndroid Build Coastguard Worker   std::string canonical_name = CanonicalizeHost(host_or_ip, &host_info);
397*6777b538SAndroid Build Coastguard Worker 
398*6777b538SAndroid Build Coastguard Worker   // If canonicalization fails, then the input is truly malformed. However,
399*6777b538SAndroid Build Coastguard Worker   // to avoid mis-reporting bad inputs as "non-unique", treat them as unique.
400*6777b538SAndroid Build Coastguard Worker   if (canonical_name.empty())
401*6777b538SAndroid Build Coastguard Worker     return false;
402*6777b538SAndroid Build Coastguard Worker 
403*6777b538SAndroid Build Coastguard Worker   // If |hostname| is an IP address, check to see if it's in an IANA-reserved
404*6777b538SAndroid Build Coastguard Worker   // range reserved for non-publicly routable networks.
405*6777b538SAndroid Build Coastguard Worker   if (host_info.IsIPAddress()) {
406*6777b538SAndroid Build Coastguard Worker     IPAddress host_addr;
407*6777b538SAndroid Build Coastguard Worker     if (!host_addr.AssignFromIPLiteral(hostname.substr(
408*6777b538SAndroid Build Coastguard Worker             host_info.out_host.begin, host_info.out_host.len))) {
409*6777b538SAndroid Build Coastguard Worker       return false;
410*6777b538SAndroid Build Coastguard Worker     }
411*6777b538SAndroid Build Coastguard Worker     switch (host_info.family) {
412*6777b538SAndroid Build Coastguard Worker       case url::CanonHostInfo::IPV4:
413*6777b538SAndroid Build Coastguard Worker       case url::CanonHostInfo::IPV6:
414*6777b538SAndroid Build Coastguard Worker         return !host_addr.IsPubliclyRoutable();
415*6777b538SAndroid Build Coastguard Worker       case url::CanonHostInfo::NEUTRAL:
416*6777b538SAndroid Build Coastguard Worker       case url::CanonHostInfo::BROKEN:
417*6777b538SAndroid Build Coastguard Worker         return false;
418*6777b538SAndroid Build Coastguard Worker     }
419*6777b538SAndroid Build Coastguard Worker   }
420*6777b538SAndroid Build Coastguard Worker 
421*6777b538SAndroid Build Coastguard Worker   // Check for a registry controlled portion of |hostname|, ignoring private
422*6777b538SAndroid Build Coastguard Worker   // registries, as they already chain to ICANN-administered registries,
423*6777b538SAndroid Build Coastguard Worker   // and explicitly ignoring unknown registries.
424*6777b538SAndroid Build Coastguard Worker   //
425*6777b538SAndroid Build Coastguard Worker   // Note: This means that as new gTLDs are introduced on the Internet, they
426*6777b538SAndroid Build Coastguard Worker   // will be treated as non-unique until the registry controlled domain list
427*6777b538SAndroid Build Coastguard Worker   // is updated. However, because gTLDs are expected to provide significant
428*6777b538SAndroid Build Coastguard Worker   // advance notice to deprecate older versions of this code, this an
429*6777b538SAndroid Build Coastguard Worker   // acceptable tradeoff.
430*6777b538SAndroid Build Coastguard Worker   return !registry_controlled_domains::HostHasRegistryControlledDomain(
431*6777b538SAndroid Build Coastguard Worker       canonical_name, registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
432*6777b538SAndroid Build Coastguard Worker       registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
433*6777b538SAndroid Build Coastguard Worker }
434*6777b538SAndroid Build Coastguard Worker 
IsLocalhost(const GURL & url)435*6777b538SAndroid Build Coastguard Worker bool IsLocalhost(const GURL& url) {
436*6777b538SAndroid Build Coastguard Worker   return HostStringIsLocalhost(url.HostNoBracketsPiece());
437*6777b538SAndroid Build Coastguard Worker }
438*6777b538SAndroid Build Coastguard Worker 
HostStringIsLocalhost(std::string_view host)439*6777b538SAndroid Build Coastguard Worker bool HostStringIsLocalhost(std::string_view host) {
440*6777b538SAndroid Build Coastguard Worker   IPAddress ip_address;
441*6777b538SAndroid Build Coastguard Worker   if (ip_address.AssignFromIPLiteral(host))
442*6777b538SAndroid Build Coastguard Worker     return ip_address.IsLoopback();
443*6777b538SAndroid Build Coastguard Worker   return IsLocalHostname(host);
444*6777b538SAndroid Build Coastguard Worker }
445*6777b538SAndroid Build Coastguard Worker 
// Returns a copy of |url| with the username, password and ref components
// cleared. |url| is expected to be valid (DCHECKed).
GURL SimplifyUrlForRequest(const GURL& url) {
  DCHECK(url.is_valid());

  // When none of the components to strip is present, return the URL as-is.
  // This fast path avoids re-canonicalization via ReplaceComponents.
  const bool has_strippable_component =
      url.has_username() || url.has_password() || url.has_ref();
  if (!has_strippable_component) {
    return url;
  }

  GURL::Replacements strip_identity_and_ref;
  strip_identity_and_ref.ClearUsername();
  strip_identity_and_ref.ClearPassword();
  strip_identity_and_ref.ClearRef();
  return url.ReplaceComponents(strip_identity_and_ref);
}
457*6777b538SAndroid Build Coastguard Worker 
// Returns |url| with its WebSocket scheme swapped for the matching HTTP one:
// the wss scheme becomes https, anything else becomes http. |url| is expected
// to have a ws or wss scheme (DCHECKed).
GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url) {
  DCHECK(url.SchemeIsWSOrWSS());

  GURL::Replacements scheme_swap;
  if (url.SchemeIs(url::kWssScheme)) {
    scheme_swap.SetSchemeStr(url::kHttpsScheme);
  } else {
    scheme_swap.SetSchemeStr(url::kHttpScheme);
  }
  return url.ReplaceComponents(scheme_swap);
}
465*6777b538SAndroid Build Coastguard Worker 
IsStandardSchemeWithNetworkHost(std::string_view scheme)466*6777b538SAndroid Build Coastguard Worker bool IsStandardSchemeWithNetworkHost(std::string_view scheme) {
467*6777b538SAndroid Build Coastguard Worker   // file scheme is special. Windows file share origins can have network hosts.
468*6777b538SAndroid Build Coastguard Worker   if (scheme == url::kFileScheme)
469*6777b538SAndroid Build Coastguard Worker     return true;
470*6777b538SAndroid Build Coastguard Worker 
471*6777b538SAndroid Build Coastguard Worker   url::SchemeType scheme_type;
472*6777b538SAndroid Build Coastguard Worker   if (!url::GetStandardSchemeType(
473*6777b538SAndroid Build Coastguard Worker           scheme.data(), url::Component(0, scheme.length()), &scheme_type)) {
474*6777b538SAndroid Build Coastguard Worker     return false;
475*6777b538SAndroid Build Coastguard Worker   }
476*6777b538SAndroid Build Coastguard Worker   return scheme_type == url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION ||
477*6777b538SAndroid Build Coastguard Worker          scheme_type == url::SCHEME_WITH_HOST_AND_PORT;
478*6777b538SAndroid Build Coastguard Worker }
479*6777b538SAndroid Build Coastguard Worker 
GetIdentityFromURL(const GURL & url,std::u16string * username,std::u16string * password)480*6777b538SAndroid Build Coastguard Worker void GetIdentityFromURL(const GURL& url,
481*6777b538SAndroid Build Coastguard Worker                         std::u16string* username,
482*6777b538SAndroid Build Coastguard Worker                         std::u16string* password) {
483*6777b538SAndroid Build Coastguard Worker   *username = UnescapeIdentityString(url.username());
484*6777b538SAndroid Build Coastguard Worker   *password = UnescapeIdentityString(url.password());
485*6777b538SAndroid Build Coastguard Worker }
486*6777b538SAndroid Build Coastguard Worker 
HasGoogleHost(const GURL & url)487*6777b538SAndroid Build Coastguard Worker bool HasGoogleHost(const GURL& url) {
488*6777b538SAndroid Build Coastguard Worker   return IsGoogleHost(url.host_piece());
489*6777b538SAndroid Build Coastguard Worker }
490*6777b538SAndroid Build Coastguard Worker 
// Returns true if |host| ends with one of a fixed list of Google-owned domain
// suffixes. Every suffix starts with '.', so a bare registrable domain such as
// "google.com" does not match, while "www.google.com" does.
bool IsGoogleHost(std::string_view host) {
  static constexpr std::string_view kGoogleHostSuffixes[] = {
      ".google.com",
      ".youtube.com",
      ".gmail.com",
      ".doubleclick.net",
      ".gstatic.com",
      ".googlevideo.com",
      ".googleusercontent.com",
      ".googlesyndication.com",
      ".google-analytics.com",
      ".googleadservices.com",
      ".googleapis.com",
      ".ytimg.com",
  };

  // A case-sensitive comparison is enough (and faster): the table above is
  // all lowercase, and a GURL's host name is always canonicalized to
  // lowercase as well.
  for (const std::string_view suffix : kGoogleHostSuffixes) {
    if (host.size() < suffix.size()) {
      continue;
    }
    const std::string_view::size_type tail_start = host.size() - suffix.size();
    if (host.compare(tail_start, suffix.size(), suffix) == 0) {
      return true;
    }
  }
  return false;
}
516*6777b538SAndroid Build Coastguard Worker 
IsGoogleHostWithAlpnH3(std::string_view host)517*6777b538SAndroid Build Coastguard Worker bool IsGoogleHostWithAlpnH3(std::string_view host) {
518*6777b538SAndroid Build Coastguard Worker   return base::EqualsCaseInsensitiveASCII(host, "google.com") ||
519*6777b538SAndroid Build Coastguard Worker          base::EqualsCaseInsensitiveASCII(host, "www.google.com");
520*6777b538SAndroid Build Coastguard Worker }
521*6777b538SAndroid Build Coastguard Worker 
IsLocalHostname(std::string_view host)522*6777b538SAndroid Build Coastguard Worker bool IsLocalHostname(std::string_view host) {
523*6777b538SAndroid Build Coastguard Worker   // Remove any trailing '.'.
524*6777b538SAndroid Build Coastguard Worker   if (!host.empty() && *host.rbegin() == '.')
525*6777b538SAndroid Build Coastguard Worker     host.remove_suffix(1);
526*6777b538SAndroid Build Coastguard Worker 
527*6777b538SAndroid Build Coastguard Worker   return base::EqualsCaseInsensitiveASCII(host, "localhost") ||
528*6777b538SAndroid Build Coastguard Worker          IsNormalizedLocalhostTLD(host);
529*6777b538SAndroid Build Coastguard Worker }
530*6777b538SAndroid Build Coastguard Worker 
UnescapePercentEncodedUrl(std::string_view input)531*6777b538SAndroid Build Coastguard Worker std::string UnescapePercentEncodedUrl(std::string_view input) {
532*6777b538SAndroid Build Coastguard Worker   std::string result(input);
533*6777b538SAndroid Build Coastguard Worker   // Replace any 0x2B (+) with 0x20 (SP).
534*6777b538SAndroid Build Coastguard Worker   for (char& c : result) {
535*6777b538SAndroid Build Coastguard Worker     if (c == '+') {
536*6777b538SAndroid Build Coastguard Worker       c = ' ';
537*6777b538SAndroid Build Coastguard Worker     }
538*6777b538SAndroid Build Coastguard Worker   }
539*6777b538SAndroid Build Coastguard Worker   // Run UTF-8 decoding without BOM on the percent-decoding.
540*6777b538SAndroid Build Coastguard Worker   url::RawCanonOutputT<char16_t> canon_output;
541*6777b538SAndroid Build Coastguard Worker   url::DecodeURLEscapeSequences(result, url::DecodeURLMode::kUTF8,
542*6777b538SAndroid Build Coastguard Worker                                 &canon_output);
543*6777b538SAndroid Build Coastguard Worker   return base::UTF16ToUTF8(canon_output.view());
544*6777b538SAndroid Build Coastguard Worker }
545*6777b538SAndroid Build Coastguard Worker 
546*6777b538SAndroid Build Coastguard Worker }  // namespace net
547