xref: /aosp_15_r20/external/cronet/net/base/url_util.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker // This file contains a set of utility functions related to parsing,
6*6777b538SAndroid Build Coastguard Worker // manipulating, and interacting with URLs and hostnames. These functions are
7*6777b538SAndroid Build Coastguard Worker // intended to be of a text-processing nature, and should not attempt to use any
8*6777b538SAndroid Build Coastguard Worker // networking or blocking services.
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker #ifndef NET_BASE_URL_UTIL_H_
11*6777b538SAndroid Build Coastguard Worker #define NET_BASE_URL_UTIL_H_
12*6777b538SAndroid Build Coastguard Worker 
13*6777b538SAndroid Build Coastguard Worker #include <optional>
14*6777b538SAndroid Build Coastguard Worker #include <string>
15*6777b538SAndroid Build Coastguard Worker #include <string_view>
16*6777b538SAndroid Build Coastguard Worker 
17*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ref.h"
18*6777b538SAndroid Build Coastguard Worker #include "net/base/net_export.h"
19*6777b538SAndroid Build Coastguard Worker #include "url/third_party/mozilla/url_parse.h"
20*6777b538SAndroid Build Coastguard Worker 
21*6777b538SAndroid Build Coastguard Worker class GURL;
22*6777b538SAndroid Build Coastguard Worker 
23*6777b538SAndroid Build Coastguard Worker namespace url {
24*6777b538SAndroid Build Coastguard Worker struct CanonHostInfo;
25*6777b538SAndroid Build Coastguard Worker class SchemeHostPort;
26*6777b538SAndroid Build Coastguard Worker }  // namespace url
27*6777b538SAndroid Build Coastguard Worker 
28*6777b538SAndroid Build Coastguard Worker namespace net {
29*6777b538SAndroid Build Coastguard Worker 
30*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending the given query parameter name and the
31*6777b538SAndroid Build Coastguard Worker // value. Unsafe characters in the name and the value are escaped like
32*6777b538SAndroid Build Coastguard Worker // %XX%XX. The original query component is preserved if it's present.
33*6777b538SAndroid Build Coastguard Worker //
34*6777b538SAndroid Build Coastguard Worker // Examples:
35*6777b538SAndroid Build Coastguard Worker //
36*6777b538SAndroid Build Coastguard Worker // AppendQueryParameter(GURL("http://example.com"), "name", "value").spec()
37*6777b538SAndroid Build Coastguard Worker // => "http://example.com?name=value"
38*6777b538SAndroid Build Coastguard Worker // AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec()
39*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&name=value"
40*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL AppendQueryParameter(const GURL& url,
41*6777b538SAndroid Build Coastguard Worker                                      std::string_view name,
42*6777b538SAndroid Build Coastguard Worker                                      std::string_view value);
43*6777b538SAndroid Build Coastguard Worker 
44*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending or replacing the given query parameter name
45*6777b538SAndroid Build Coastguard Worker // and the value. If `name` appears more than once, only the first name-value
46*6777b538SAndroid Build Coastguard Worker // pair is replaced. Unsafe characters in the name and the value are escaped
47*6777b538SAndroid Build Coastguard Worker // like %XX%XX. The original query component is preserved if it's present.
48*6777b538SAndroid Build Coastguard Worker // Using `std::nullopt` for `value` will remove the `name` parameter.
49*6777b538SAndroid Build Coastguard Worker //
50*6777b538SAndroid Build Coastguard Worker // Examples:
51*6777b538SAndroid Build Coastguard Worker //
52*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter(
53*6777b538SAndroid Build Coastguard Worker //     GURL("http://example.com"), "name", "new").spec()
54*6777b538SAndroid Build Coastguard Worker // => "http://example.com?name=new"
55*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter(
56*6777b538SAndroid Build Coastguard Worker //     GURL("http://example.com?x=y&name=old"), "name", "new").spec()
57*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&name=new"
58*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter(
59*6777b538SAndroid Build Coastguard Worker //     GURL("http://example.com?x=y&name=old"), "name", std::nullopt).spec()
60*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&"
61*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL
62*6777b538SAndroid Build Coastguard Worker AppendOrReplaceQueryParameter(const GURL& url,
63*6777b538SAndroid Build Coastguard Worker                               std::string_view name,
64*6777b538SAndroid Build Coastguard Worker                               std::optional<std::string_view> value);
65*6777b538SAndroid Build Coastguard Worker 
66*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending the provided ref (also named fragment).
67*6777b538SAndroid Build Coastguard Worker // Unsafe characters are escaped. The original fragment is replaced
68*6777b538SAndroid Build Coastguard Worker // if it's present.
69*6777b538SAndroid Build Coastguard Worker //
70*6777b538SAndroid Build Coastguard Worker // Examples:
71*6777b538SAndroid Build Coastguard Worker //
72*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceRef(
73*6777b538SAndroid Build Coastguard Worker //     GURL("http://example.com"), "ref").spec()
74*6777b538SAndroid Build Coastguard Worker // => "http://example.com#ref"
75*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceRef(
76*6777b538SAndroid Build Coastguard Worker //     GURL("http://example.com#ref"), "ref2").spec()
77*6777b538SAndroid Build Coastguard Worker // => "http://example.com#ref2"
78*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL AppendOrReplaceRef(const GURL& url,
79*6777b538SAndroid Build Coastguard Worker                                    const std::string_view& ref);  // NOTE(review): the other functions in this header take std::string_view by value; switching this one requires the same change in the definition.
80*6777b538SAndroid Build Coastguard Worker 
81*6777b538SAndroid Build Coastguard Worker // Iterates over the key-value pairs in the query portion of |url|.
82*6777b538SAndroid Build Coastguard Worker // NOTE: QueryIterator stores reference to |url| and creates std::string_view
83*6777b538SAndroid Build Coastguard Worker // instances which refer to the data inside |url| query. Therefore |url| must
84*6777b538SAndroid Build Coastguard Worker // outlive QueryIterator and all std::string_view objects returned from GetKey
85*6777b538SAndroid Build Coastguard Worker // and GetValue methods.
86*6777b538SAndroid Build Coastguard Worker class NET_EXPORT QueryIterator {
87*6777b538SAndroid Build Coastguard Worker  public:
88*6777b538SAndroid Build Coastguard Worker   explicit QueryIterator(const GURL& url);  // |url| is referenced, not copied; it must outlive this iterator.
89*6777b538SAndroid Build Coastguard Worker   QueryIterator(const QueryIterator&) = delete;  // Not copyable.
90*6777b538SAndroid Build Coastguard Worker   QueryIterator& operator=(const QueryIterator&) = delete;
91*6777b538SAndroid Build Coastguard Worker   ~QueryIterator();
92*6777b538SAndroid Build Coastguard Worker 
93*6777b538SAndroid Build Coastguard Worker   std::string_view GetKey() const;  // View into |url|'s query data; do not use after |url| is destroyed.
94*6777b538SAndroid Build Coastguard Worker   std::string_view GetValue() const;  // View into |url|'s query data; do not use after |url| is destroyed.
95*6777b538SAndroid Build Coastguard Worker   const std::string& GetUnescapedValue();  // Non-const; presumably fills and returns |unescaped_value_| -- confirm in the .cc.
96*6777b538SAndroid Build Coastguard Worker 
97*6777b538SAndroid Build Coastguard Worker   bool IsAtEnd() const;  // Presumably true once no key-value pairs remain -- confirm in the .cc.
98*6777b538SAndroid Build Coastguard Worker   void Advance();  // Moves to the next key-value pair.
99*6777b538SAndroid Build Coastguard Worker 
100*6777b538SAndroid Build Coastguard Worker  private:
101*6777b538SAndroid Build Coastguard Worker   const raw_ref<const GURL> url_;  // Non-owning reference to the URL being iterated.
102*6777b538SAndroid Build Coastguard Worker   url::Component query_;  // NOTE(review): presumably the portion of the query still to be parsed -- confirm.
103*6777b538SAndroid Build Coastguard Worker   bool at_end_;
104*6777b538SAndroid Build Coastguard Worker   url::Component key_;  // NOTE(review): presumably locates the current key within |url_| -- confirm.
105*6777b538SAndroid Build Coastguard Worker   url::Component value_;  // NOTE(review): presumably locates the current value within |url_| -- confirm.
106*6777b538SAndroid Build Coastguard Worker   std::string unescaped_value_;  // NOTE(review): presumably the storage behind GetUnescapedValue()'s returned reference -- confirm.
107*6777b538SAndroid Build Coastguard Worker };
108*6777b538SAndroid Build Coastguard Worker 
109*6777b538SAndroid Build Coastguard Worker // Looks for |search_key| in the query portion of |url|. Returns true if the
110*6777b538SAndroid Build Coastguard Worker // key is found and sets |out_value| to the unescaped value for the key.
111*6777b538SAndroid Build Coastguard Worker // Returns false if the key is not found.
112*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool GetValueForKeyInQuery(const GURL& url,
113*6777b538SAndroid Build Coastguard Worker                                       std::string_view search_key,
114*6777b538SAndroid Build Coastguard Worker                                       std::string* out_value);
115*6777b538SAndroid Build Coastguard Worker 
116*6777b538SAndroid Build Coastguard Worker // Splits an input of the form <host>[":"<port>] into its constituent parts.
117*6777b538SAndroid Build Coastguard Worker // Saves the result into |*host| and |*port|. If the input did not have
118*6777b538SAndroid Build Coastguard Worker // the optional port, sets |*port| to -1.
119*6777b538SAndroid Build Coastguard Worker // Returns true if the parsing was successful, false otherwise.
120*6777b538SAndroid Build Coastguard Worker // The returned host is NOT canonicalized, and may be invalid.
121*6777b538SAndroid Build Coastguard Worker //
122*6777b538SAndroid Build Coastguard Worker // IPv6 literals must be specified in a bracketed form, for instance:
123*6777b538SAndroid Build Coastguard Worker //   [::1]:90 and [::1]
124*6777b538SAndroid Build Coastguard Worker //
125*6777b538SAndroid Build Coastguard Worker // The resultant |*host| in both cases will be "::1" (not bracketed).
126*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool ParseHostAndPort(std::string_view input,
127*6777b538SAndroid Build Coastguard Worker                                  std::string* host,
128*6777b538SAndroid Build Coastguard Worker                                  int* port);
129*6777b538SAndroid Build Coastguard Worker 
130*6777b538SAndroid Build Coastguard Worker // Returns a host:port string for the given URL.
131*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndPort(const GURL& url);
132*6777b538SAndroid Build Coastguard Worker 
133*6777b538SAndroid Build Coastguard Worker // Returns a host[:port] string for the given URL, where the port is omitted
134*6777b538SAndroid Build Coastguard Worker // if it is the default for the URL's scheme.
135*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url);
136*6777b538SAndroid Build Coastguard Worker 
137*6777b538SAndroid Build Coastguard Worker // Just like above, but takes a SchemeHostPort.
138*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndOptionalPort(
139*6777b538SAndroid Build Coastguard Worker     const url::SchemeHostPort& scheme_host_port);
140*6777b538SAndroid Build Coastguard Worker 
141*6777b538SAndroid Build Coastguard Worker // Returns the hostname by trimming the ending dot, if one exists.
142*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string TrimEndingDot(std::string_view host);
143*6777b538SAndroid Build Coastguard Worker 
144*6777b538SAndroid Build Coastguard Worker // Returns either the host from |url|, or, if the host is empty, the full spec.
145*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
146*6777b538SAndroid Build Coastguard Worker 
147*6777b538SAndroid Build Coastguard Worker // Returns the given domain minus its leftmost label, or the empty string if the
148*6777b538SAndroid Build Coastguard Worker // given domain is just a single label. For normal domain names (not IP
149*6777b538SAndroid Build Coastguard Worker // addresses), this represents the "superdomain" of the given domain.
150*6777b538SAndroid Build Coastguard Worker // Note that this does not take into account anything like the Public Suffix
151*6777b538SAndroid Build Coastguard Worker // List, so the superdomain may end up being a bare eTLD. The returned string is
152*6777b538SAndroid Build Coastguard Worker // not guaranteed to be a valid or canonical hostname, or to make any sense at
153*6777b538SAndroid Build Coastguard Worker // all.
154*6777b538SAndroid Build Coastguard Worker //
155*6777b538SAndroid Build Coastguard Worker // Examples:
156*6777b538SAndroid Build Coastguard Worker //
157*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("assets.example.com") -> "example.com"
158*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("example.net") -> "net"
159*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("littlebox") -> ""
160*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("127.0.0.1") -> "0.0.1"
161*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetSuperdomain(std::string_view domain);
162*6777b538SAndroid Build Coastguard Worker 
163*6777b538SAndroid Build Coastguard Worker // Returns whether |subdomain| is a subdomain of (or identical to)
164*6777b538SAndroid Build Coastguard Worker // |superdomain|, if both are hostnames (not IP addresses -- for which this
165*6777b538SAndroid Build Coastguard Worker // function is nonsensical). Does not consider the Public Suffix List.
166*6777b538SAndroid Build Coastguard Worker // Returns true if both input strings are empty.
167*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsSubdomainOf(std::string_view subdomain,
168*6777b538SAndroid Build Coastguard Worker                               std::string_view superdomain);
169*6777b538SAndroid Build Coastguard Worker 
170*6777b538SAndroid Build Coastguard Worker // Canonicalizes |host| and returns it.  Also fills |host_info| with
171*6777b538SAndroid Build Coastguard Worker // IP address information.  |host_info| must not be NULL.
172*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string CanonicalizeHost(std::string_view host,
173*6777b538SAndroid Build Coastguard Worker                                         url::CanonHostInfo* host_info);
174*6777b538SAndroid Build Coastguard Worker 
175*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is not an IP address and is compliant with a set of
176*6777b538SAndroid Build Coastguard Worker // rules based on RFC 1738 and tweaked to be compatible with the real world.
177*6777b538SAndroid Build Coastguard Worker // The rules are:
178*6777b538SAndroid Build Coastguard Worker //   * One or more non-empty labels separated by '.', each no more than 63
179*6777b538SAndroid Build Coastguard Worker //     characters.
180*6777b538SAndroid Build Coastguard Worker //   * Each component contains only alphanumeric characters and '-' or '_'
181*6777b538SAndroid Build Coastguard Worker //   * The last component begins with an alphanumeric character
182*6777b538SAndroid Build Coastguard Worker //   * Optional trailing dot after last component (means "treat as FQDN")
183*6777b538SAndroid Build Coastguard Worker //   * Total size (including optional trailing dot, whether or not actually
184*6777b538SAndroid Build Coastguard Worker //     present in `host`) no more than 254 characters.
185*6777b538SAndroid Build Coastguard Worker //
186*6777b538SAndroid Build Coastguard Worker // NOTE: You should only pass in hosts that have been returned from
187*6777b538SAndroid Build Coastguard Worker // CanonicalizeHost(), or you may not get accurate results.
188*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsCanonicalizedHostCompliant(std::string_view host);
189*6777b538SAndroid Build Coastguard Worker 
190*6777b538SAndroid Build Coastguard Worker // Returns true if |hostname| contains a non-registerable or non-assignable
191*6777b538SAndroid Build Coastguard Worker // domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
192*6777b538SAndroid Build Coastguard Worker // that falls in a range reserved for non-publicly routable networks.
193*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsHostnameNonUnique(std::string_view hostname);
194*6777b538SAndroid Build Coastguard Worker 
195*6777b538SAndroid Build Coastguard Worker // Returns true if the host part of |url| is a local host name according to
196*6777b538SAndroid Build Coastguard Worker // HostStringIsLocalhost.
197*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsLocalhost(const GURL& url);
198*6777b538SAndroid Build Coastguard Worker 
199*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is one of the local hostnames
200*6777b538SAndroid Build Coastguard Worker // (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1).
201*6777b538SAndroid Build Coastguard Worker // "[::1]" is not detected as a local hostname. Do not use this method to check
202*6777b538SAndroid Build Coastguard Worker // whether the host part of a URL is a local host name; use IsLocalhost instead.
203*6777b538SAndroid Build Coastguard Worker //
204*6777b538SAndroid Build Coastguard Worker // Note that this function does not check for IP addresses other than
205*6777b538SAndroid Build Coastguard Worker // the above, although other IP addresses may point to the local
206*6777b538SAndroid Build Coastguard Worker // machine.
207*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool HostStringIsLocalhost(std::string_view host);
208*6777b538SAndroid Build Coastguard Worker 
209*6777b538SAndroid Build Coastguard Worker // Strip the portions of |url| that aren't core to the network request.
210*6777b538SAndroid Build Coastguard Worker //   - user name / password
211*6777b538SAndroid Build Coastguard Worker //   - reference section
212*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url);
213*6777b538SAndroid Build Coastguard Worker 
214*6777b538SAndroid Build Coastguard Worker // Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin
215*6777b538SAndroid Build Coastguard Worker // checks and authentication, where WebSocket URLs are treated as if they were
216*6777b538SAndroid Build Coastguard Worker // HTTP. It is an error to call this function with a url with a scheme other
217*6777b538SAndroid Build Coastguard Worker // than "ws" or "wss".
218*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url);
219*6777b538SAndroid Build Coastguard Worker 
220*6777b538SAndroid Build Coastguard Worker // Returns whether the given url scheme is of a standard scheme type that can
221*6777b538SAndroid Build Coastguard Worker // have hostnames representing domains (i.e. network hosts).
222*6777b538SAndroid Build Coastguard Worker // See url::SchemeType.
223*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsStandardSchemeWithNetworkHost(std::string_view scheme);
224*6777b538SAndroid Build Coastguard Worker 
225*6777b538SAndroid Build Coastguard Worker // Extracts the unescaped username/password from |url|, saving the results
226*6777b538SAndroid Build Coastguard Worker // into |*username| and |*password|.
227*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
228*6777b538SAndroid Build Coastguard Worker                                            std::u16string* username,
229*6777b538SAndroid Build Coastguard Worker                                            std::u16string* password);
230*6777b538SAndroid Build Coastguard Worker 
231*6777b538SAndroid Build Coastguard Worker // Returns true if the url's host is a Google server. This should only be used
232*6777b538SAndroid Build Coastguard Worker // for histograms and shouldn't be used to affect behavior.
233*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);
234*6777b538SAndroid Build Coastguard Worker 
235*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is the hostname of a Google server. This should only
236*6777b538SAndroid Build Coastguard Worker // be used for histograms and shouldn't be used to affect behavior.
237*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsGoogleHost(std::string_view host);
238*6777b538SAndroid Build Coastguard Worker 
239*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is the hostname of a Google server and HTTPS DNS
240*6777b538SAndroid Build Coastguard Worker // record of |host| is expected to indicate H3 support. This should only be used
241*6777b538SAndroid Build Coastguard Worker // for histograms and shouldn't be used to affect behavior.
242*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsGoogleHostWithAlpnH3(std::string_view host);
243*6777b538SAndroid Build Coastguard Worker 
244*6777b538SAndroid Build Coastguard Worker // This function tests |host| to see if it is of any local hostname form.
245*6777b538SAndroid Build Coastguard Worker // |host| is normalized before being tested.
246*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsLocalHostname(std::string_view host);
247*6777b538SAndroid Build Coastguard Worker 
248*6777b538SAndroid Build Coastguard Worker // The notion of unescaping used in the application/x-www-form-urlencoded
249*6777b538SAndroid Build Coastguard Worker // parser. https://url.spec.whatwg.org/#concept-urlencoded-parser
250*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE std::string UnescapePercentEncodedUrl(
251*6777b538SAndroid Build Coastguard Worker     std::string_view input);
252*6777b538SAndroid Build Coastguard Worker 
253*6777b538SAndroid Build Coastguard Worker }  // namespace net
254*6777b538SAndroid Build Coastguard Worker 
255*6777b538SAndroid Build Coastguard Worker #endif  // NET_BASE_URL_UTIL_H_
256