xref: /aosp_15_r20/external/cronet/net/base/url_util.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This file contains a set of utility functions related to parsing,
6 // manipulating, and interacting with URLs and hostnames. These functions are
7 // intended to be of a text-processing nature, and should not attempt to use any
8 // networking or blocking services.
9 
10 #ifndef NET_BASE_URL_UTIL_H_
11 #define NET_BASE_URL_UTIL_H_
12 
13 #include <optional>
14 #include <string>
15 #include <string_view>
16 
17 #include "base/memory/raw_ref.h"
18 #include "net/base/net_export.h"
19 #include "url/third_party/mozilla/url_parse.h"
20 
21 class GURL;
22 
23 namespace url {
24 struct CanonHostInfo;
25 class SchemeHostPort;
26 }  // namespace url
27 
28 namespace net {
29 
30 // Returns a new GURL by appending the given query parameter name and the
31 // value. Unsafe characters in the name and the value are escaped like
32 // %XX%XX. The original query component is preserved if it's present.
33 //
34 // Examples:
35 //
36 // AppendQueryParameter(GURL("http://example.com"), "name", "value").spec()
37 // => "http://example.com?name=value"
38 // AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec()
39 // => "http://example.com?x=y&name=value"
40 NET_EXPORT GURL AppendQueryParameter(const GURL& url,
41                                      std::string_view name,
42                                      std::string_view value);
43 
44 // Returns a new GURL by appending or replacing the given query parameter name
45 // and the value. If `name` appears more than once, only the first name-value
46 // pair is replaced. Unsafe characters in the name and the value are escaped
47 // like %XX%XX. The original query component is preserved if it's present.
48 // Using `std::nullopt` for `value` will remove the `name` parameter.
49 //
50 // Examples:
51 //
52 // AppendOrReplaceQueryParameter(
53 //     GURL("http://example.com"), "name", "new").spec()
54 // => "http://example.com?name=new"
55 // AppendOrReplaceQueryParameter(
56 //     GURL("http://example.com?x=y&name=old"), "name", "new").spec()
57 // => "http://example.com?x=y&name=new"
58 // AppendOrReplaceQueryParameter(
59 //     GURL("http://example.com?x=y&name=old"), "name", std::nullopt).spec()
60 // => "http://example.com?x=y&"
61 NET_EXPORT GURL
62 AppendOrReplaceQueryParameter(const GURL& url,
63                               std::string_view name,
64                               std::optional<std::string_view> value);
65 
66 // Returns a new GURL by appending the provided ref (also named fragment).
67 // Unsafe characters are escaped. The original fragment is replaced
68 // if it's present.
69 //
70 // Examples:
71 //
72 // AppendOrReplaceRef(
73 //     GURL("http://example.com"), "ref").spec()
74 // => "http://example.com#ref"
75 // AppendOrReplaceRef(
76 //     GURL("http://example.com#ref"), "ref2").spec()
77 // => "http://example.com#ref2"
78 NET_EXPORT GURL AppendOrReplaceRef(const GURL& url,
79                                    const std::string_view& ref);
80 
81 // Iterates over the key-value pairs in the query portion of |url|.
82 // NOTE: QueryIterator stores a reference to |url| and creates std::string_view
83 // instances which refer to the data inside |url| query. Therefore |url| must
84 // outlive QueryIterator and all std::string_view objects returned from GetKey
85 // and GetValue methods.
86 class NET_EXPORT QueryIterator {
87  public:
88   explicit QueryIterator(const GURL& url);  // |url| is referenced, not copied.
89   QueryIterator(const QueryIterator&) = delete;  // Not copyable.
90   QueryIterator& operator=(const QueryIterator&) = delete;  // Not assignable.
91   ~QueryIterator();
92 
93   std::string_view GetKey() const;  // View into |url|'s query data.
94   std::string_view GetValue() const;  // View into |url|'s query data.
95   const std::string& GetUnescapedValue();  // Non-const; presumably fills |unescaped_value_| -- confirm in the .cc.
96 
97   bool IsAtEnd() const;
98   void Advance();
99 
100  private:
101   const raw_ref<const GURL> url_;  // Non-owning; |url| must outlive this.
102   url::Component query_;
103   bool at_end_;  // NOTE(review): no in-class initializer; confirm the constructor sets it.
104   url::Component key_;
105   url::Component value_;
106   std::string unescaped_value_;  // Presumably backs GetUnescapedValue()'s returned reference -- confirm in the .cc.
107 };
108 
109 // Looks for |search_key| in the query portion of |url|. Returns true if the
110 // key is found and sets |out_value| to the unescaped value for the key.
111 // Returns false if the key is not found.
112 NET_EXPORT bool GetValueForKeyInQuery(const GURL& url,
113                                       std::string_view search_key,
114                                       std::string* out_value);
115 
116 // Splits an input of the form <host>[":"<port>] into its constituent parts.
117 // Saves the result into |*host| and |*port|. If the input did not have
118 // the optional port, sets |*port| to -1.
119 // Returns true if the parsing was successful, false otherwise.
120 // The returned host is NOT canonicalized, and may be invalid.
121 //
122 // IPv6 literals must be specified in a bracketed form, for instance:
123 //   [::1]:90 and [::1]
124 //
125 // The resultant |*host| in both cases will be "::1" (not bracketed).
126 NET_EXPORT bool ParseHostAndPort(std::string_view input,
127                                  std::string* host,
128                                  int* port);
129 
130 // Returns a host:port string for the given URL.
131 NET_EXPORT std::string GetHostAndPort(const GURL& url);
132 
133 // Returns a host[:port] string for the given URL, where the port is omitted
134 // if it is the default for the URL's scheme.
135 NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url);
136 
137 // Just like above, but takes a SchemeHostPort.
138 NET_EXPORT std::string GetHostAndOptionalPort(
139     const url::SchemeHostPort& scheme_host_port);
140 
141 // Returns the hostname by trimming the ending dot, if one exists.
142 NET_EXPORT std::string TrimEndingDot(std::string_view host);
143 
144 // Returns either the host from |url|, or, if the host is empty, the full spec.
145 NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
146 
147 // Returns the given domain minus its leftmost label, or the empty string if the
148 // given domain is just a single label. For normal domain names (not IP
149 // addresses), this represents the "superdomain" of the given domain.
150 // Note that this does not take into account anything like the Public Suffix
151 // List, so the superdomain may end up being a bare eTLD. The returned string is
152 // not guaranteed to be a valid or canonical hostname, or to make any sense at
153 // all.
154 //
155 // Examples:
156 //
157 // GetSuperdomain("assets.example.com") -> "example.com"
158 // GetSuperdomain("example.net") -> "net"
159 // GetSuperdomain("littlebox") -> ""
160 // GetSuperdomain("127.0.0.1") -> "0.0.1"
161 NET_EXPORT std::string GetSuperdomain(std::string_view domain);
162 
163 // Returns whether |subdomain| is a subdomain of (or identical to)
164 // |superdomain|, if both are hostnames (not IP addresses -- for which this
165 // function is nonsensical). Does not consider the Public Suffix List.
166 // Returns true if both input strings are empty.
167 NET_EXPORT bool IsSubdomainOf(std::string_view subdomain,
168                               std::string_view superdomain);
169 
170 // Canonicalizes |host| and returns it.  Also fills |host_info| with
171 // IP address information.  |host_info| must not be NULL.
172 NET_EXPORT std::string CanonicalizeHost(std::string_view host,
173                                         url::CanonHostInfo* host_info);
174 
175 // Returns true if |host| is not an IP address and is compliant with a set of
176 // rules based on RFC 1738 and tweaked to be compatible with the real world.
177 // The rules are:
178 //   * One or more non-empty labels separated by '.', each no more than 63
179 //     characters.
180 //   * Each component contains only alphanumeric characters and '-' or '_'
181 //   * The last component begins with an alphanumeric character
182 //   * Optional trailing dot after last component (means "treat as FQDN")
183 //   * Total size (including optional trailing dot, whether or not actually
184 //     present in `host`) no more than 254 characters.
185 //
186 // NOTE: You should only pass in hosts that have been returned from
187 // CanonicalizeHost(), or you may not get accurate results.
188 NET_EXPORT bool IsCanonicalizedHostCompliant(std::string_view host);
189 
190 // Returns true if |hostname| contains a non-registerable or non-assignable
191 // domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
192 // that falls in a range reserved for non-publicly routable networks.
193 NET_EXPORT bool IsHostnameNonUnique(std::string_view hostname);
194 
195 // Returns true if the host part of |url| is a local host name according to
196 // HostStringIsLocalhost.
197 NET_EXPORT bool IsLocalhost(const GURL& url);
198 
199 // Returns true if |host| is one of the local hostnames
200 // (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1).
201 // "[::1]" is not detected as a local hostname. Do not use this method to check
202 // whether the host part of a URL is a local host name; use IsLocalhost instead.
203 //
204 // Note that this function does not check for IP addresses other than
205 // the above, although other IP addresses may point to the local
206 // machine.
207 NET_EXPORT bool HostStringIsLocalhost(std::string_view host);
208 
209 // Strip the portions of |url| that aren't core to the network request.
210 //   - user name / password
211 //   - reference section
212 NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url);
213 
214 // Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin
215 // checks and authentication, where WebSocket URLs are treated as if they were
216 // HTTP. It is an error to call this function with a url with a scheme other
217 // than "ws" or "wss".
218 NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url);
219 
220 // Returns whether the given url scheme is of a standard scheme type that can
221 // have hostnames representing domains (i.e. network hosts).
222 // See url::SchemeType.
223 NET_EXPORT bool IsStandardSchemeWithNetworkHost(std::string_view scheme);
224 
225 // Extracts the unescaped username/password from |url|, saving the results
226 // into |*username| and |*password|.
227 NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
228                                            std::u16string* username,
229                                            std::u16string* password);
230 
231 // Returns true if the url's host is a Google server. This should only be used
232 // for histograms and shouldn't be used to affect behavior.
233 NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);
234 
235 // Returns true if |host| is the hostname of a Google server. This should only
236 // be used for histograms and shouldn't be used to affect behavior.
237 NET_EXPORT_PRIVATE bool IsGoogleHost(std::string_view host);
238 
239 // Returns true if |host| is the hostname of a Google server and HTTPS DNS
240 // record of |host| is expected to indicate H3 support. This should only be used
241 // for histograms and shouldn't be used to affect behavior.
242 NET_EXPORT_PRIVATE bool IsGoogleHostWithAlpnH3(std::string_view host);
243 
244 // This function tests |host| to see if it is of any local hostname form.
245 // |host| is normalized before being tested.
246 NET_EXPORT_PRIVATE bool IsLocalHostname(std::string_view host);
247 
248 // The notion of unescaping used in the application/x-www-form-urlencoded
249 // parser. https://url.spec.whatwg.org/#concept-urlencoded-parser
250 NET_EXPORT_PRIVATE std::string UnescapePercentEncodedUrl(
251     std::string_view input);
252 
253 }  // namespace net
254 
255 #endif  // NET_BASE_URL_UTIL_H_
256