// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains a set of utility functions related to parsing,
// manipulating, and interacting with URLs and hostnames. These functions are
// intended to be of a text-processing nature, and should not attempt to use any
// networking or blocking services.

#ifndef NET_BASE_URL_UTIL_H_
#define NET_BASE_URL_UTIL_H_

#include <optional>
#include <string>
#include <string_view>

#include "base/memory/raw_ref.h"
#include "net/base/net_export.h"
#include "url/third_party/mozilla/url_parse.h"

class GURL;

namespace url {
struct CanonHostInfo;
class SchemeHostPort;
}  // namespace url

namespace net {

// Returns a new GURL by appending the given query parameter name and the
// value. Unsafe characters in the name and the value are escaped like
// %XX%XX. The original query component is preserved if it's present.
//
// Examples:
//
// AppendQueryParameter(GURL("http://example.com"), "name", "value").spec()
// => "http://example.com?name=value"
// AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec()
// => "http://example.com?x=y&name=value"
NET_EXPORT GURL AppendQueryParameter(const GURL& url,
                                     std::string_view name,
                                     std::string_view value);

// Returns a new GURL by appending or replacing the given query parameter name
// and the value. If `name` appears more than once, only the first name-value
// pair is replaced. Unsafe characters in the name and the value are escaped
// like %XX%XX. The original query component is preserved if it's present.
// Using `std::nullopt` for `value` will remove the `name` parameter.
//
// Examples:
//
// AppendOrReplaceQueryParameter(
//     GURL("http://example.com"), "name", "new").spec()
// => "http://example.com?name=new"
// AppendOrReplaceQueryParameter(
//     GURL("http://example.com?x=y&name=old"), "name", "new").spec()
// => "http://example.com?x=y&name=new"
// AppendOrReplaceQueryParameter(
//     GURL("http://example.com?x=y&name=old"), "name", std::nullopt).spec()
// => "http://example.com?x=y&"
NET_EXPORT GURL
AppendOrReplaceQueryParameter(const GURL& url,
                              std::string_view name,
                              std::optional<std::string_view> value);

// Returns a new GURL by appending the provided ref (also named fragment).
// Unsafe characters are escaped. The original fragment is replaced
// if it's present.
//
// Examples:
//
// AppendOrReplaceRef(
//     GURL("http://example.com"), "ref").spec()
// => "http://example.com#ref"
// AppendOrReplaceRef(
//     GURL("http://example.com#ref"), "ref2").spec()
// => "http://example.com#ref2"
//
// NOTE(review): |ref| is taken by const reference, unlike the by-value
// std::string_view parameters used by the other functions in this header.
// Changing it would also require updating the out-of-line definition.
NET_EXPORT GURL AppendOrReplaceRef(const GURL& url,
                                   const std::string_view& ref);

// Iterates over the key-value pairs in the query portion of |url|.
// NOTE: QueryIterator stores a reference to |url| and creates
// std::string_view instances which refer to the data inside the query of
// |url|. Therefore |url| must outlive QueryIterator and all std::string_view
// objects returned from the GetKey and GetValue methods.
class NET_EXPORT QueryIterator {
 public:
  // |url| is held by reference and must outlive this iterator.
  explicit QueryIterator(const GURL& url);
  // Not copyable.
  QueryIterator(const QueryIterator&) = delete;
  QueryIterator& operator=(const QueryIterator&) = delete;
  ~QueryIterator();

  // Key and value of the current pair. The returned views refer to data owned
  // by the GURL passed to the constructor, so they must not outlive it.
  std::string_view GetKey() const;
  std::string_view GetValue() const;
  // NOTE(review): non-const and returns a reference to a std::string;
  // presumably it fills and returns |unescaped_value_| -- confirm against the
  // definition.
  const std::string& GetUnescapedValue();

  // Typical use is a loop of the form:
  //   for (QueryIterator it(url); !it.IsAtEnd(); it.Advance()) { ... }
  bool IsAtEnd() const;
  void Advance();

 private:
  // The URL being iterated; referenced, not owned.
  const raw_ref<const GURL> url_;
  url::Component query_;
  bool at_end_;
  url::Component key_;
  url::Component value_;
  std::string unescaped_value_;
};

// Looks for |search_key| in the query portion of |url|. Returns true if the
// key is found and sets |out_value| to the unescaped value for the key.
// Returns false if the key is not found.
NET_EXPORT bool GetValueForKeyInQuery(const GURL& url,
                                      std::string_view search_key,
                                      std::string* out_value);

// Splits an input of the form <host>[":"<port>] into its constituent parts.
// Saves the result into |*host| and |*port|. If the input did not have
// the optional port, sets |*port| to -1.
// Returns true if the parsing was successful, false otherwise.
// The returned host is NOT canonicalized, and may be invalid.
//
// IPv6 literals must be specified in a bracketed form, for instance:
//   [::1]:90 and [::1]
//
// The resultant |*host| in both cases will be "::1" (not bracketed).
NET_EXPORT bool ParseHostAndPort(std::string_view input,
                                 std::string* host,
                                 int* port);

// Returns a host:port string for the given URL.
NET_EXPORT std::string GetHostAndPort(const GURL& url);

// Returns a host[:port] string for the given URL, where the port is omitted
// if it is the default for the URL's scheme.
NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url);

// Just like above, but takes a SchemeHostPort.
NET_EXPORT std::string GetHostAndOptionalPort(
    const url::SchemeHostPort& scheme_host_port);

// Returns the hostname by trimming the ending dot, if one exists.
NET_EXPORT std::string TrimEndingDot(std::string_view host);

// Returns either the host from |url|, or, if the host is empty, the full spec.
NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);

// Returns the given domain minus its leftmost label, or the empty string if the
// given domain is just a single label. For normal domain names (not IP
// addresses), this represents the "superdomain" of the given domain.
// Note that this does not take into account anything like the Public Suffix
// List, so the superdomain may end up being a bare eTLD. The returned string is
// not guaranteed to be a valid or canonical hostname, or to make any sense at
// all.
//
// Examples:
//
// GetSuperdomain("assets.example.com") -> "example.com"
// GetSuperdomain("example.net") -> "net"
// GetSuperdomain("littlebox") -> ""
// GetSuperdomain("127.0.0.1") -> "0.0.1"
NET_EXPORT std::string GetSuperdomain(std::string_view domain);

// Returns whether |subdomain| is a subdomain of (or identical to)
// |superdomain|, if both are hostnames (not IP addresses -- for which this
// function is nonsensical). Does not consider the Public Suffix List.
// Returns true if both input strings are empty.
NET_EXPORT bool IsSubdomainOf(std::string_view subdomain,
                              std::string_view superdomain);

// Canonicalizes |host| and returns it. Also fills |host_info| with
// IP address information. |host_info| must not be NULL.
NET_EXPORT std::string CanonicalizeHost(std::string_view host,
                                        url::CanonHostInfo* host_info);

// Returns true if |host| is not an IP address and is compliant with a set of
// rules based on RFC 1738 and tweaked to be compatible with the real world.
// The rules are:
//   * One or more non-empty labels separated by '.', each no more than 63
//     characters.
//   * Each component contains only alphanumeric characters and '-' or '_'
//   * The last component begins with an alphanumeric character
//   * Optional trailing dot after last component (means "treat as FQDN")
//   * Total size (including optional trailing dot, whether or not actually
//     present in `host`) no more than 254 characters.
//
// NOTE: You should only pass in hosts that have been returned from
// CanonicalizeHost(), or you may not get accurate results.
NET_EXPORT bool IsCanonicalizedHostCompliant(std::string_view host);

// Returns true if |hostname| contains a non-registerable or non-assignable
// domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
// that falls in a range reserved for non-publicly routable networks.
NET_EXPORT bool IsHostnameNonUnique(std::string_view hostname);

// Returns true if the host part of |url| is a local host name according to
// HostStringIsLocalhost.
NET_EXPORT bool IsLocalhost(const GURL& url);

// Returns true if |host| is one of the local hostnames
// (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1).
// "[::1]" is not detected as a local hostname. Do not use this method to check
// whether the host part of a URL is a local host name; use IsLocalhost instead.
//
// Note that this function does not check for IP addresses other than
// the above, although other IP addresses may point to the local
// machine.
NET_EXPORT bool HostStringIsLocalhost(std::string_view host);

// Strip the portions of |url| that aren't core to the network request.
//   - user name / password
//   - reference section
NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url);

// Changes scheme "ws" to "http" and "wss" to "https".
// This is useful for origin checks and authentication, where WebSocket URLs
// are treated as if they were HTTP. It is an error to call this function with
// a url with a scheme other than "ws" or "wss".
NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url);

// Returns whether the given url scheme is of a standard scheme type that can
// have hostnames representing domains (i.e. network hosts).
// See url::SchemeType.
NET_EXPORT bool IsStandardSchemeWithNetworkHost(std::string_view scheme);

// Extracts the unescaped username/password from |url|, saving the results
// into |*username| and |*password|.
NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
                                           std::u16string* username,
                                           std::u16string* password);

// Returns true if the url's host is a Google server. This should only be used
// for histograms and shouldn't be used to affect behavior.
NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);

// Returns true if |host| is the hostname of a Google server. This should only
// be used for histograms and shouldn't be used to affect behavior.
NET_EXPORT_PRIVATE bool IsGoogleHost(std::string_view host);

// Returns true if |host| is the hostname of a Google server and HTTPS DNS
// record of |host| is expected to indicate H3 support. This should only be used
// for histograms and shouldn't be used to affect behavior.
NET_EXPORT_PRIVATE bool IsGoogleHostWithAlpnH3(std::string_view host);

// This function tests |host| to see if it is of any local hostname form.
// |host| is normalized before being tested.
NET_EXPORT_PRIVATE bool IsLocalHostname(std::string_view host);

// The notion of unescaping used in the application/x-www-form-urlencoded
// parser.
// https://url.spec.whatwg.org/#concept-urlencoded-parser
NET_EXPORT_PRIVATE std::string UnescapePercentEncodedUrl(
    std::string_view input);

}  // namespace net

#endif  // NET_BASE_URL_UTIL_H_