1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors 2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be 3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file. 4*6777b538SAndroid Build Coastguard Worker 5*6777b538SAndroid Build Coastguard Worker // This file contains a set of utility functions related to parsing, 6*6777b538SAndroid Build Coastguard Worker // manipulating, and interacting with URLs and hostnames. These functions are 7*6777b538SAndroid Build Coastguard Worker // intended to be of a text-processing nature, and should not attempt to use any 8*6777b538SAndroid Build Coastguard Worker // networking or blocking services. 9*6777b538SAndroid Build Coastguard Worker 10*6777b538SAndroid Build Coastguard Worker #ifndef NET_BASE_URL_UTIL_H_ 11*6777b538SAndroid Build Coastguard Worker #define NET_BASE_URL_UTIL_H_ 12*6777b538SAndroid Build Coastguard Worker 13*6777b538SAndroid Build Coastguard Worker #include <optional> 14*6777b538SAndroid Build Coastguard Worker #include <string> 15*6777b538SAndroid Build Coastguard Worker #include <string_view> 16*6777b538SAndroid Build Coastguard Worker 17*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ref.h" 18*6777b538SAndroid Build Coastguard Worker #include "net/base/net_export.h" 19*6777b538SAndroid Build Coastguard Worker #include "url/third_party/mozilla/url_parse.h" 20*6777b538SAndroid Build Coastguard Worker 21*6777b538SAndroid Build Coastguard Worker class GURL; 22*6777b538SAndroid Build Coastguard Worker 23*6777b538SAndroid Build Coastguard Worker namespace url { 24*6777b538SAndroid Build Coastguard Worker struct CanonHostInfo; 25*6777b538SAndroid Build Coastguard Worker class SchemeHostPort; 26*6777b538SAndroid Build Coastguard Worker } // namespace url 27*6777b538SAndroid Build Coastguard Worker 28*6777b538SAndroid Build Coastguard Worker namespace net { 29*6777b538SAndroid Build Coastguard Worker 30*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending the given query parameter name and the 31*6777b538SAndroid Build Coastguard Worker // value. Unsafe characters in the name and the value are escaped like 32*6777b538SAndroid Build Coastguard Worker // %XX%XX. The original query component is preserved if it's present. 33*6777b538SAndroid Build Coastguard Worker // 34*6777b538SAndroid Build Coastguard Worker // Examples: 35*6777b538SAndroid Build Coastguard Worker // 36*6777b538SAndroid Build Coastguard Worker // AppendQueryParameter(GURL("http://example.com"), "name", "value").spec() 37*6777b538SAndroid Build Coastguard Worker // => "http://example.com?name=value" 38*6777b538SAndroid Build Coastguard Worker // AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec() 39*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&name=value" 40*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL AppendQueryParameter(const GURL& url, 41*6777b538SAndroid Build Coastguard Worker std::string_view name, 42*6777b538SAndroid Build Coastguard Worker std::string_view value); 43*6777b538SAndroid Build Coastguard Worker 44*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending or replacing the given query parameter name 45*6777b538SAndroid Build Coastguard Worker // and the value. If `name` appears more than once, only the first name-value 46*6777b538SAndroid Build Coastguard Worker // pair is replaced. Unsafe characters in the name and the value are escaped 47*6777b538SAndroid Build Coastguard Worker // like %XX%XX. The original query component is preserved if it's present. 48*6777b538SAndroid Build Coastguard Worker // Using `std::nullopt` for `value` will remove the `name` parameter. 49*6777b538SAndroid Build Coastguard Worker // 50*6777b538SAndroid Build Coastguard Worker // Examples: 51*6777b538SAndroid Build Coastguard Worker // 52*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter( 53*6777b538SAndroid Build Coastguard Worker // GURL("http://example.com"), "name", "new").spec() 54*6777b538SAndroid Build Coastguard Worker // => "http://example.com?name=value" 55*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter( 56*6777b538SAndroid Build Coastguard Worker // GURL("http://example.com?x=y&name=old"), "name", "new").spec() 57*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&name=new" 58*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceQueryParameter( 59*6777b538SAndroid Build Coastguard Worker // GURL("http://example.com?x=y&name=old"), "name", std::nullopt).spec() 60*6777b538SAndroid Build Coastguard Worker // => "http://example.com?x=y&" 61*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL 62*6777b538SAndroid Build Coastguard Worker AppendOrReplaceQueryParameter(const GURL& url, 63*6777b538SAndroid Build Coastguard Worker std::string_view name, 64*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> value); 65*6777b538SAndroid Build Coastguard Worker 66*6777b538SAndroid Build Coastguard Worker // Returns a new GURL by appending the provided ref (also named fragment). 67*6777b538SAndroid Build Coastguard Worker // Unsafe characters are escaped. The original fragment is replaced 68*6777b538SAndroid Build Coastguard Worker // if it's present. 69*6777b538SAndroid Build Coastguard Worker // 70*6777b538SAndroid Build Coastguard Worker // Examples: 71*6777b538SAndroid Build Coastguard Worker // 72*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceRef( 73*6777b538SAndroid Build Coastguard Worker // GURL("http://example.com"), "ref").spec() 74*6777b538SAndroid Build Coastguard Worker // => "http://example.com#ref" 75*6777b538SAndroid Build Coastguard Worker // AppendOrReplaceRef( 76*6777b538SAndroid Build Coastguard Worker // GURL("http://example.com#ref"), "ref2").spec() 77*6777b538SAndroid Build Coastguard Worker // => "http://example.com#ref2" 78*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL AppendOrReplaceRef(const GURL& url, 79*6777b538SAndroid Build Coastguard Worker const std::string_view& ref); 80*6777b538SAndroid Build Coastguard Worker 81*6777b538SAndroid Build Coastguard Worker // Iterates over the key-value pairs in the query portion of |url|. 82*6777b538SAndroid Build Coastguard Worker // NOTE: QueryIterator stores reference to |url| and creates std::string_view 83*6777b538SAndroid Build Coastguard Worker // instances which refer to the data inside |url| query. Therefore |url| must 84*6777b538SAndroid Build Coastguard Worker // outlive QueryIterator and all std::string_view objects returned from GetKey 85*6777b538SAndroid Build Coastguard Worker // and GetValue methods. 86*6777b538SAndroid Build Coastguard Worker class NET_EXPORT QueryIterator { 87*6777b538SAndroid Build Coastguard Worker public: 88*6777b538SAndroid Build Coastguard Worker explicit QueryIterator(const GURL& url); 89*6777b538SAndroid Build Coastguard Worker QueryIterator(const QueryIterator&) = delete; 90*6777b538SAndroid Build Coastguard Worker QueryIterator& operator=(const QueryIterator&) = delete; 91*6777b538SAndroid Build Coastguard Worker ~QueryIterator(); 92*6777b538SAndroid Build Coastguard Worker 93*6777b538SAndroid Build Coastguard Worker std::string_view GetKey() const; 94*6777b538SAndroid Build Coastguard Worker std::string_view GetValue() const; 95*6777b538SAndroid Build Coastguard Worker const std::string& GetUnescapedValue(); 96*6777b538SAndroid Build Coastguard Worker 97*6777b538SAndroid Build Coastguard Worker bool IsAtEnd() const; 98*6777b538SAndroid Build Coastguard Worker void Advance(); 99*6777b538SAndroid Build Coastguard Worker 100*6777b538SAndroid Build Coastguard Worker private: 101*6777b538SAndroid Build Coastguard Worker const raw_ref<const GURL> url_; 102*6777b538SAndroid Build Coastguard Worker url::Component query_; 103*6777b538SAndroid Build Coastguard Worker bool at_end_; 104*6777b538SAndroid Build Coastguard Worker url::Component key_; 105*6777b538SAndroid Build Coastguard Worker url::Component value_; 106*6777b538SAndroid Build Coastguard Worker std::string unescaped_value_; 107*6777b538SAndroid Build Coastguard Worker }; 108*6777b538SAndroid Build Coastguard Worker 109*6777b538SAndroid Build Coastguard Worker // Looks for |search_key| in the query portion of |url|. Returns true if the 110*6777b538SAndroid Build Coastguard Worker // key is found and sets |out_value| to the unescaped value for the key. 111*6777b538SAndroid Build Coastguard Worker // Returns false if the key is not found. 112*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool GetValueForKeyInQuery(const GURL& url, 113*6777b538SAndroid Build Coastguard Worker std::string_view search_key, 114*6777b538SAndroid Build Coastguard Worker std::string* out_value); 115*6777b538SAndroid Build Coastguard Worker 116*6777b538SAndroid Build Coastguard Worker // Splits an input of the form <host>[":"<port>] into its consitituent parts. 117*6777b538SAndroid Build Coastguard Worker // Saves the result into |*host| and |*port|. If the input did not have 118*6777b538SAndroid Build Coastguard Worker // the optional port, sets |*port| to -1. 119*6777b538SAndroid Build Coastguard Worker // Returns true if the parsing was successful, false otherwise. 120*6777b538SAndroid Build Coastguard Worker // The returned host is NOT canonicalized, and may be invalid. 121*6777b538SAndroid Build Coastguard Worker // 122*6777b538SAndroid Build Coastguard Worker // IPv6 literals must be specified in a bracketed form, for instance: 123*6777b538SAndroid Build Coastguard Worker // [::1]:90 and [::1] 124*6777b538SAndroid Build Coastguard Worker // 125*6777b538SAndroid Build Coastguard Worker // The resultant |*host| in both cases will be "::1" (not bracketed). 126*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool ParseHostAndPort(std::string_view input, 127*6777b538SAndroid Build Coastguard Worker std::string* host, 128*6777b538SAndroid Build Coastguard Worker int* port); 129*6777b538SAndroid Build Coastguard Worker 130*6777b538SAndroid Build Coastguard Worker // Returns a host:port string for the given URL. 131*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndPort(const GURL& url); 132*6777b538SAndroid Build Coastguard Worker 133*6777b538SAndroid Build Coastguard Worker // Returns a host[:port] string for the given URL, where the port is omitted 134*6777b538SAndroid Build Coastguard Worker // if it is the default for the URL's scheme. 135*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url); 136*6777b538SAndroid Build Coastguard Worker 137*6777b538SAndroid Build Coastguard Worker // Just like above, but takes a SchemeHostPort. 138*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostAndOptionalPort( 139*6777b538SAndroid Build Coastguard Worker const url::SchemeHostPort& scheme_host_port); 140*6777b538SAndroid Build Coastguard Worker 141*6777b538SAndroid Build Coastguard Worker // Returns the hostname by trimming the ending dot, if one exists. 142*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string TrimEndingDot(std::string_view host); 143*6777b538SAndroid Build Coastguard Worker 144*6777b538SAndroid Build Coastguard Worker // Returns either the host from |url|, or, if the host is empty, the full spec. 145*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url); 146*6777b538SAndroid Build Coastguard Worker 147*6777b538SAndroid Build Coastguard Worker // Returns the given domain minus its leftmost label, or the empty string if the 148*6777b538SAndroid Build Coastguard Worker // given domain is just a single label. For normal domain names (not IP 149*6777b538SAndroid Build Coastguard Worker // addresses), this represents the "superdomain" of the given domain. 150*6777b538SAndroid Build Coastguard Worker // Note that this does not take into account anything like the Public Suffix 151*6777b538SAndroid Build Coastguard Worker // List, so the superdomain may end up being a bare eTLD. The returned string is 152*6777b538SAndroid Build Coastguard Worker // not guaranteed to be a valid or canonical hostname, or to make any sense at 153*6777b538SAndroid Build Coastguard Worker // all. 154*6777b538SAndroid Build Coastguard Worker // 155*6777b538SAndroid Build Coastguard Worker // Examples: 156*6777b538SAndroid Build Coastguard Worker // 157*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("assets.example.com") -> "example.com" 158*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("example.net") -> "net" 159*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("littlebox") -> "" 160*6777b538SAndroid Build Coastguard Worker // GetSuperdomain("127.0.0.1") -> "0.0.1" 161*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string GetSuperdomain(std::string_view domain); 162*6777b538SAndroid Build Coastguard Worker 163*6777b538SAndroid Build Coastguard Worker // Returns whether |subdomain| is a subdomain of (or identical to) 164*6777b538SAndroid Build Coastguard Worker // |superdomain|, if both are hostnames (not IP addresses -- for which this 165*6777b538SAndroid Build Coastguard Worker // function is nonsensical). Does not consider the Public Suffix List. 166*6777b538SAndroid Build Coastguard Worker // Returns true if both input strings are empty. 167*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsSubdomainOf(std::string_view subdomain, 168*6777b538SAndroid Build Coastguard Worker std::string_view superdomain); 169*6777b538SAndroid Build Coastguard Worker 170*6777b538SAndroid Build Coastguard Worker // Canonicalizes |host| and returns it. Also fills |host_info| with 171*6777b538SAndroid Build Coastguard Worker // IP address information. |host_info| must not be NULL. 172*6777b538SAndroid Build Coastguard Worker NET_EXPORT std::string CanonicalizeHost(std::string_view host, 173*6777b538SAndroid Build Coastguard Worker url::CanonHostInfo* host_info); 174*6777b538SAndroid Build Coastguard Worker 175*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is not an IP address and is compliant with a set of 176*6777b538SAndroid Build Coastguard Worker // rules based on RFC 1738 and tweaked to be compatible with the real world. 177*6777b538SAndroid Build Coastguard Worker // The rules are: 178*6777b538SAndroid Build Coastguard Worker // * One or more non-empty labels separated by '.', each no more than 63 179*6777b538SAndroid Build Coastguard Worker // characters. 180*6777b538SAndroid Build Coastguard Worker // * Each component contains only alphanumeric characters and '-' or '_' 181*6777b538SAndroid Build Coastguard Worker // * The last component begins with an alphanumeric character 182*6777b538SAndroid Build Coastguard Worker // * Optional trailing dot after last component (means "treat as FQDN") 183*6777b538SAndroid Build Coastguard Worker // * Total size (including optional trailing dot, whether or not actually 184*6777b538SAndroid Build Coastguard Worker // present in `host`) no more than 254 characters. 185*6777b538SAndroid Build Coastguard Worker // 186*6777b538SAndroid Build Coastguard Worker // NOTE: You should only pass in hosts that have been returned from 187*6777b538SAndroid Build Coastguard Worker // CanonicalizeHost(), or you may not get accurate results. 188*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsCanonicalizedHostCompliant(std::string_view host); 189*6777b538SAndroid Build Coastguard Worker 190*6777b538SAndroid Build Coastguard Worker // Returns true if |hostname| contains a non-registerable or non-assignable 191*6777b538SAndroid Build Coastguard Worker // domain name (eg: a gTLD that has not been assigned by IANA) or an IP address 192*6777b538SAndroid Build Coastguard Worker // that falls in an range reserved for non-publicly routable networks. 193*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsHostnameNonUnique(std::string_view hostname); 194*6777b538SAndroid Build Coastguard Worker 195*6777b538SAndroid Build Coastguard Worker // Returns true if the host part of |url| is a local host name according to 196*6777b538SAndroid Build Coastguard Worker // HostStringIsLocalhost. 197*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsLocalhost(const GURL& url); 198*6777b538SAndroid Build Coastguard Worker 199*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is one of the local hostnames 200*6777b538SAndroid Build Coastguard Worker // (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1). 201*6777b538SAndroid Build Coastguard Worker // "[::1]" is not detected as a local hostname. Do not use this method to check 202*6777b538SAndroid Build Coastguard Worker // whether the host part of a URL is a local host name; use IsLocalhost instead. 203*6777b538SAndroid Build Coastguard Worker // 204*6777b538SAndroid Build Coastguard Worker // Note that this function does not check for IP addresses other than 205*6777b538SAndroid Build Coastguard Worker // the above, although other IP addresses may point to the local 206*6777b538SAndroid Build Coastguard Worker // machine. 207*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool HostStringIsLocalhost(std::string_view host); 208*6777b538SAndroid Build Coastguard Worker 209*6777b538SAndroid Build Coastguard Worker // Strip the portions of |url| that aren't core to the network request. 210*6777b538SAndroid Build Coastguard Worker // - user name / password 211*6777b538SAndroid Build Coastguard Worker // - reference section 212*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url); 213*6777b538SAndroid Build Coastguard Worker 214*6777b538SAndroid Build Coastguard Worker // Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin 215*6777b538SAndroid Build Coastguard Worker // checks and authentication, where WebSocket URLs are treated as if they were 216*6777b538SAndroid Build Coastguard Worker // HTTP. It is an error to call this function with a url with a scheme other 217*6777b538SAndroid Build Coastguard Worker // than "ws" or "wss". 218*6777b538SAndroid Build Coastguard Worker NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url); 219*6777b538SAndroid Build Coastguard Worker 220*6777b538SAndroid Build Coastguard Worker // Returns whether the given url scheme is of a standard scheme type that can 221*6777b538SAndroid Build Coastguard Worker // have hostnames representing domains (i.e. network hosts). 222*6777b538SAndroid Build Coastguard Worker // See url::SchemeType. 223*6777b538SAndroid Build Coastguard Worker NET_EXPORT bool IsStandardSchemeWithNetworkHost(std::string_view scheme); 224*6777b538SAndroid Build Coastguard Worker 225*6777b538SAndroid Build Coastguard Worker // Extracts the unescaped username/password from |url|, saving the results 226*6777b538SAndroid Build Coastguard Worker // into |*username| and |*password|. 227*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url, 228*6777b538SAndroid Build Coastguard Worker std::u16string* username, 229*6777b538SAndroid Build Coastguard Worker std::u16string* password); 230*6777b538SAndroid Build Coastguard Worker 231*6777b538SAndroid Build Coastguard Worker // Returns true if the url's host is a Google server. This should only be used 232*6777b538SAndroid Build Coastguard Worker // for histograms and shouldn't be used to affect behavior. 233*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url); 234*6777b538SAndroid Build Coastguard Worker 235*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is the hostname of a Google server. This should only 236*6777b538SAndroid Build Coastguard Worker // be used for histograms and shouldn't be used to affect behavior. 237*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsGoogleHost(std::string_view host); 238*6777b538SAndroid Build Coastguard Worker 239*6777b538SAndroid Build Coastguard Worker // Returns true if |host| is the hostname of a Google server and HTTPS DNS 240*6777b538SAndroid Build Coastguard Worker // record of |host| is expected to indicate H3 support. This should only be used 241*6777b538SAndroid Build Coastguard Worker // for histograms and shouldn't be used to affect behavior. 242*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsGoogleHostWithAlpnH3(std::string_view host); 243*6777b538SAndroid Build Coastguard Worker 244*6777b538SAndroid Build Coastguard Worker // This function tests |host| to see if it is of any local hostname form. 245*6777b538SAndroid Build Coastguard Worker // |host| is normalized before being tested. 246*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE bool IsLocalHostname(std::string_view host); 247*6777b538SAndroid Build Coastguard Worker 248*6777b538SAndroid Build Coastguard Worker // The notion of unescaping used in the application/x-www-form-urlencoded 249*6777b538SAndroid Build Coastguard Worker // parser. https://url.spec.whatwg.org/#concept-urlencoded-parser 250*6777b538SAndroid Build Coastguard Worker NET_EXPORT_PRIVATE std::string UnescapePercentEncodedUrl( 251*6777b538SAndroid Build Coastguard Worker std::string_view input); 252*6777b538SAndroid Build Coastguard Worker 253*6777b538SAndroid Build Coastguard Worker } // namespace net 254*6777b538SAndroid Build Coastguard Worker 255*6777b538SAndroid Build Coastguard Worker #endif // NET_BASE_URL_UTIL_H_ 256