1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef URL_SCHEME_HOST_PORT_H_
6 #define URL_SCHEME_HOST_PORT_H_
7
8 #include <stdint.h>
9
10 #include <string>
11 #include <string_view>
12
13 #include "base/component_export.h"
14
15 class GURL;
16
17 namespace url {
18
19 struct Parsed;
20
21 // This class represents a (scheme, host, port) tuple extracted from a URL.
22 //
23 // The primary purpose of this class is to represent relevant network-authority
24 // information for a URL. It is _not_ an Origin, as described in RFC 6454. In
25 // particular, it is generally NOT the right thing to use for security
26 // decisions.
27 //
28 // Instead, this class is a mechanism for simplifying URLs with standard schemes
29 // (that is, those which follow the generic syntax of RFC 3986) down to the
30 // uniquely identifying information necessary for network fetches. This makes it
31 // suitable as a cache key for a collection of active connections, for instance.
32 // It may, however, be inappropriate to use as a cache key for persistent
33 // storage associated with a host.
34 //
35 // In particular, note that:
36 //
37 // * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
38 // (e.g. those registered with GURL as "standard schemes"). Non-standard
39 // schemes such as "blob", "filesystem", "data", and "javascript" can only be
40 // represented as invalid SchemeHostPort objects.
41 //
42 // * For example, the "file" scheme follows the standard syntax, but it is
43 // important to note that the authority portion (host, port) is optional.
44 // URLs without an authority portion will be represented with an empty string
45 // for the host, and a port of 0 (e.g. "file:///etc/hosts" =>
46 // ("file", "", 0)), and URLs with a host-only authority portion will be
47 // represented with a port of 0 (e.g. "file://example.com/etc/hosts" =>
48 // ("file", "example.com", 0)). See Section 3 of RFC 3986 to better understand
49 // these constructs.
50 //
51 // * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
52 // particular, it has no notion of an opaque Origin. If you need to take
53 // opaque origins into account (and, if you're making security-relevant
54 // decisions then you absolutely do), please use 'url::Origin' instead.
55 //
56 // Usage:
57 //
58 // * SchemeHostPort objects are commonly created from GURL objects:
59 //
60 // GURL url("https://example.com/");
61 // url::SchemeHostPort tuple(url);
62 // tuple.scheme(); // "https"
63 // tuple.host(); // "example.com"
64 // tuple.port(); // 443
65 //
66 // * Objects may also be explicitly created and compared:
67 //
68 // url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
69 // tuple.scheme(); // "https"
70 // tuple.host(); // "example.com"
71 // tuple.port(); // 443
72 //
73 // GURL url("https://example.com/");
74 // tuple == url::SchemeHostPort(url); // true
COMPONENT_EXPORT(URL)75 class COMPONENT_EXPORT(URL) SchemeHostPort {
76 public:
77 // Creates an invalid (scheme, host, port) tuple, which represents an invalid
78 // or non-standard URL.
79 SchemeHostPort();
80
81 // Creates a (scheme, host, port) tuple. |host| must be a canonicalized
82 // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
83 // must be a standard scheme. |port| must be 0 if |scheme| does not support
84 // ports (e.g. 'file').
85 //
86 // Copies the data in |scheme| and |host|.
87 SchemeHostPort(std::string_view scheme, std::string_view host, uint16_t port);
88
89 // Metadata influencing whether or not the constructor should sanity check
90 // host canonicalization.
91 enum ConstructPolicy { CHECK_CANONICALIZATION, ALREADY_CANONICALIZED };
92
93 // Creates a (scheme, host, port) tuple without performing sanity checking
94 // that the host and port are canonicalized. This should only be used when
95 // converting between already normalized types, and should NOT be used for
96 // IPC.
97 SchemeHostPort(std::string scheme,
98 std::string host,
99 uint16_t port,
100 ConstructPolicy policy);
101
102 // Creates a (scheme, host, port) tuple from |url|, as described at
103 // https://tools.ietf.org/html/rfc6454#section-4
104 //
105 // If |url| is invalid or non-standard, the result will be an invalid
106 // SchemeHostPort object.
107 explicit SchemeHostPort(const GURL& url);
108
109 // Copyable and movable.
110 SchemeHostPort(const SchemeHostPort&) = default;
111 SchemeHostPort& operator=(const SchemeHostPort&) = default;
112 SchemeHostPort(SchemeHostPort&&) noexcept = default;
113 SchemeHostPort& operator=(SchemeHostPort&&) noexcept = default;
114
115 ~SchemeHostPort();
116
117 // Returns the host component, in URL form. That is all IDN domain names will
118 // be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'), and
119 // and all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]").
120 const std::string& host() const { return host_; }
121 const std::string& scheme() const { return scheme_; }
122 uint16_t port() const { return port_; }
123 bool IsValid() const;
124
125 // Serializes the SchemeHostPort tuple to a canonical form.
126 //
127 // While this string form resembles the Origin serialization specified in
128 // Section 6.2 of RFC 6454, it is important to note that invalid
129 // SchemeHostPort tuples serialize to the empty string, rather than being
130 // serialized as would an opaque Origin.
131 std::string Serialize() const;
132
133 // Efficiently returns what GURL(Serialize()) would return, without needing to
134 // re-parse the URL. Note: this still performs allocations to copy data into
135 // GURL, so please avoid using this method if you only need to work on
136 // schemes, hosts, or ports individually.
137 // For example, see crrev.com/c/3637099/comments/782360d0_e14757be.
138 GURL GetURL() const;
139
140 // Estimates dynamic memory usage.
141 // See base/trace_event/memory_usage_estimator.h for more info.
142 size_t EstimateMemoryUsage() const;
143
144 // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
145 // are exact matches.
146 //
147 // Note that this comparison is _not_ the same as an origin-based comparison.
148 // In particular, invalid SchemeHostPort objects match each other (and
149 // themselves). Opaque origins, on the other hand, would not.
150 bool operator==(const SchemeHostPort& other) const {
151 return port_ == other.port() && scheme_ == other.scheme() &&
152 host_ == other.host();
153 }
154 bool operator!=(const SchemeHostPort& other) const {
155 return !(*this == other);
156 }
157 // Allows SchemeHostPort to be used as a key in STL (for example, a std::set
158 // or std::map).
159 bool operator<(const SchemeHostPort& other) const;
160
161 // Whether to discard host and port information for a specific scheme.
162 //
163 // Note that this hack is required to avoid breaking existing Android WebView
164 // behaviors. Currently, Android WebView doesn't use host and port information
165 // for non-special URLs. See https://crbug.com/40063064 for details.
166 static bool ShouldDiscardHostAndPort(const std::string_view scheme);
167
168 std::string SerializeInternal(url::Parsed* parsed) const;
169
170 std::string scheme_;
171 std::string host_;
172 uint16_t port_ = 0;
173 };
174
175 COMPONENT_EXPORT(URL)
176 std::ostream& operator<<(std::ostream& out,
177 const SchemeHostPort& scheme_host_port);
178
179 } // namespace url
180
181 #endif // URL_SCHEME_HOST_PORT_H_
182