1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "url/scheme_host_port.h"
6
7 #include <stdint.h>
8 #include <string.h>
9
10 #include <ostream>
11 #include <string_view>
12 #include <tuple>
13
14 #include "base/check_op.h"
15 #include "base/containers/contains.h"
16 #include "base/notreached.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/trace_event/memory_usage_estimator.h"
20 #include "url/gurl.h"
21 #include "url/third_party/mozilla/url_parse.h"
22 #include "url/url_canon.h"
23 #include "url/url_canon_stdstring.h"
24 #include "url/url_constants.h"
25 #include "url/url_features.h"
26 #include "url/url_util.h"
27
28 namespace url {
29
30 namespace {
31
IsCanonicalHost(const std::string_view & host)32 bool IsCanonicalHost(const std::string_view& host) {
33 std::string canon_host;
34
35 // Try to canonicalize the host (copy/pasted from net/base. :( ).
36 const Component raw_host_component(0,
37 base::checked_cast<int>(host.length()));
38 StdStringCanonOutput canon_host_output(&canon_host);
39 CanonHostInfo host_info;
40 CanonicalizeHostVerbose(host.data(), raw_host_component,
41 &canon_host_output, &host_info);
42
43 if (host_info.out_host.is_nonempty() &&
44 host_info.family != CanonHostInfo::BROKEN) {
45 // Success! Assert that there's no extra garbage.
46 canon_host_output.Complete();
47 DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
48 } else {
49 // Empty host, or canonicalization failed.
50 canon_host.clear();
51 }
52
53 return host == canon_host;
54 }
55
56 // Note: When changing IsValidInput, consider also updating
57 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
58 // behavior between these 2 layers, but we should avoid introducing new
59 // differences).
IsValidInput(const std::string_view & scheme,const std::string_view & host,uint16_t port,SchemeHostPort::ConstructPolicy policy)60 bool IsValidInput(const std::string_view& scheme,
61 const std::string_view& host,
62 uint16_t port,
63 SchemeHostPort::ConstructPolicy policy) {
64 // Empty schemes are never valid.
65 if (scheme.empty())
66 return false;
67
68 // about:blank and other no-access schemes translate into an opaque origin.
69 // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
70 if (base::Contains(GetNoAccessSchemes(), scheme))
71 return false;
72
73 SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
74 bool is_standard = GetStandardSchemeType(
75 scheme.data(),
76 Component(0, base::checked_cast<int>(scheme.length())),
77 &scheme_type);
78 if (!is_standard) {
79 // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
80 // non-standard schemes are currently allowed to be tuple origins.
81 //
82 // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
83 // remove this local scheme exception.
84 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
85 // If the flag is enabled, a host can be empty for non-special URLs.
86 // Therefore, we don't check a host nor port.
87 if (base::Contains(GetLocalSchemes(), scheme)) {
88 return true;
89 }
90 } else {
91 if (base::Contains(GetLocalSchemes(), scheme) && host.empty() &&
92 port == 0) {
93 return true;
94 }
95 }
96
97 // Otherwise, allow non-standard schemes only if the Android WebView
98 // workaround is enabled.
99 return AllowNonStandardSchemesForAndroidWebView();
100 }
101
102 switch (scheme_type) {
103 case SCHEME_WITH_HOST_AND_PORT:
104 case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
105 // A URL with |scheme| is required to have the host and port, so return an
106 // invalid instance if host is not given. Note that a valid port is
107 // always provided by SchemeHostPort(const GURL&) constructor (a missing
108 // port is replaced with a default port if needed by
109 // GURL::EffectiveIntPort()).
110 if (host.empty())
111 return false;
112
113 // Don't do an expensive canonicalization if the host is already
114 // canonicalized.
115 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
116 IsCanonicalHost(host));
117 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
118 !IsCanonicalHost(host)) {
119 return false;
120 }
121
122 return true;
123
124 case SCHEME_WITH_HOST:
125 if (port != 0) {
126 // Return an invalid object if a URL with the scheme never represents
127 // the port data but the given |port| is non-zero.
128 return false;
129 }
130
131 // Don't do an expensive canonicalization if the host is already
132 // canonicalized.
133 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
134 IsCanonicalHost(host));
135 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
136 !IsCanonicalHost(host)) {
137 return false;
138 }
139
140 return true;
141
142 case SCHEME_WITHOUT_AUTHORITY:
143 return false;
144
145 default:
146 NOTREACHED();
147 return false;
148 }
149 }
150
151 } // namespace
152
// Default constructor; relies entirely on the members' default initialization.
// NOTE(review): presumably this yields an invalid (empty-scheme) tuple, since
// the other constructors bail out without assigning members and expect
// !IsValid() afterwards — confirm against the member initializers in the
// header.
SchemeHostPort::SchemeHostPort() = default;
154
SchemeHostPort(std::string scheme,std::string host,uint16_t port,ConstructPolicy policy)155 SchemeHostPort::SchemeHostPort(std::string scheme,
156 std::string host,
157 uint16_t port,
158 ConstructPolicy policy) {
159 if (ShouldDiscardHostAndPort(scheme)) {
160 host = "";
161 port = 0;
162 }
163
164 if (!IsValidInput(scheme, host, port, policy)) {
165 DCHECK(!IsValid());
166 return;
167 }
168
169 scheme_ = std::move(scheme);
170 host_ = std::move(host);
171 port_ = port;
172 DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
173 << " Port: " << port;
174 }
175
SchemeHostPort(std::string_view scheme,std::string_view host,uint16_t port)176 SchemeHostPort::SchemeHostPort(std::string_view scheme,
177 std::string_view host,
178 uint16_t port)
179 : SchemeHostPort(std::string(scheme),
180 std::string(host),
181 port,
182 ConstructPolicy::CHECK_CANONICALIZATION) {}
183
SchemeHostPort(const GURL & url)184 SchemeHostPort::SchemeHostPort(const GURL& url) {
185 if (!url.is_valid())
186 return;
187
188 std::string_view scheme = url.scheme_piece();
189 std::string_view host = url.host_piece();
190
191 // A valid GURL never returns PORT_INVALID.
192 int port = url.EffectiveIntPort();
193 if (port == PORT_UNSPECIFIED) {
194 port = 0;
195 } else {
196 DCHECK_GE(port, 0);
197 DCHECK_LE(port, 65535);
198 }
199
200 if (ShouldDiscardHostAndPort(scheme)) {
201 host = "";
202 port = 0;
203 }
204
205 if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
206 return;
207
208 scheme_ = std::string(scheme);
209 host_ = std::string(host);
210 port_ = port;
211 }
212
// Compiler-generated destructor, defined out of line in this file.
SchemeHostPort::~SchemeHostPort() = default;
214
IsValid() const215 bool SchemeHostPort::IsValid() const {
216 // It suffices to just check |scheme_| for emptiness; the other fields are
217 // never present without it.
218 DCHECK(!scheme_.empty() || host_.empty());
219 DCHECK(!scheme_.empty() || port_ == 0);
220 return !scheme_.empty();
221 }
222
Serialize() const223 std::string SchemeHostPort::Serialize() const {
224 // Null checking for |parsed| in SerializeInternal is probably slower than
225 // just filling it in and discarding it here.
226 url::Parsed parsed;
227 return SerializeInternal(&parsed);
228 }
229
GetURL() const230 GURL SchemeHostPort::GetURL() const {
231 url::Parsed parsed;
232 std::string serialized = SerializeInternal(&parsed);
233
234 if (!IsValid())
235 return GURL(std::move(serialized), parsed, false);
236
237 // SchemeHostPort does not have enough information to determine if an empty
238 // host is valid or not for the given scheme. Force re-parsing.
239 DCHECK(!scheme_.empty());
240 if (host_.empty())
241 return GURL(serialized);
242
243 // If the serialized string is passed to GURL for parsing, it will append an
244 // empty path "/" for standard URLs. Add that here. Note: per RFC 6454 we
245 // cannot do this for normal Origin serialization.
246 DCHECK(!parsed.path.is_valid());
247 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
248 // Append "/" only if the URL is standard. If the flag is enabled,
249 // non-special URLs can have an empty path and GURL doesn't append "/" to
250 // that.
251 if (IsStandardScheme(scheme_)) {
252 parsed.path = Component(serialized.length(), 1);
253 serialized.append("/");
254 }
255 } else {
256 parsed.path = Component(serialized.length(), 1);
257 serialized.append("/");
258 }
259 return GURL(std::move(serialized), parsed, true);
260 }
261
EstimateMemoryUsage() const262 size_t SchemeHostPort::EstimateMemoryUsage() const {
263 return base::trace_event::EstimateMemoryUsage(scheme_) +
264 base::trace_event::EstimateMemoryUsage(host_);
265 }
266
operator <(const SchemeHostPort & other) const267 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
268 return std::tie(port_, scheme_, host_) <
269 std::tie(other.port_, other.scheme_, other.host_);
270 }
271
SerializeInternal(url::Parsed * parsed) const272 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
273 std::string result;
274 if (!IsValid())
275 return result;
276
277 // Reserve enough space for the "normal" case of scheme://host/.
278 result.reserve(scheme_.size() + host_.size() + 4);
279
280 if (!scheme_.empty()) {
281 parsed->scheme = Component(0, scheme_.length());
282 result.append(scheme_);
283 }
284
285 result.append(kStandardSchemeSeparator);
286
287 if (!host_.empty()) {
288 parsed->host = Component(result.length(), host_.length());
289 result.append(host_);
290 }
291
292 // Omit the port component if the port matches with the default port
293 // defined for the scheme, if any.
294 int default_port = DefaultPortForScheme(scheme_.data(),
295 static_cast<int>(scheme_.length()));
296 if (default_port == PORT_UNSPECIFIED)
297 return result;
298 if (port_ != default_port) {
299 result.push_back(':');
300 std::string port(base::NumberToString(port_));
301 parsed->port = Component(result.length(), port.length());
302 result.append(std::move(port));
303 }
304
305 return result;
306 }
307
ShouldDiscardHostAndPort(const std::string_view scheme)308 bool SchemeHostPort::ShouldDiscardHostAndPort(const std::string_view scheme) {
309 return IsAndroidWebViewHackEnabledScheme(scheme) &&
310 IsUsingStandardCompliantNonSpecialSchemeURLParsing();
311 }
312
operator <<(std::ostream & out,const SchemeHostPort & scheme_host_port)313 std::ostream& operator<<(std::ostream& out,
314 const SchemeHostPort& scheme_host_port) {
315 return out << scheme_host_port.Serialize();
316 }
317
318 } // namespace url
319