xref: /aosp_15_r20/external/cronet/url/scheme_host_port.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/scheme_host_port.h"
6 
7 #include <stdint.h>
8 #include <string.h>
9 
10 #include <ostream>
11 #include <string_view>
12 #include <tuple>
13 
14 #include "base/check_op.h"
15 #include "base/containers/contains.h"
16 #include "base/notreached.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/trace_event/memory_usage_estimator.h"
20 #include "url/gurl.h"
21 #include "url/third_party/mozilla/url_parse.h"
22 #include "url/url_canon.h"
23 #include "url/url_canon_stdstring.h"
24 #include "url/url_constants.h"
25 #include "url/url_features.h"
26 #include "url/url_util.h"
27 
28 namespace url {
29 
30 namespace {
31 
IsCanonicalHost(const std::string_view & host)32 bool IsCanonicalHost(const std::string_view& host) {
33   std::string canon_host;
34 
35   // Try to canonicalize the host (copy/pasted from net/base. :( ).
36   const Component raw_host_component(0,
37                                      base::checked_cast<int>(host.length()));
38   StdStringCanonOutput canon_host_output(&canon_host);
39   CanonHostInfo host_info;
40   CanonicalizeHostVerbose(host.data(), raw_host_component,
41                           &canon_host_output, &host_info);
42 
43   if (host_info.out_host.is_nonempty() &&
44       host_info.family != CanonHostInfo::BROKEN) {
45     // Success!  Assert that there's no extra garbage.
46     canon_host_output.Complete();
47     DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
48   } else {
49     // Empty host, or canonicalization failed.
50     canon_host.clear();
51   }
52 
53   return host == canon_host;
54 }
55 
56 // Note: When changing IsValidInput, consider also updating
57 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
58 // behavior between these 2 layers, but we should avoid introducing new
59 // differences).
IsValidInput(const std::string_view & scheme,const std::string_view & host,uint16_t port,SchemeHostPort::ConstructPolicy policy)60 bool IsValidInput(const std::string_view& scheme,
61                   const std::string_view& host,
62                   uint16_t port,
63                   SchemeHostPort::ConstructPolicy policy) {
64   // Empty schemes are never valid.
65   if (scheme.empty())
66     return false;
67 
68   // about:blank and other no-access schemes translate into an opaque origin.
69   // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
70   if (base::Contains(GetNoAccessSchemes(), scheme))
71     return false;
72 
73   SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
74   bool is_standard = GetStandardSchemeType(
75       scheme.data(),
76       Component(0, base::checked_cast<int>(scheme.length())),
77       &scheme_type);
78   if (!is_standard) {
79     // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
80     // non-standard schemes are currently allowed to be tuple origins.
81     //
82     // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
83     // remove this local scheme exception.
84     if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
85       // If the flag is enabled, a host can be empty for non-special URLs.
86       // Therefore, we don't check a host nor port.
87       if (base::Contains(GetLocalSchemes(), scheme)) {
88         return true;
89       }
90     } else {
91       if (base::Contains(GetLocalSchemes(), scheme) && host.empty() &&
92           port == 0) {
93         return true;
94       }
95     }
96 
97     // Otherwise, allow non-standard schemes only if the Android WebView
98     // workaround is enabled.
99     return AllowNonStandardSchemesForAndroidWebView();
100   }
101 
102   switch (scheme_type) {
103     case SCHEME_WITH_HOST_AND_PORT:
104     case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
105       // A URL with |scheme| is required to have the host and port, so return an
106       // invalid instance if host is not given.  Note that a valid port is
107       // always provided by SchemeHostPort(const GURL&) constructor (a missing
108       // port is replaced with a default port if needed by
109       // GURL::EffectiveIntPort()).
110       if (host.empty())
111         return false;
112 
113       // Don't do an expensive canonicalization if the host is already
114       // canonicalized.
115       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
116              IsCanonicalHost(host));
117       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
118           !IsCanonicalHost(host)) {
119         return false;
120       }
121 
122       return true;
123 
124     case SCHEME_WITH_HOST:
125       if (port != 0) {
126         // Return an invalid object if a URL with the scheme never represents
127         // the port data but the given |port| is non-zero.
128         return false;
129       }
130 
131       // Don't do an expensive canonicalization if the host is already
132       // canonicalized.
133       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
134              IsCanonicalHost(host));
135       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
136           !IsCanonicalHost(host)) {
137         return false;
138       }
139 
140       return true;
141 
142     case SCHEME_WITHOUT_AUTHORITY:
143       return false;
144 
145     default:
146       NOTREACHED();
147       return false;
148   }
149 }
150 
151 }  // namespace
152 
// Default constructor. Members keep whatever default state the class
// declaration gives them (the declaration is not visible in this file);
// presumably |scheme_| is left empty, which IsValid() reports as invalid.
153 SchemeHostPort::SchemeHostPort() = default;
154 
SchemeHostPort(std::string scheme,std::string host,uint16_t port,ConstructPolicy policy)155 SchemeHostPort::SchemeHostPort(std::string scheme,
156                                std::string host,
157                                uint16_t port,
158                                ConstructPolicy policy) {
159   if (ShouldDiscardHostAndPort(scheme)) {
160     host = "";
161     port = 0;
162   }
163 
164   if (!IsValidInput(scheme, host, port, policy)) {
165     DCHECK(!IsValid());
166     return;
167   }
168 
169   scheme_ = std::move(scheme);
170   host_ = std::move(host);
171   port_ = port;
172   DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
173                     << " Port: " << port;
174 }
175 
SchemeHostPort(std::string_view scheme,std::string_view host,uint16_t port)176 SchemeHostPort::SchemeHostPort(std::string_view scheme,
177                                std::string_view host,
178                                uint16_t port)
179     : SchemeHostPort(std::string(scheme),
180                      std::string(host),
181                      port,
182                      ConstructPolicy::CHECK_CANONICALIZATION) {}
183 
SchemeHostPort(const GURL & url)184 SchemeHostPort::SchemeHostPort(const GURL& url) {
185   if (!url.is_valid())
186     return;
187 
188   std::string_view scheme = url.scheme_piece();
189   std::string_view host = url.host_piece();
190 
191   // A valid GURL never returns PORT_INVALID.
192   int port = url.EffectiveIntPort();
193   if (port == PORT_UNSPECIFIED) {
194     port = 0;
195   } else {
196     DCHECK_GE(port, 0);
197     DCHECK_LE(port, 65535);
198   }
199 
200   if (ShouldDiscardHostAndPort(scheme)) {
201     host = "";
202     port = 0;
203   }
204 
205   if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
206     return;
207 
208   scheme_ = std::string(scheme);
209   host_ = std::string(host);
210   port_ = port;
211 }
212 
// Compiler-generated destructor; this file performs no explicit cleanup.
213 SchemeHostPort::~SchemeHostPort() = default;
214 
IsValid() const215 bool SchemeHostPort::IsValid() const {
216   // It suffices to just check |scheme_| for emptiness; the other fields are
217   // never present without it.
218   DCHECK(!scheme_.empty() || host_.empty());
219   DCHECK(!scheme_.empty() || port_ == 0);
220   return !scheme_.empty();
221 }
222 
Serialize() const223 std::string SchemeHostPort::Serialize() const {
224   // Null checking for |parsed| in SerializeInternal is probably slower than
225   // just filling it in and discarding it here.
226   url::Parsed parsed;
227   return SerializeInternal(&parsed);
228 }
229 
GetURL() const230 GURL SchemeHostPort::GetURL() const {
231   url::Parsed parsed;
232   std::string serialized = SerializeInternal(&parsed);
233 
234   if (!IsValid())
235     return GURL(std::move(serialized), parsed, false);
236 
237   // SchemeHostPort does not have enough information to determine if an empty
238   // host is valid or not for the given scheme. Force re-parsing.
239   DCHECK(!scheme_.empty());
240   if (host_.empty())
241     return GURL(serialized);
242 
243   // If the serialized string is passed to GURL for parsing, it will append an
244   // empty path "/" for standard URLs. Add that here. Note: per RFC 6454 we
245   // cannot do this for normal Origin serialization.
246   DCHECK(!parsed.path.is_valid());
247   if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
248     // Append "/" only if the URL is standard. If the flag is enabled,
249     // non-special URLs can have an empty path and GURL doesn't append "/" to
250     // that.
251     if (IsStandardScheme(scheme_)) {
252       parsed.path = Component(serialized.length(), 1);
253       serialized.append("/");
254     }
255   } else {
256     parsed.path = Component(serialized.length(), 1);
257     serialized.append("/");
258   }
259   return GURL(std::move(serialized), parsed, true);
260 }
261 
EstimateMemoryUsage() const262 size_t SchemeHostPort::EstimateMemoryUsage() const {
263   return base::trace_event::EstimateMemoryUsage(scheme_) +
264          base::trace_event::EstimateMemoryUsage(host_);
265 }
266 
operator <(const SchemeHostPort & other) const267 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
268   return std::tie(port_, scheme_, host_) <
269          std::tie(other.port_, other.scheme_, other.host_);
270 }
271 
SerializeInternal(url::Parsed * parsed) const272 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
273   std::string result;
274   if (!IsValid())
275     return result;
276 
277   // Reserve enough space for the "normal" case of scheme://host/.
278   result.reserve(scheme_.size() + host_.size() + 4);
279 
280   if (!scheme_.empty()) {
281     parsed->scheme = Component(0, scheme_.length());
282     result.append(scheme_);
283   }
284 
285   result.append(kStandardSchemeSeparator);
286 
287   if (!host_.empty()) {
288     parsed->host = Component(result.length(), host_.length());
289     result.append(host_);
290   }
291 
292   // Omit the port component if the port matches with the default port
293   // defined for the scheme, if any.
294   int default_port = DefaultPortForScheme(scheme_.data(),
295                                           static_cast<int>(scheme_.length()));
296   if (default_port == PORT_UNSPECIFIED)
297     return result;
298   if (port_ != default_port) {
299     result.push_back(':');
300     std::string port(base::NumberToString(port_));
301     parsed->port = Component(result.length(), port.length());
302     result.append(std::move(port));
303   }
304 
305   return result;
306 }
307 
ShouldDiscardHostAndPort(const std::string_view scheme)308 bool SchemeHostPort::ShouldDiscardHostAndPort(const std::string_view scheme) {
309   return IsAndroidWebViewHackEnabledScheme(scheme) &&
310          IsUsingStandardCompliantNonSpecialSchemeURLParsing();
311 }
312 
operator <<(std::ostream & out,const SchemeHostPort & scheme_host_port)313 std::ostream& operator<<(std::ostream& out,
314                          const SchemeHostPort& scheme_host_port) {
315   return out << scheme_host_port.Serialize();
316 }
317 
318 }  // namespace url
319