xref: /aosp_15_r20/external/cronet/url/origin.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef URL_ORIGIN_H_
6 #define URL_ORIGIN_H_
7 
8 #include <stdint.h>
9 
10 #include <memory>
11 #include <optional>
12 #include <string>
13 #include <string_view>
14 
15 #include "base/component_export.h"
16 #include "base/debug/alias.h"
17 #include "base/debug/crash_logging.h"
18 #include "base/gtest_prod_util.h"
19 #include "base/strings/string_util.h"
20 #include "base/trace_event/base_tracing_forward.h"
21 #include "base/unguessable_token.h"
22 #include "build/build_config.h"
23 #include "build/buildflag.h"
24 #include "build/robolectric_buildflags.h"
25 #include "url/scheme_host_port.h"
26 
27 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
28 #include "base/android/jni_android.h"
29 #endif
30 
31 class GURL;
32 
33 namespace blink {
34 class SecurityOrigin;
35 class SecurityOriginTest;
36 class StorageKey;
37 class StorageKeyTest;
38 }  // namespace blink
39 
40 namespace content {
41 class SiteInfo;
42 }  // namespace content
43 
44 namespace IPC {
45 template <class P>
46 struct ParamTraits;
47 }  // namespace IPC
48 
49 namespace ipc_fuzzer {
50 template <class T>
51 struct FuzzTraits;
52 }  // namespace ipc_fuzzer
53 
54 namespace mojo {
55 template <typename DataViewType, typename T>
56 struct StructTraits;
57 struct UrlOriginAdapter;
58 }  // namespace mojo
59 
60 namespace net {
61 class SchemefulSite;
62 }  // namespace net
63 
64 namespace url {
65 
66 namespace mojom {
67 class OriginDataView;
68 }  // namespace mojom
69 
70 // Per https://html.spec.whatwg.org/multipage/origin.html#origin, an origin is
71 // either:
72 // - a tuple origin of (scheme, host, port) as described in RFC 6454.
73 // - an opaque origin with an internal value, and a memory of the tuple origin
74 //   from which it was derived.
75 //
76 // TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
77 // If you only need to extract the bits of a URL which are relevant for a
78 // network connection, use 'url::SchemeHostPort'.
79 //
80 // STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
81 //
82 // This class ought to be used when code needs to determine if two resources
83 // are "same-origin", and when a canonical serialization of an origin is
84 // required. Note that the canonical serialization of an origin *must not* be
85 // used to determine if two resources are same-origin.
86 //
87 // A tuple origin, like 'SchemeHostPort', is composed of a tuple of (scheme,
88 // host, port), but contains a number of additional concepts which make it
89 // appropriate for use as a security boundary and access control mechanism
90 // between contexts. Two tuple origins are same-origin if the tuples are equal.
91 // A tuple origin may also be re-created from its serialization.
92 //
93 // An opaque origin has an internal globally unique identifier. When creating a
94 // new opaque origin from a URL, a fresh globally unique identifier is
95 // generated. However, if an opaque origin is copied or moved, the internal
96 // globally unique identifier is preserved. Two opaque origins are same-origin
97 // iff the globally unique identifiers match. Unlike tuple origins, an opaque
98 // origin cannot be re-created from its serialization, which is always the
99 // string "null".
100 //
101 // IMPORTANT: Since opaque origins always serialize as the string "null", it is
102 // *never* safe to use the serialization for security checks!
103 //
104 // A tuple origin and an opaque origin are never same-origin.
105 //
106 // There are a few subtleties to note:
107 //
108 // * A default constructed Origin is opaque, with no precursor origin.
109 //
110 // * Invalid and non-standard GURLs are parsed as opaque origins. This includes
111 //   non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
112 //
113 // * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
114 //   internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
115 //   is parsed as ('https', 'example.com', 443).
116 //
117 // * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0),
118 //   but their behavior may differ from embedder to embedder.
119 //   TODO(dcheng): This behavior is not consistent with Blink's notion of file
120 //   URLs, which always creates an opaque origin.
121 //
122 // * The host component of an IPv6 address includes brackets, just like the URL
123 //   representation.
124 //
125 // * Constructing origins from GURLs (or from SchemeHostPort) is typically a red
126 //   flag (this is true for `url::Origin::Create` but also to some extent for
127 //   `url::Origin::Resolve`). See docs/security/origin-vs-url.md for more.
128 //
129 // * To answer the question "Are |this| and |that| "same-origin" with each
130 //   other?", use |Origin::IsSameOriginWith|:
131 //
132 //     if (this.IsSameOriginWith(that)) {
133 //       // Amazingness goes here.
134 //     }
COMPONENT_EXPORT(URL)135 class COMPONENT_EXPORT(URL) Origin {
136  public:
137   // Creates an opaque Origin with a nonce that is different from all previously
138   // existing origins.
139   Origin();
140 
141   // WARNING: Converting an URL into an Origin is usually a red flag. See
142   // //docs/security/origin-vs-url.md for more details. Some discussion about
143   // deprecating the Create method can be found in https://crbug.com/1270878.
144   //
145   // Creates an Origin from `url`, as described at
146   // https://url.spec.whatwg.org/#origin, with the following additions:
147   // 1. If `url` is invalid or non-standard, an opaque Origin is constructed.
148   // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
149   //    out of everything in the URL which follows the scheme).
150   // 3. 'file' URLs all parse as ("file", "", 0).
151   //
152   // WARNING: `url::Origin::Create(url)` can give unexpected results if:
153   // 1) `url` is "about:blank", or "about:srcdoc" (returning unique, opaque
154   //    origin rather than the real origin of the frame)
155   // 2) `url` comes from a sandboxed frame (potentially returning a non-opaque
156   //    origin, when an opaque one is needed; see also
157   //    https://www.html5rocks.com/en/tutorials/security/sandboxed-iframes/)
158   // 3) Wrong `url` is used - e.g. in some navigations `base_url_for_data_url`
159   //    might need to be used instead of relying on
160   //    `content::NavigationHandle::GetURL`.
161   //
162   // WARNING: The returned Origin may have a different scheme and host from
163   // `url` (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL).
164   //
165   // WARNING: data: URLs will be correctly be translated into opaque origins,
166   // but the precursor origin will be lost (unlike with `url::Origin::Resolve`).
167   static Origin Create(const GURL& url);
168 
169   // Creates an Origin for the resource `url` as if it were requested
170   // from the context of `base_origin`. If `url` is standard
171   // (in the sense that it embeds a complete origin, like http/https),
172   // this returns the same value as would Create().
173   //
174   // If `url` is "about:blank" or "about:srcdoc", this returns a copy of
175   // `base_origin`.
176   //
177   // Otherwise, returns a new opaque origin derived from `base_origin`.
178   // In this case, the resulting opaque origin will inherit the tuple
179   // (or precursor tuple) of `base_origin`, but will not be same origin
180   // with `base_origin`, even if `base_origin` is already opaque.
181   static Origin Resolve(const GURL& url, const Origin& base_origin);
182 
183   // Copyable and movable.
184   Origin(const Origin&);
185   Origin& operator=(const Origin&);
186   Origin(Origin&&) noexcept;
187   Origin& operator=(Origin&&) noexcept;
188 
189   // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
190   // must be valid and canonicalized. Returns nullopt if any parameter is not
191   // canonical, or if all the parameters are empty.
192   //
193   // This constructor should be used in order to pass 'Origin' objects back and
194   // forth over IPC (as transitioning through GURL would risk potentially
195   // dangerous recanonicalization); other potential callers should prefer the
196   // 'GURL'-based constructor.
197   static std::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
198       std::string_view scheme,
199       std::string_view host,
200       uint16_t port);
201 
202   // Creates an origin without sanity checking that the host is canonicalized.
203   // This should only be used when converting between already normalized types,
204   // and should NOT be used for IPC. Method takes std::strings for use with move
205   // operators to avoid copies.
206   static Origin CreateFromNormalizedTuple(std::string scheme,
207                                           std::string host,
208                                           uint16_t port);
209 
210   ~Origin();
211 
212   // For opaque origins, these return ("", "", 0).
213   const std::string& scheme() const {
214     return !opaque() ? tuple_.scheme() : base::EmptyString();
215   }
216   const std::string& host() const {
217     return !opaque() ? tuple_.host() : base::EmptyString();
218   }
219   uint16_t port() const { return !opaque() ? tuple_.port() : 0; }
220 
221   bool opaque() const { return nonce_.has_value(); }
222 
223   // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
224   // the addition that all Origins with a 'file' scheme serialize to "file://".
225   std::string Serialize() const;
226 
227   // Two non-opaque Origins are "same-origin" if their schemes, hosts, and ports
228   // are exact matches. Two opaque origins are same-origin only if their
229   // internal nonce values match. A non-opaque origin is never same-origin with
230   // an opaque origin.
231   bool IsSameOriginWith(const Origin& other) const;
232   bool operator==(const Origin& other) const { return IsSameOriginWith(other); }
233   bool operator!=(const Origin& other) const {
234     return !IsSameOriginWith(other);
235   }
236 
237   // Non-opaque origin is "same-origin" with `url` if their schemes, hosts, and
238   // ports are exact matches. Opaque origin is never "same-origin" with any
239   // `url`.  about:blank, about:srcdoc, and invalid GURLs are never
240   // "same-origin" with any origin. This method is a shorthand for
241   // `origin.IsSameOriginWith(url::Origin::Create(url))`.
242   //
243   // See also CanBeDerivedFrom.
244   bool IsSameOriginWith(const GURL& url) const;
245 
246   // This method returns true for any |url| which if navigated to could result
247   // in an origin compatible with |this|.
248   bool CanBeDerivedFrom(const GURL& url) const;
249 
250   // Get the scheme, host, and port from which this origin derives. For
251   // a tuple Origin, this gives the same values as calling scheme(), host()
252   // and port(). For an opaque Origin that was created by calling
253   // Origin::DeriveNewOpaqueOrigin() on a precursor or Origin::Resolve(),
254   // this returns the tuple inherited from the precursor.
255   //
256   // If this Origin is opaque and was created via the default constructor or
257   // Origin::Create(), the precursor origin is unknown.
258   //
259   // Use with great caution: opaque origins should generally not inherit
260   // privileges from the origins they derive from. However, in some cases
261   // (such as restrictions on process placement, or determining the http lock
262   // icon) this information may be relevant to ensure that entering an
263   // opaque origin does not grant privileges initially denied to the original
264   // non-opaque origin.
265   //
266   // This method has a deliberately obnoxious name to prompt caution in its use.
267   const SchemeHostPort& GetTupleOrPrecursorTupleIfOpaque() const {
268     return tuple_;
269   }
270 
271   // Efficiently returns what GURL(Serialize()) would without re-parsing the
272   // URL. This can be used for the (rare) times a GURL representation is needed
273   // for an Origin.
274   // Note: The returned URL will not necessarily be serialized to the same value
275   // as the Origin would. The GURL will have an added "/" path for Origins with
276   // valid SchemeHostPorts and file Origins.
277   //
278   // Try not to use this method under normal circumstances, as it loses type
279   // information. Downstream consumers can mistake the returned GURL with a full
280   // URL (e.g. with a path component).
281   GURL GetURL() const;
282 
283   // Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
284   bool DomainIs(std::string_view canonical_domain) const;
285 
286   // Allows Origin to be used as a key in STL (for example, a std::set or
287   // std::map).
288   bool operator<(const Origin& other) const;
289 
290   // Creates a new opaque origin that is guaranteed to be cross-origin to all
291   // currently existing origins. An origin created by this method retains its
292   // identity across copies. Copies are guaranteed to be same-origin to each
293   // other, e.g.
294   //
295   //   url::Origin page = Origin::Create(GURL("http://example.com"))
296   //   url::Origin a = page.DeriveNewOpaqueOrigin();
297   //   url::Origin b = page.DeriveNewOpaqueOrigin();
298   //   url::Origin c = a;
299   //   url::Origin d = b;
300   //
301   // |a| and |c| are same-origin, since |c| was copied from |a|. |b| and |d| are
302   // same-origin as well, since |d| was copied from |b|. All other combinations
303   // of origins are considered cross-origin, e.g. |a| is cross-origin to |b| and
304   // |d|, |b| is cross-origin to |a| and |c|, |c| is cross-origin to |b| and
305   // |d|, and |d| is cross-origin to |a| and |c|.
306   Origin DeriveNewOpaqueOrigin() const;
307 
308   // Returns the nonce associated with the origin, if it is opaque, or nullptr
309   // otherwise. This is only for use in tests.
310   const base::UnguessableToken* GetNonceForTesting() const;
311 
312   // Creates a string representation of the object that can be used for logging
313   // and debugging. It serializes the internal state, such as the nonce value
314   // and precursor information.
315   std::string GetDebugString(bool include_nonce = true) const;
316 
317 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
318   base::android::ScopedJavaLocalRef<jobject> ToJavaObject() const;
319   static Origin FromJavaObject(
320       const base::android::JavaRef<jobject>& java_origin);
321   static jlong CreateNative(JNIEnv* env,
322                             const base::android::JavaRef<jstring>& java_scheme,
323                             const base::android::JavaRef<jstring>& java_host,
324                             uint16_t port,
325                             bool is_opaque,
326                             uint64_t tokenHighBits,
327                             uint64_t tokenLowBits);
328 #endif  // BUILDFLAG(IS_ANDROID)
329 
330   void WriteIntoTrace(perfetto::TracedValue context) const;
331 
332   // Estimates dynamic memory usage.
333   // See base/trace_event/memory_usage_estimator.h for more info.
334   size_t EstimateMemoryUsage() const;
335 
336  private:
337 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
338   friend Origin CreateOpaqueOriginForAndroid(
339       const std::string& scheme,
340       const std::string& host,
341       uint16_t port,
342       const base::UnguessableToken& nonce_token);
343 #endif
344   friend class blink::SecurityOrigin;
345   friend class blink::SecurityOriginTest;
346   friend class blink::StorageKey;
347   // SiteInfo needs the nonce to compute the site URL for some opaque origins,
348   // like data: URLs.
349   friend class content::SiteInfo;
350   // SchemefulSite needs access to the serialization/deserialization logic which
351   // includes the nonce.
352   friend class net::SchemefulSite;
353   friend class OriginTest;
354   friend struct mojo::UrlOriginAdapter;
355   friend struct ipc_fuzzer::FuzzTraits<Origin>;
356   friend struct mojo::StructTraits<url::mojom::OriginDataView, url::Origin>;
357   friend IPC::ParamTraits<url::Origin>;
358   friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
359                                                         const Origin& origin);
360   friend class blink::StorageKeyTest;
361 
362   // Origin::Nonce is a wrapper around base::UnguessableToken that generates
363   // the random value only when the value is first accessed. The lazy generation
364   // allows Origin to be default-constructed quickly, without spending time
365   // in random number generation.
366   //
367   // TODO(nick): Should this optimization move into UnguessableToken, once it no
368   // longer treats the Null case specially?
369   class COMPONENT_EXPORT(URL) Nonce {
370    public:
371     // Creates a nonce to hold a newly-generated UnguessableToken. The actual
372     // token value will be generated lazily.
373     Nonce();
374 
375     // Creates a nonce to hold an already-generated UnguessableToken value. This
376     // constructor should only be used for IPC serialization and testing --
377     // regular code should never need to touch the UnguessableTokens directly,
378     // and the default constructor is faster.
379     explicit Nonce(const base::UnguessableToken& token);
380 
381     // Accessor, which lazily initializes the underlying |token_| member.
382     const base::UnguessableToken& token() const;
383 
384     // Do not use in cases where lazy initialization is expected! This
385     // accessor does not initialize the |token_| member.
386     const base::UnguessableToken& raw_token() const;
387 
388     // Copyable and movable. Copying a Nonce triggers lazy-initialization,
389     // moving it does not.
390     Nonce(const Nonce&);
391     Nonce& operator=(const Nonce&);
392     Nonce(Nonce&&) noexcept;
393     Nonce& operator=(Nonce&&) noexcept;
394 
395     // Note that operator<, used by maps type containers, will trigger |token_|
396     // lazy-initialization. Equality comparisons do not.
397     bool operator<(const Nonce& other) const;
398     bool operator==(const Nonce& other) const;
399     bool operator!=(const Nonce& other) const;
400 
401    private:
402     friend class OriginTest;
403 
404     // mutable to support lazy generation.
405     mutable base::UnguessableToken token_;
406   };
407 
408   // This needs to be friended within Origin as well, since Nonce is a private
409   // nested class of Origin.
410   friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
411                                                         const Nonce& nonce);
412 
413   // Creates an origin without sanity checking that the host is canonicalized.
414   // This should only be used when converting between already normalized types,
415   // and should NOT be used for IPC. Method takes std::strings for use with move
416   // operators to avoid copies.
417   static Origin CreateOpaqueFromNormalizedPrecursorTuple(
418       std::string precursor_scheme,
419       std::string precursor_host,
420       uint16_t precursor_port,
421       const Nonce& nonce);
422 
423   // Creates an opaque Origin with the identity given by |nonce|, and an
424   // optional precursor origin given by |precursor_scheme|, |precursor_host| and
425   // |precursor_port|. Returns nullopt if any parameter is not canonical. When
426   // the precursor is unknown, the precursor parameters should be ("", "", 0).
427   //
428   // This factory method should be used in order to pass opaque Origin objects
429   // back and forth over IPC (as transitioning through GURL would risk
430   // potentially dangerous recanonicalization).
431   static std::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
432       std::string_view precursor_scheme,
433       std::string_view precursor_host,
434       uint16_t precursor_port,
435       const Nonce& nonce);
436 
437   // Constructs a non-opaque tuple origin. |tuple| must be valid.
438   explicit Origin(SchemeHostPort tuple);
439 
440   // Constructs an opaque origin derived from the |precursor| tuple, with the
441   // given |nonce|.
442   Origin(const Nonce& nonce, SchemeHostPort precursor);
443 
444   // Get the nonce associated with this origin, if it is opaque, or nullptr
445   // otherwise. This should be used only when trying to send an Origin across an
446   // IPC pipe.
447   const base::UnguessableToken* GetNonceForSerialization() const;
448 
449   // Serializes this Origin, including its nonce if it is opaque. If an opaque
450   // origin's |tuple_| is invalid nullopt is returned. If the nonce is not
451   // initialized, a nonce of 0 is used. Use of this method should be limited as
452   // an opaque origin will never be matchable in future browser sessions.
453   std::optional<std::string> SerializeWithNonce() const;
454 
455   // Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
456   // serializing.
457   std::optional<std::string> SerializeWithNonceAndInitIfNeeded();
458 
459   std::optional<std::string> SerializeWithNonceImpl() const;
460 
461   // Deserializes an origin from |ToValueWithNonce|. Returns nullopt if the
462   // value was invalid in any way.
463   static std::optional<Origin> Deserialize(const std::string& value);
464 
465   // The tuple is used for both tuple origins (e.g. https://example.com:80), as
466   // well as for opaque origins, where it tracks the tuple origin from which
467   // the opaque origin was initially derived (we call this the "precursor"
468   // origin).
469   SchemeHostPort tuple_;
470 
471   // The nonce is used for maintaining identity of an opaque origin. This
472   // nonce is preserved when an opaque origin is copied or moved. An Origin
473   // is considered opaque if and only if |nonce_| holds a value.
474   std::optional<Nonce> nonce_;
475 };
476 
477 // Pretty-printers for logging. These expose the internal state of the nonce.
478 COMPONENT_EXPORT(URL)
479 std::ostream& operator<<(std::ostream& out, const Origin& origin);
480 COMPONENT_EXPORT(URL)
481 std::ostream& operator<<(std::ostream& out, const Origin::Nonce& nonce);
482 // NOTE(review): presumably true iff |a| and |b| map to same-origin Origins.
483 COMPONENT_EXPORT(URL) bool IsSameOriginWith(const GURL& a, const GURL& b);
484 
485 // DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) copies `origin.Serialize()` into a
486 // new stack-allocated variable named `<var_name>` (DEBUG_ALIAS_FOR_CSTR, size
487 // 128) so it shows up in crash dumps. Opaque origins are captured as "null".
488 #define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \
489   DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128)
490 
491 namespace debug {
492 // NOTE(review): presumably sets |crash_key| from |value| while in scope.
493 class COMPONENT_EXPORT(URL) ScopedOriginCrashKey {
494  public:
495   ScopedOriginCrashKey(base::debug::CrashKeyString* crash_key,
496                        const url::Origin* value);
497   ~ScopedOriginCrashKey();
498   // Not copyable: copy construction and copy assignment are deleted.
499   ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete;
500   ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete;
501 
502  private:
503   base::debug::ScopedCrashKeyString scoped_string_value_;
504 };
505 
506 }  // namespace debug
507 
508 }  // namespace url
509 
510 #endif  // URL_ORIGIN_H_
511