xref: /aosp_15_r20/external/cronet/net/base/data_url.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_BASE_DATA_URL_H_
6 #define NET_BASE_DATA_URL_H_
7 
8 #include <string>
9 #include <string_view>
10 
11 #include "base/memory/scoped_refptr.h"
12 #include "net/base/net_errors.h"
13 #include "net/base/net_export.h"
14 
15 class GURL;
16 
17 namespace net {
18 
19 class HttpResponseHeaders;
20 
21 // See RFC 2397 for a complete description of the 'data' URL scheme.
22 //
23 // Briefly, a 'data' URL has the form:
24 //
25 //   data:[<mediatype>][;base64],<data>
26 //
27 // The <mediatype> is an Internet media type specification (with optional
28 // parameters.)  The appearance of ";base64" means that the data is encoded as
29 // base64.  Without ";base64", the data (as a sequence of octets) is represented
30 // using ASCII encoding for octets inside the range of safe URL characters and
31 // using the standard %xx hex encoding of URLs for octets outside that range.
32 // If <mediatype> is omitted, it defaults to text/plain;charset=US-ASCII.  As a
33 // shorthand, "text/plain" can be omitted but the charset parameter supplied.
34 //
35 class NET_EXPORT DataURL {
36  public:
37   // This method can be used to parse a 'data' URL into its component pieces.
38   //
39   // |mime_type| and |charset| must be non-null and point to empty strings.
40   //
41   // If |data| is null, then the <data> section will not be parsed or validated.
42   // If non-null, it must point to an empty string.
43   //
44   // The resulting mime_type is normalized to lowercase.  The data is the
45   // decoded data (e.g., if the data URL specifies base64 encoding, then the
46   // returned data is base64 decoded, and any %-escaped bytes are unescaped).
47   //
48   // If the media type value doesn't match the media-type production defined in
49   // RFC 7231, mime_type will be set to the default value "text/plain". We
50   // don't simply fail for this grammar violation since Chromium had been
51   // accepting such invalid values. For example, an <img> element with the src
52   // attribute set to a data URL with an invalid media type "image" (without a
53   // slash and subtype) had been displayed. However, the value this method will
54   // store in mime_type argument can be used for generating other headers, etc.
55   // This could lead to a security vulnerability. We don't want to accept
56   // arbitrary values and ask each caller to validate the return value.
57   //
58   // If the charset parameter is specified but its value doesn't match the
59   // token production defined in RFC 7230, this method simply fails and returns
60   // false.
61   //
62   // If there's any other grammar violation in the URL, then this method will
63   // return false, and all passed in pointers will be unmodified. On success,
64   // true is returned.
65   [[nodiscard]] static bool Parse(const GURL& url,
66                                   std::string* mime_type,
67                                   std::string* charset,
68                                   std::string* data);
69 
70   // Similar to Parse(), except that it also generates a bogus set of response
71   // headers, with Content-Type populated, and takes a method. Only the "HEAD"
72   // method modifies the response, resulting in a 0-length body. All pointer
73   // arguments must be non-null. All std::string pointers must point to empty
74   // strings, and |*headers| must be nullptr. Returns net::OK on success.
75   [[nodiscard]] static Error BuildResponse(
76       const GURL& url,
77       std::string_view method,
78       std::string* mime_type,
79       std::string* charset,
80       std::string* data,
81       scoped_refptr<HttpResponseHeaders>* headers);
82 };
83 
84 }  // namespace net
85 
86 #endif  // NET_BASE_DATA_URL_H_
87