// xref: /aosp_15_r20/external/cronet/net/base/data_url_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
#include "net/base/data_url.h"

#include <string>

#include "base/memory/ref_counted.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
#include "net/http/http_version.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
13 
14 namespace net {
15 
namespace {

// One row of the table-driven DataURL::Parse test: an input URL together with
// the outcome the parser is expected to produce for it.
struct ParseTestData {
  // The URL text that is wrapped in a GURL and handed to the parser.
  const char* url;
  // Whether parsing is expected to succeed.
  bool is_valid;
  // Expected MIME type output.
  const char* mime_type;
  // Expected charset output.
  const char* charset;
  // Expected decoded payload. Held as a std::string (not a C string) so that
  // expectations can contain embedded NUL bytes.
  const std::string data;
};

}  // namespace
27 
TEST(DataURLTest,Parse)28 TEST(DataURLTest, Parse) {
29   const ParseTestData tests[] = {
30       {"data:", false, "", "", ""},
31 
32       {"data:,", true, "text/plain", "US-ASCII", ""},
33 
34       {"data:;base64,", true, "text/plain", "US-ASCII", ""},
35 
36       {"data:;charset=,test", false, "", "", ""},
37 
38       {"data:TeXt/HtMl,<b>x</b>", true, "text/html", "", "<b>x</b>"},
39 
40       {"data:,foo", true, "text/plain", "US-ASCII", "foo"},
41 
42       {"data:;base64,aGVsbG8gd29ybGQ=", true, "text/plain", "US-ASCII",
43        "hello world"},
44 
45       // Allow invalid mediatype for backward compatibility but set mime_type to
46       // "text/plain" instead of the invalid mediatype.
47       {"data:foo,boo", true, "text/plain", "US-ASCII", "boo"},
48 
49       // When accepting an invalid mediatype, override charset with "US-ASCII"
50       {"data:foo;charset=UTF-8,boo", true, "text/plain", "US-ASCII", "boo"},
51 
52       // Invalid mediatype. Includes a slash but the type part is not a token.
53       {"data:f(oo/bar;baz=1;charset=kk,boo", true, "text/plain", "US-ASCII",
54        "boo"},
55 
56       {"data:foo/bar;baz=1;charset=kk,boo", true, "foo/bar", "kk", "boo"},
57 
58       {"data:foo/bar;charset=kk;baz=1,boo", true, "foo/bar", "kk", "boo"},
59 
60       {"data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
61        "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
62        true, "text/html", "", "<html><body><b>hello world</b></body></html>"},
63 
64       {"data:text/html,<html><body><b>hello world</b></body></html>", true,
65        "text/html", "", "<html><body><b>hello world</b></body></html>"},
66 
67       // the comma cannot be url-escaped!
68       {"data:%2Cblah", false, "", "", ""},
69 
70       // invalid base64 content
71       {"data:;base64,aGVs_-_-", false, "", "", ""},
72 
73       // Spaces should be removed from non-text data URLs (we already tested
74       // spaces above).
75       {" bG8gd2  9ybGQ=", true, "text/plain", "US-ASCII",
80        "hello world"},
81 
82       // Other whitespace should also be removed from anything base-64 encoded.
83       {"data:;base64,aGVs bG8gd2  \n9ybGQ=", true, "text/plain", "US-ASCII",
84        "hello world"},
85 
86       // In base64 encoding, escaped whitespace should be stripped.
87       // (This test was taken from acid3)
88       // http://b/1054495
89       {"data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
90        "%20",
91        true, "text/javascript", "", "d4 = 'four';"},
92 
93       // Only unescaped whitespace should be stripped in non-base64.
94       // http://b/1157796
95       {"data:img/png,A  B  %20  %0A  C", true, "img/png", "", "AB \nC"},
96 
97       {"data:text/plain;charset=utf-8;base64,SGVsbMO2", true, "text/plain",
98        "utf-8", "Hell\xC3\xB6"},
99 
100       // no mimetype
101       {"data:;charset=utf-8;base64,SGVsbMO2", true, "text/plain", "utf-8",
102        "Hell\xC3\xB6"},
103 
104       // Not sufficiently padded.
105       {"data:;base64,aGVsbG8gd29ybGQ", true, "text/plain", "US-ASCII",
106        "hello world"},
107 
108       // Not sufficiently padded with whitespace.
109       {"data:;base64,aGV sbG8g d29ybGQ", true, "text/plain", "US-ASCII",
110        "hello world"},
111 
112       // Not sufficiently padded with escaped whitespace.
113       {"data:;base64,aGV%20sbG8g%20d29ybGQ", true, "text/plain", "US-ASCII",
114        "hello world"},
115 
116       // Bad encoding (truncated).
117       {"data:;base64,aGVsbG8gd29yb", false, "", "", ""},
118 
119       // BiDi control characters should be unescaped and preserved as is, and
120       // should not be replaced with % versions. In the below case, \xE2\x80\x8F
121       // is the RTL mark and the parsed text should preserve it as is.
122       {"data:text/plain;charset=utf-8,\xE2\x80\x8Ftest", true, "text/plain",
123        "utf-8", "\xE2\x80\x8Ftest"},
124 
125       // Same as above but with Arabic text after RTL mark.
126       {"data:text/plain;charset=utf-8,"
127        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
128        true, "text/plain", "utf-8",
129        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
130 
131       // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
132       // wrapped in a GURL, this URL and the next effectively become the same as
133       // the previous two URLs.
134       {"data:text/plain;charset=utf-8,%E2%80%8Ftest", true, "text/plain",
135        "utf-8", "\xE2\x80\x8Ftest"},
136 
137       // Same as above but with Arabic text after RTL mark.
138       {"data:text/plain;charset=utf-8,"
139        "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
140        true, "text/plain", "utf-8",
141        "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
142 
143       // The 'data' of a data URI does not include any ref it has.
144       {"data:text/plain,this/is/a/test/%23include/#dontinclude", true,
145        "text/plain", "", "this/is/a/test/#include/"},
146 
147       // More unescaping tests and tests with nulls.
148       {"data:%00text/plain%41,foo", true, "%00text/plain%41", "", "foo"},
149       {"data:text/plain;charset=%00US-ASCII%41,foo", true, "text/plain",
150        "%00US-ASCII%41", "foo"},
151       {"data:text/plain,%00_%41", true, "text/plain", "",
152        std::string("\x00_A", 3)},
153       {"data:text/plain;base64,AA//", true, "text/plain", "",
154        std::string("\x00\x0F\xFF", 3)},
155       // "%62ase64" unescapes to base64, but should not be treated as such.
156       {"data:text/plain;%62ase64,AA//", true, "text/plain", "", "AA//"},
157   };
158 
159   for (const auto& test : tests) {
160     SCOPED_TRACE(test.url);
161 
162     std::string mime_type;
163     std::string charset;
164     std::string data;
165     bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
166     EXPECT_EQ(ok, test.is_valid);
167     EXPECT_EQ(test.mime_type, mime_type);
168     EXPECT_EQ(test.charset, charset);
169     EXPECT_EQ(test.data, data);
170   }
171 }
172 
// Builds a response for a minimal "data:,Hello" URL with a GET method and
// checks the parsed outputs, the HTTP version, the status text and the
// generated Content-Type header.
TEST(DataURLTest, BuildResponseSimple) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  ASSERT_EQ(OK, DataURL::BuildResponse(GURL("data:,Hello"), "GET", &mime_type,
                                       &charset, &data, &headers));

  EXPECT_EQ("text/plain", mime_type);
  EXPECT_EQ("US-ASCII", charset);
  EXPECT_EQ("Hello", data);

  // Stop here if no headers were produced; everything below dereferences them.
  ASSERT_TRUE(headers);
  const HttpVersion version = headers->GetHttpVersion();
  EXPECT_EQ(1, version.major_value());
  EXPECT_EQ(1, version.minor_value());
  EXPECT_EQ("OK", headers->GetStatusText());
  std::string value;
  EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
  EXPECT_EQ(value, "text/plain;charset=US-ASCII");
}
196 
// Builds a response for "data:,Hello" using several spellings of the HEAD
// method. For each spelling the MIME type, charset and headers are expected to
// match the GET case, while the returned body is expected to be empty.
TEST(DataURLTest, BuildResponseHead) {
  for (const char* method : {"HEAD", "head", "hEaD"}) {
    // Tag any failure below with the method spelling being exercised.
    SCOPED_TRACE(method);

    std::string mime_type;
    std::string charset;
    std::string data;
    scoped_refptr<HttpResponseHeaders> headers;
    ASSERT_EQ(OK,
              DataURL::BuildResponse(GURL("data:,Hello"), method, &mime_type,
                                     &charset, &data, &headers));

    EXPECT_EQ("text/plain", mime_type);
    EXPECT_EQ("US-ASCII", charset);
    EXPECT_EQ("", data);

    // Stop this iteration's checks if no headers were produced; the lines
    // below dereference them.
    ASSERT_TRUE(headers);
    const HttpVersion version = headers->GetHttpVersion();
    EXPECT_EQ(1, version.major_value());
    EXPECT_EQ(1, version.minor_value());
    EXPECT_EQ("OK", headers->GetStatusText());
    std::string content_type;
    EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &content_type));
    EXPECT_EQ(content_type, "text/plain;charset=US-ASCII");
  }
}
223 
// Passes a URL that is not a data: URL and checks that BuildResponse reports
// ERR_INVALID_URL and leaves every output untouched (no headers, empty
// strings).
TEST(DataURLTest, BuildResponseInput) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  ASSERT_EQ(ERR_INVALID_URL,
            DataURL::BuildResponse(GURL("bogus"), "GET", &mime_type, &charset,
                                   &data, &headers));
  EXPECT_FALSE(headers);
  EXPECT_TRUE(mime_type.empty());
  EXPECT_TRUE(charset.empty());
  EXPECT_TRUE(data.empty());
}
238 
// Checks that a data: URL whose mediatype is malformed is still accepted by
// BuildResponse, and that the generated Content-Type header falls back to the
// text/plain default.
TEST(DataURLTest, BuildResponseInvalidMimeType) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  // MIME type contains delimiters. Must be accepted but Content-Type header
  // should be generated as if the mediatype was text/plain.
  ASSERT_EQ(OK, DataURL::BuildResponse(GURL("data:f(o/b)r,test"), "GET",
                                       &mime_type, &charset, &data, &headers));

  ASSERT_TRUE(headers);
  std::string value;
  EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
  EXPECT_EQ(value, "text/plain;charset=US-ASCII");
}
255 
// Checks that a data: URL with a well-formed MIME type but a malformed charset
// parameter is rejected with ERR_INVALID_URL and that no output is populated.
TEST(DataURLTest, InvalidCharset) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  // Charset contains delimiters. Must be rejected.
  ASSERT_EQ(ERR_INVALID_URL, DataURL::BuildResponse(
                                 GURL("data:text/html;charset=(),test"), "GET",
                                 &mime_type, &charset, &data, &headers));
  EXPECT_FALSE(headers);
  EXPECT_TRUE(mime_type.empty());
  EXPECT_TRUE(charset.empty());
  EXPECT_TRUE(data.empty());
}
271 
272 // Test a slightly larger data URL.
TEST(DataURLTest,Image)273 TEST(DataURLTest, Image) {
274   // Use our nice little Chrome logo.
275   GURL image_url(
276       "data:image/png;base64,"
277       "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
278       "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
279       "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
280       "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
281       "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
282       "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
283       "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
284       "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
285       "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
286       "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
287       "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
288       "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
289       "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
290       "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
291       "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
292       "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
293       "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
294       "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
295       "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");
296 
297   std::string mime_type;
298   std::string charset;
299   std::string data;
300   scoped_refptr<HttpResponseHeaders> headers;
301 
302   EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
303                                        &data, &headers));
304 
305   EXPECT_EQ(911u, data.size());
306   EXPECT_EQ("image/png", mime_type);
307   EXPECT_TRUE(charset.empty());
308 
309   ASSERT_TRUE(headers);
310   std::string value;
311   EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
312   EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
313   EXPECT_EQ(value, "image/png");
314 }
315 
316 }  // namespace net
317