1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "net/base/data_url.h"

#include <string>

#include "base/memory/ref_counted.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
#include "net/http/http_version.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
13
14 namespace net {
15
16 namespace {
17
// One row of the table-driven DataURLTest.Parse test: an input URL together
// with the values the test compares against the outputs of DataURL::Parse().
struct ParseTestData {
  // The URL string handed to GURL and then to the parser.
  const char* url;
  // Expected return value of the parse call.
  bool is_valid;
  // Expected MIME type output.
  const char* mime_type;
  // Expected charset output.
  const char* charset;
  // Expected decoded payload. Stored as std::string (not const char*) so
  // that entries can carry embedded NUL bytes via the (ptr, len) constructor.
  const std::string data;
};
25
26 } // namespace
27
// Table-driven test of DataURL::Parse(). Every entry supplies a URL and the
// expected return value, MIME type, charset and decoded data; the loop at the
// bottom parses each URL and compares all four against the outputs.
TEST(DataURLTest, Parse) {
  const ParseTestData tests[] = {
      {"data:", false, "", "", ""},

      {"data:,", true, "text/plain", "US-ASCII", ""},

      {"data:;base64,", true, "text/plain", "US-ASCII", ""},

      {"data:;charset=,test", false, "", "", ""},

      {"data:TeXt/HtMl,<b>x</b>", true, "text/html", "", "<b>x</b>"},

      {"data:,foo", true, "text/plain", "US-ASCII", "foo"},

      {"data:;base64,aGVsbG8gd29ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // Allow invalid mediatype for backward compatibility but set mime_type to
      // "text/plain" instead of the invalid mediatype.
      {"data:foo,boo", true, "text/plain", "US-ASCII", "boo"},

      // When accepting an invalid mediatype, override charset with "US-ASCII"
      {"data:foo;charset=UTF-8,boo", true, "text/plain", "US-ASCII", "boo"},

      // Invalid mediatype. Includes a slash but the type part is not a token.
      {"data:f(oo/bar;baz=1;charset=kk,boo", true, "text/plain", "US-ASCII",
       "boo"},

      {"data:foo/bar;baz=1;charset=kk,boo", true, "foo/bar", "kk", "boo"},

      {"data:foo/bar;charset=kk;baz=1,boo", true, "foo/bar", "kk", "boo"},

      {"data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
       "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
       true, "text/html", "", "<html><body><b>hello world</b></body></html>"},

      {"data:text/html,<html><body><b>hello world</b></body></html>", true,
       "text/html", "", "<html><body><b>hello world</b></body></html>"},

      // the comma cannot be url-escaped!
      {"data:%2Cblah", false, "", "", ""},

      // invalid base64 content
      {"data:;base64,aGVs_-_-", false, "", "", ""},

      // Spaces should be removed from non-text data URLs (we already tested
      // spaces above).
      {"data:image/fractal,a b c d e f g", true, "image/fractal", "",
       "abcdefg"},

      // Spaces should also be removed from anything base-64 encoded.
      {"data:;base64,aGVs bG8gd2 9ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // Other whitespace should also be removed from anything base-64 encoded.
      {"data:;base64,aGVs bG8gd2 \n9ybGQ=", true, "text/plain", "US-ASCII",
       "hello world"},

      // In base64 encoding, escaped whitespace should be stripped.
      // (This test was taken from acid3)
      // http://b/1054495
      {"data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
       "%20",
       true, "text/javascript", "", "d4 = 'four';"},

      // Only unescaped whitespace should be stripped in non-base64.
      // http://b/1157796
      {"data:img/png,A B %20 %0A C", true, "img/png", "", "AB \nC"},

      {"data:text/plain;charset=utf-8;base64,SGVsbMO2", true, "text/plain",
       "utf-8", "Hell\xC3\xB6"},

      // no mimetype
      {"data:;charset=utf-8;base64,SGVsbMO2", true, "text/plain", "utf-8",
       "Hell\xC3\xB6"},

      // Not sufficiently padded.
      {"data:;base64,aGVsbG8gd29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Not sufficiently padded with whitespace.
      {"data:;base64,aGV sbG8g d29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Not sufficiently padded with escaped whitespace.
      {"data:;base64,aGV%20sbG8g%20d29ybGQ", true, "text/plain", "US-ASCII",
       "hello world"},

      // Bad encoding (truncated).
      {"data:;base64,aGVsbG8gd29yb", false, "", "", ""},

      // BiDi control characters should be unescaped and preserved as is, and
      // should not be replaced with % versions. In the below case, \xE2\x80\x8F
      // is the RTL mark and the parsed text should preserve it as is.
      {"data:text/plain;charset=utf-8,\xE2\x80\x8Ftest", true, "text/plain",
       "utf-8", "\xE2\x80\x8Ftest"},

      // Same as above but with Arabic text after RTL mark.
      {"data:text/plain;charset=utf-8,"
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
       true, "text/plain", "utf-8",
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},

      // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
      // wrapped in a GURL, this URL and the next effectively become the same as
      // the previous two URLs.
      {"data:text/plain;charset=utf-8,%E2%80%8Ftest", true, "text/plain",
       "utf-8", "\xE2\x80\x8Ftest"},

      // Same as above but with Arabic text after RTL mark.
      {"data:text/plain;charset=utf-8,"
       "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
       true, "text/plain", "utf-8",
       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},

      // The 'data' of a data URI does not include any ref it has.
      {"data:text/plain,this/is/a/test/%23include/#dontinclude", true,
       "text/plain", "", "this/is/a/test/#include/"},

      // More unescaping tests and tests with nulls.
      {"data:%00text/plain%41,foo", true, "%00text/plain%41", "", "foo"},
      {"data:text/plain;charset=%00US-ASCII%41,foo", true, "text/plain",
       "%00US-ASCII%41", "foo"},
      {"data:text/plain,%00_%41", true, "text/plain", "",
       std::string("\x00_A", 3)},
      {"data:text/plain;base64,AA//", true, "text/plain", "",
       std::string("\x00\x0F\xFF", 3)},
      // "%62ase64" unescapes to base64, but should not be treated as such.
      {"data:text/plain;%62ase64,AA//", true, "text/plain", "", "AA//"},
  };

  for (const auto& test : tests) {
    // Tag any failure below with the URL that produced it.
    SCOPED_TRACE(test.url);

    std::string mime_type;
    std::string charset;
    std::string data;
    bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
    EXPECT_EQ(ok, test.is_valid);
    // Entries with is_valid == false list empty strings, so these comparisons
    // also require the outputs to be empty when parsing fails.
    EXPECT_EQ(test.mime_type, mime_type);
    EXPECT_EQ(test.charset, charset);
    EXPECT_EQ(test.data, data);
  }
}
172
// Builds a response for the minimal URL "data:,Hello" with the GET method and
// checks the reported MIME type, charset and data, then the generated
// headers: HTTP version 1.1, status text "OK" and the Content-Type value.
TEST(DataURLTest, BuildResponseSimple) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  ASSERT_EQ(OK, DataURL::BuildResponse(GURL("data:,Hello"), "GET", &mime_type,
                                       &charset, &data, &headers));

  EXPECT_EQ("text/plain", mime_type);
  EXPECT_EQ("US-ASCII", charset);
  EXPECT_EQ("Hello", data);

  // Stop here if no headers were produced; everything below dereferences them.
  ASSERT_TRUE(headers);
  const HttpVersion& version = headers->GetHttpVersion();
  EXPECT_EQ(1, version.major_value());
  EXPECT_EQ(1, version.minor_value());
  EXPECT_EQ("OK", headers->GetStatusText());
  std::string value;
  EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
  EXPECT_EQ(value, "text/plain;charset=US-ASCII");
}
196
// Runs BuildResponse() on "data:,Hello" with several casings of the HEAD
// method. For each one the metadata and headers are expected to match the
// GET case while the returned data is expected to be empty.
TEST(DataURLTest, BuildResponseHead) {
  for (const char* method : {"HEAD", "head", "hEaD"}) {
    // Identify the failing method casing in any failure output.
    SCOPED_TRACE(method);

    std::string parsed_mime_type;
    std::string parsed_charset;
    std::string body;
    scoped_refptr<HttpResponseHeaders> response_headers;

    const auto result = DataURL::BuildResponse(
        GURL("data:,Hello"), method, &parsed_mime_type, &parsed_charset, &body,
        &response_headers);
    ASSERT_EQ(OK, result);

    EXPECT_EQ("text/plain", parsed_mime_type);
    EXPECT_EQ("US-ASCII", parsed_charset);
    EXPECT_EQ("", body);

    // The header checks below dereference the pointer, so bail out if unset.
    ASSERT_TRUE(response_headers);

    HttpVersion http_version = response_headers->GetHttpVersion();
    EXPECT_EQ(1, http_version.major_value());
    EXPECT_EQ(1, http_version.minor_value());
    EXPECT_EQ("OK", response_headers->GetStatusText());

    std::string header_value;
    EXPECT_TRUE(
        response_headers->GetNormalizedHeader("Content-Type", &header_value));
    EXPECT_EQ(header_value, "text/plain;charset=US-ASCII");
  }
}
223
// Feeds BuildResponse() the string "bogus" instead of a data: URL and expects
// ERR_INVALID_URL, with no headers created and every string output left
// empty.
TEST(DataURLTest, BuildResponseInput) {
  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result =
      DataURL::BuildResponse(GURL("bogus"), "GET", &parsed_mime_type,
                             &parsed_charset, &body, &response_headers);
  ASSERT_EQ(ERR_INVALID_URL, result);

  // None of the out-parameters may have been populated on failure.
  EXPECT_FALSE(response_headers);
  EXPECT_TRUE(parsed_mime_type.empty());
  EXPECT_TRUE(parsed_charset.empty());
  EXPECT_TRUE(body.empty());
}
238
// A media type containing delimiter characters ("f(o/b)r") must still be
// accepted, but the Content-Type header is expected to be generated as if
// the media type had been text/plain.
TEST(DataURLTest, BuildResponseInvalidMimeType) {
  std::string parsed_mime_type;
  std::string parsed_charset;
  std::string body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result = DataURL::BuildResponse(
      GURL("data:f(o/b)r,test"), "GET", &parsed_mime_type, &parsed_charset,
      &body, &response_headers);
  ASSERT_EQ(OK, result);

  // Bail out before dereferencing if no headers were produced.
  ASSERT_TRUE(response_headers);

  std::string header_value;
  EXPECT_TRUE(
      response_headers->GetNormalizedHeader("Content-Type", &header_value));
  EXPECT_EQ(header_value, "text/plain;charset=US-ASCII");
}
255
// Checks that a data: URL whose charset parameter is malformed is rejected
// outright: BuildResponse() must return ERR_INVALID_URL and leave every
// output untouched.
TEST(DataURLTest, InvalidCharset) {
  std::string mime_type;
  std::string charset;
  std::string data;
  scoped_refptr<HttpResponseHeaders> headers;

  // The MIME type (text/html) is fine; it is the charset value "()" that
  // contains delimiters. Must be rejected.
  ASSERT_EQ(ERR_INVALID_URL, DataURL::BuildResponse(
                                 GURL("data:text/html;charset=(),test"), "GET",
                                 &mime_type, &charset, &data, &headers));
  EXPECT_FALSE(headers);
  EXPECT_TRUE(mime_type.empty());
  EXPECT_TRUE(charset.empty());
  EXPECT_TRUE(data.empty());
}
271
272 // Test a slightly larger data URL.
TEST(DataURLTest,Image)273 TEST(DataURLTest, Image) {
274 // Use our nice little Chrome logo.
275 GURL image_url(
276 "data:image/png;base64,"
277 "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
278 "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
279 "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
280 "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
281 "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
282 "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
283 "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
284 "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
285 "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
286 "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
287 "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
288 "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
289 "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
290 "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
291 "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
292 "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
293 "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
294 "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
295 "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=");
296
297 std::string mime_type;
298 std::string charset;
299 std::string data;
300 scoped_refptr<HttpResponseHeaders> headers;
301
302 EXPECT_EQ(OK, DataURL::BuildResponse(image_url, "GET", &mime_type, &charset,
303 &data, &headers));
304
305 EXPECT_EQ(911u, data.size());
306 EXPECT_EQ("image/png", mime_type);
307 EXPECT_TRUE(charset.empty());
308
309 ASSERT_TRUE(headers);
310 std::string value;
311 EXPECT_EQ(headers->GetStatusLine(), "HTTP/1.1 200 OK");
312 EXPECT_TRUE(headers->GetNormalizedHeader("Content-Type", &value));
313 EXPECT_EQ(value, "image/png");
314 }
315
316 } // namespace net
317