xref: /aosp_15_r20/external/cronet/net/base/url_util_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/url_util.h"
6 
7 #include <optional>
8 #include <ostream>
9 
10 #include "base/format_macros.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "base/test/scoped_feature_list.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "url/gurl.h"
15 #include "url/scheme_host_port.h"
16 #include "url/url_features.h"
17 #include "url/url_util.h"
18 
19 using base::ASCIIToUTF16;
20 using base::WideToUTF16;
21 
22 namespace net {
23 namespace {
24 
// Verifies that AppendQueryParameter() adds an escaped name=value pair to a
// URL, creating the query component when the URL does not have one yet.
TEST(UrlUtilTest, AppendQueryParameter) {
  const GURL without_query("http://example.com/path");
  const GURL with_query("http://example.com/path?existing=one");

  // No query component yet: the pair becomes the whole query.
  EXPECT_EQ("http://example.com/path?name=value",
            AppendQueryParameter(without_query, "name", "value").spec());

  // A query component already exists: it is kept untouched and the new pair is
  // joined to it with '&'.
  EXPECT_EQ("http://example.com/path?existing=one&name=value",
            AppendQueryParameter(with_query, "name", "value").spec());

  // Characters that are unsafe inside a query (' ' and '=') are escaped in
  // both the name and the value.
  EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
            AppendQueryParameter(with_query, "na me", "v.alue=").spec());
}
45 
TEST(UrlUtilTest,AppendOrReplaceQueryParameter)46 TEST(UrlUtilTest, AppendOrReplaceQueryParameter) {
47   // Appending a name-value pair to a URL without a query component.
48   EXPECT_EQ("http://example.com/path?name=value",
49             AppendOrReplaceQueryParameter(GURL("http://example.com/path"),
50                                  "name", "value").spec());
51 
52   // Appending a name-value pair to a URL with a query component.
53   // The original component should be preserved, and the new pair should be
54   // appended with '&'.
55   EXPECT_EQ("http://example.com/path?existing=one&name=value",
56       AppendOrReplaceQueryParameter(
57           GURL("http://example.com/path?existing=one"),
58           "name", "value").spec());
59 
60   // Appending a name-value pair with unsafe characters included. The
61   // unsafe characters should be escaped.
62   EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
63       AppendOrReplaceQueryParameter(
64           GURL("http://example.com/path?existing=one"),
65           "na me", "v.alue=").spec());
66 
67   // Replace value of an existing paramater.
68   EXPECT_EQ("http://example.com/path?existing=one&name=new",
69       AppendOrReplaceQueryParameter(
70           GURL("http://example.com/path?existing=one&name=old"),
71           "name", "new").spec());
72 
73   // Replace a name-value pair with unsafe characters included. The
74   // unsafe characters should be escaped.
75   EXPECT_EQ("http://example.com/path?na+me=n.ew%3D&existing=one",
76       AppendOrReplaceQueryParameter(
77           GURL("http://example.com/path?na+me=old&existing=one"),
78           "na me", "n.ew=").spec());
79 
80   // Replace the value of first parameter with this name only.
81   EXPECT_EQ("http://example.com/path?name=new&existing=one&name=old",
82       AppendOrReplaceQueryParameter(
83           GURL("http://example.com/path?name=old&existing=one&name=old"),
84           "name", "new").spec());
85 
86   // Preserve the content of the original params regardless of our failure to
87   // interpret them correctly.
88   EXPECT_EQ("http://example.com/path?bar&name=new&left=&"
89             "=right&=&&name=again",
90       AppendOrReplaceQueryParameter(
91           GURL("http://example.com/path?bar&name=old&left=&"
92                 "=right&=&&name=again"),
93           "name", "new").spec());
94 
95   // ----- Removing the key using nullopt value -----
96 
97   // Removes the name-value pair from the URL preserving other query parameters.
98   EXPECT_EQ("http://example.com/path?abc=xyz",
99             AppendOrReplaceQueryParameter(
100                 GURL("http://example.com/path?name=value&abc=xyz"), "name",
101                 std::nullopt)
102                 .spec());
103 
104   // Removes the name-value pair from the URL.
105   EXPECT_EQ("http://example.com/path?",
106             AppendOrReplaceQueryParameter(
107                 GURL("http://example.com/path?existing=one"), "existing",
108                 std::nullopt)
109                 .spec());
110 
111   // Removes the first name-value pair.
112   EXPECT_EQ("http://example.com/path?c=d&e=f",
113             AppendOrReplaceQueryParameter(
114                 GURL("http://example.com/path?a=b&c=d&e=f"), "a", std::nullopt)
115                 .spec());
116 
117   // Removes a name-value pair in between two query params.
118   EXPECT_EQ(
119       "http://example.com/path?existing=one&hello=world",
120       AppendOrReplaceQueryParameter(
121           GURL("http://example.com/path?existing=one&replace=sure&hello=world"),
122           "replace", std::nullopt)
123           .spec());
124 
125   // Removes the last name-value pair.
126   EXPECT_EQ("http://example.com/path?existing=one",
127             AppendOrReplaceQueryParameter(
128                 GURL("http://example.com/path?existing=one&replace=sure"),
129                 "replace", std::nullopt)
130                 .spec());
131 
132   // Removing a name-value pair with unsafe characters included. The
133   // unsafe characters should be escaped.
134   EXPECT_EQ("http://example.com/path?existing=one&hello=world",
135             AppendOrReplaceQueryParameter(
136                 GURL("http://example.com/"
137                      "path?existing=one&na+me=v.alue%3D&hello=world"),
138                 "na me", std::nullopt)
139                 .spec());
140 
141   // Does nothing if the provided query param key does not exist.
142   EXPECT_EQ("http://example.com/path?existing=one&name=old",
143             AppendOrReplaceQueryParameter(
144                 GURL("http://example.com/path?existing=one&name=old"), "old",
145                 std::nullopt)
146                 .spec());
147 
148   // Remove the value of first parameter with this name only.
149   EXPECT_EQ(
150       "http://example.com/path?existing=one&name=old",
151       AppendOrReplaceQueryParameter(
152           GURL("http://example.com/path?name=something&existing=one&name=old"),
153           "name", std::nullopt)
154           .spec());
155 
156   // Preserve the content of the original params regardless of our failure to
157   // interpret them correctly.
158   EXPECT_EQ(
159       "http://example.com/path?bar&left=&"
160       "=right&=&&name=again",
161       AppendOrReplaceQueryParameter(
162           GURL("http://example.com/path?bar&name=old&left=&"
163                "=right&=&&name=again"),
164           "name", std::nullopt)
165           .spec());
166 }
167 
// Verifies that AppendOrReplaceRef() sets the fragment ("ref") of a URL,
// whether or not the URL already has one, and that it leaves the path and
// query untouched.
TEST(UrlUtilTest, AppendOrReplaceRef) {
  // Setting a new ref should append it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path"), "ref").spec());

  // Setting a ref over an existing one should replace it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path#old_ref"), "ref")
                .spec());

  // Setting a ref on a url with existing query parameters should simply append
  // it at the end. The input deliberately has no ref, so this exercises the
  // append path rather than replacement.
  EXPECT_EQ(
      "http://example.com/path?query=value#ref",
      AppendOrReplaceRef(GURL("http://example.com/path?query=value"), "ref")
          .spec());

  // Setting a ref on a url with existing query parameters and with special
  // encoded characters: `special-chars?query=value#ref chars%\";'`
  EXPECT_EQ(
      "http://example.com/special-chars?query=value#ref%20chars%%22;'",
      AppendOrReplaceRef(GURL("http://example.com/special-chars?query=value"),
                         "ref chars%\";'")
          .spec());

  // Testing adding a ref to a URL with specially encoded characters.
  // `special chars%\";'?query=value#ref`
  EXPECT_EQ(
      "http://example.com/special%20chars%%22;'?query=value#ref",
      AppendOrReplaceRef(
          GURL("http://example.com/special chars%\";'?query=value"), "ref")
          .spec());
}
201 
// Verifies that GetValueForKeyInQuery() reports whether a key is present in
// the query and, when it is, writes the unescaped value to the out-parameter.
TEST(UrlUtilTest, GetValueForKeyInQuery) {
  const GURL query_url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2");
  // Reused across all lookups below.
  std::string found;

  // A key that is absent from the query is reported as not found.
  EXPECT_FALSE(GetValueForKeyInQuery(query_url, "non-exist", &found));

  // A regular key=value pair.
  EXPECT_TRUE(GetValueForKeyInQuery(query_url, "name", &found));
  EXPECT_EQ("value", found);

  // A key without '=' is found and yields an empty value.
  EXPECT_TRUE(GetValueForKeyInQuery(query_url, "boolParam", &found));
  EXPECT_EQ("", found);

  // Percent-escapes in the value are decoded.
  EXPECT_TRUE(GetValueForKeyInQuery(query_url, "url", &found));
  EXPECT_EQ("http://test.com/q?n1=v1&n2", found);
}
220 
// Verifies that GetValueForKeyInQuery() returns false for a URL with an
// invalid host, even though the key textually appears in its query.
TEST(UrlUtilTest, GetValueForKeyInQueryInvalidURL) {
  const GURL invalid_url("http://%01/?test");
  std::string found;

  EXPECT_FALSE(GetValueForKeyInQuery(invalid_url, "test", &found));
}
228 
// Walks a QueryIterator over a URL query and checks the key, the raw value and
// the unescaped value reported at every step, including value-less keys and a
// key that appears several times.
TEST(UrlUtilTest, ParseQuery) {
  // Parameters in the order the iterator is expected to yield them.
  static constexpr struct {
    const char* key;
    const char* value;
    const char* unescaped_value;
  } kExpectedParams[] = {
      {"name", "value", "value"},
      {"boolParam", "", ""},
      {"url", "http://test.com/q?n1%3Dv1%26n2", "http://test.com/q?n1=v1&n2"},
      {"multikey", "value1", "value1"},
      {"multikey", "value2", "value2"},
      {"multikey", "", ""},
  };

  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2&"
      "multikey=value1&multikey=value2&multikey");
  QueryIterator it(url);

  for (const auto& expected : kExpectedParams) {
    // Stop right away if the iterator ran out of parameters early.
    ASSERT_FALSE(it.IsAtEnd());
    EXPECT_EQ(expected.key, it.GetKey());
    EXPECT_EQ(expected.value, it.GetValue());
    EXPECT_EQ(expected.unescaped_value, it.GetUnescapedValue());
    it.Advance();
  }

  // Nothing may be left once every expected parameter has been consumed.
  EXPECT_TRUE(it.IsAtEnd());
}
273 
// Verifies that a QueryIterator built from a URL with an invalid host yields
// no parameters at all.
TEST(UrlUtilTest, ParseQueryInvalidURL) {
  const GURL invalid_url("http://%01/?test");
  QueryIterator query_it(invalid_url);
  EXPECT_TRUE(query_it.IsAtEnd());
}
279 
// Table-driven check of ParseHostAndPort(): for each input, whether parsing
// succeeds and, if it is expected to, which host and port are produced. A port
// of -1 means the input carried no port.
TEST(UrlUtilTest, ParseHostAndPort) {
  const struct {
    const char* const input;
    bool success;
    const char* const expected_host;
    int expected_port;
  } tests[] = {
      // Valid inputs:
      {"foo:10", true, "foo", 10},
      {"foo", true, "foo", -1},
      // IPv6 literals are returned without their brackets.
      {"[1080:0:0:0:8:800:200C:4171]:11", true, "1080:0:0:0:8:800:200C:4171",
       11},
      {"[1080:0:0:0:8:800:200C:4171]", true, "1080:0:0:0:8:800:200C:4171", -1},

      // Because no validation is done on the host, the following are accepted,
      // even though they are invalid names.
      {"]", true, "]", -1},
      {"::1", true, ":", 1},

      // Invalid inputs:
      {"foo:bar", false, "", -1},
      {"foo:", false, "", -1},
      {":", false, "", -1},
      {":80", false, "", -1},
      {"", false, "", -1},
      {"porttoolong:300000", false, "", -1},
      {"usrname@host", false, "", -1},
      {"usrname:password@host", false, "", -1},
      {":password@host", false, "", -1},
      {":password@host:80", false, "", -1},
      {"@host", false, "", -1},
      {"[", false, "", -1},
      {"[]", false, "", -1},
  };

  for (const auto& test : tests) {
    // Attach the input to any failure below so the offending row is obvious.
    SCOPED_TRACE(test.input);

    std::string host;
    // Initialized so the comparison below never reads an indeterminate value
    // should the parser return without writing the port.
    int port = 0;
    const bool ok = ParseHostAndPort(test.input, &host, &port);
    EXPECT_EQ(test.success, ok);

    // Outputs are only checked for inputs that are expected to parse.
    if (test.success) {
      EXPECT_EQ(test.expected_host, host);
      EXPECT_EQ(test.expected_port, port);
    }
  }
}
// Verifies that GetHostAndPort() always emits "host:port", filling in the
// scheme's default port when the URL does not name one.
TEST(UrlUtilTest, GetHostAndPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } kCases[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com:80"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

      // IPv6 literals keep their brackets.
      {GURL("http://[1::2]/x"), "[1::2]:80"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test_case : kCases) {
    const std::string expected(test_case.expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndPort(test_case.url));
  }
}
353 
// Verifies that GetHostAndOptionalPort() omits the port when the URL does not
// specify one and keeps any port that is spelled out, for both the GURL and
// the url::SchemeHostPort overloads.
TEST(UrlUtilTest, GetHostAndOptionalPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } kCases[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
      {GURL("http://www.foo.com:443/x"), "www.foo.com:443"},

      {GURL("https://www.foo.com/x"), "www.foo.com"},
      {GURL("https://www.foo.com:80/x"), "www.foo.com:80"},

      // IPv6 literals keep their brackets.
      {GURL("http://[1::2]/x"), "[1::2]"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test_case : kCases) {
    // GURL overload.
    EXPECT_EQ(test_case.expected_host_and_port,
              GetHostAndOptionalPort(test_case.url));

    // url::SchemeHostPort overload must agree with it.
    const url::SchemeHostPort scheme_host_port(test_case.url);
    EXPECT_EQ(test_case.expected_host_and_port,
              GetHostAndOptionalPort(scheme_host_port));
  }
}
377 
// Verifies that GetHostOrSpecFromURL() returns the host (without a trailing
// dot) when the URL has one, and the full spec otherwise.
TEST(UrlUtilTest, GetHostOrSpecFromURL) {
  const GURL plain_host("http://example.com/test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(plain_host));

  // The trailing dot of the host is dropped.
  const GURL dotted_host("http://example.com./test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(dotted_host));

  // No host to report, so the whole spec comes back.
  const GURL host_less("file:///tmp/test.html");
  EXPECT_EQ("file:///tmp/test.html", GetHostOrSpecFromURL(host_less));
}
386 
// Table-driven check of GetSuperdomain(): the result is the input with its
// leftmost label (and the dot that follows it) removed.
TEST(UrlUtilTest, GetSuperdomain) {
  static constexpr struct {
    const char* const input;
    const char* const superdomain;
  } kCases[] = {
      // Basic cases
      {"foo.bar.example", "bar.example"},
      {"bar.example", "example"},
      {"example", ""},

      // The result is allowed to be an eTLD.
      {"google.com", "com"},
      {"google.co.uk", "co.uk"},

      // Weird cases.
      {"", ""},
      {"has.trailing.dot.", "trailing.dot."},
      {"dot.", ""},
      {".has.leading.dot", "has.leading.dot"},
      {".", ""},
      {"..", "."},
      {"127.0.0.1", "0.0.1"},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(test_case.superdomain, GetSuperdomain(test_case.input));
  }
}
415 
// Table-driven check of IsSubdomainOf(candidate, parent). Per the rows below,
// matching is label-based (a domain counts as a subdomain of itself) and a
// trailing dot on the parent prevents a match.
TEST(UrlUtilTest, IsSubdomainOf) {
  static constexpr struct {
    const char* candidate;
    const char* parent;
    bool expected;
  } kCases[] = {
      {"bar.foo.com", "foo.com", true},
      {"barfoo.com", "foo.com", false},
      {"bar.foo.com", "com", true},
      {"bar.foo.com", "other.com", false},
      {"bar.foo.com", "bar.foo.com", true},
      {"bar.foo.com", "baz.foo.com", false},
      {"bar.foo.com", "baz.bar.foo.com", false},
      {"bar.foo.com", "ar.foo.com", false},
      {"foo.com", "foo.com.", false},
      {"bar.foo.com", "foo.com.", false},
      {"", "", true},
      {"a", "", false},
      {"", "a", false},
      {"127.0.0.1", "0.0.1", true},  // Don't do this...
  };

  for (const auto& test_case : kCases) {
    const bool actual = IsSubdomainOf(test_case.candidate, test_case.parent);
    EXPECT_EQ(test_case.expected, actual);
  }
}
443 
TEST(UrlUtilTest,CompliantHost)444 TEST(UrlUtilTest, CompliantHost) {
445   struct {
446     const char* const host;
447     bool expected_output;
448   } compliant_host_cases[] = {
449       {"", false},
450       {"a", true},
451       {"-", false},
452       {"_", false},
453       {".", false},
454       {"9", true},
455       {"9a", true},
456       {"9_", true},
457       {"a.", true},
458       {".a", false},
459       {"a.a", true},
460       {"9.a", true},
461       {"a.9", true},
462       {"_9a", false},
463       {"-9a", false},
464       {"a.a9", true},
465       {"_.9a", true},
466       {"a.-a9", false},
467       {"a+9a", false},
468       {"-a.a9", true},
469       {"a_.a9", true},
470       {"1-.a-b", true},
471       {"1_.a-b", true},
472       {"1-2.a_b", true},
473       {"a.b.c.d.e", true},
474       {"1.2.3.4.5", true},
475       {"1.2.3..4.5", false},
476       {"1.2.3.4.5.", true},
477       {"1.2.3.4.5..", false},
478       {"%20%20noodles.blorg", false},
479       {"noo dles.blorg ", false},
480       {"noo dles.blorg. ", false},
481       {"^noodles.blorg", false},
482       {"noodles^.blorg", false},
483       {"noo&dles.blorg", false},
484       {"noodles.blorg`", false},
485       {"www.noodles.blorg", true},
486       {"1www.noodles.blorg", true},
487       {"www.2noodles.blorg", true},
488       {"www.n--oodles.blorg", true},
489       {"www.noodl_es.blorg", true},
490       {"www.no-_odles.blorg", true},
491       {"www_.noodles.blorg", true},
492       {"www.noodles.blorg.", true},
493       {"_privet._tcp.local", true},
494       // 63-char label (before or without dot) allowed
495       {"z23456789a123456789a123456789a123456789a123456789a123456789a123", true},
496       {"z23456789a123456789a123456789a123456789a123456789a123456789a123.",
497        true},
498       // 64-char label (before or without dot) disallowed
499       {"123456789a123456789a123456789a123456789a123456789a123456789a1234",
500        false},
501       {"123456789a123456789a123456789a123456789a123456789a123456789a1234.",
502        false},
503       // 253-char host allowed
504       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
505        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
506        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
507        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc",
508        true},
509       // 253-char+dot host allowed
510       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
511        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
512        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
513        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc.",
514        true},
515       // 254-char host disallowed
516       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
517        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
518        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
519        "123456789.123456789.123456789.123456789.1234",
520        false},
521       // 254-char+dot host disallowed
522       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
523        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
524        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
525        "123456789.123456789.123456789.123456789.1234.",
526        false},
527   };
528 
529   for (const auto& compliant_host : compliant_host_cases) {
530     EXPECT_EQ(compliant_host.expected_output,
531               IsCanonicalizedHostCompliant(compliant_host.host))
532         << compliant_host.host;
533   }
534 }
535 
// One row of the parameterized IsHostnameNonUnique test. Field order matters:
// the table is aggregate-initialized positionally as {is_unique, hostname}.
struct NonUniqueNameTestData {
  // Whether `hostname` is expected to be classified as a unique name.
  bool is_unique;
  // Host name handed to the function under test.
  const char* const hostname;
};
540 
541 // Google Test pretty-printer.
PrintTo(const NonUniqueNameTestData & data,std::ostream * os)542 void PrintTo(const NonUniqueNameTestData& data, std::ostream* os) {
543   ASSERT_TRUE(data.hostname);
544   *os << " hostname: " << testing::PrintToString(data.hostname)
545       << "; is_unique: " << testing::PrintToString(data.is_unique);
546 }
547 
548 const NonUniqueNameTestData kNonUniqueNameTestData[] = {
549     // Domains under ICANN-assigned domains.
550     { true, "google.com" },
551     { true, "google.co.uk" },
552     // Domains under private registries.
553     { true, "appspot.com" },
554     { true, "test.appspot.com" },
555     // Unreserved IPv4 addresses (in various forms).
556     { true, "8.8.8.8" },
557     { true, "99.64.0.0" },
558     { true, "212.15.0.0" },
559     { true, "212.15" },
560     { true, "212.15.0" },
561     { true, "3557752832" },
562     // Reserved IPv4 addresses (in various forms).
563     { false, "192.168.0.0" },
564     { false, "192.168.0.6" },
565     { false, "10.0.0.5" },
566     { false, "10.0" },
567     { false, "10.0.0" },
568     { false, "3232235526" },
569     // Unreserved IPv6 addresses.
570     { true, "FFC0:ba98:7654:3210:FEDC:BA98:7654:3210" },
571     { true, "2000:ba98:7654:2301:EFCD:BA98:7654:3210" },
572     // Reserved IPv6 addresses.
573     { false, "::192.9.5.5" },
574     { false, "FEED::BEEF" },
575     { false, "FEC0:ba98:7654:3210:FEDC:BA98:7654:3210" },
576     // 'internal'/non-IANA assigned domains.
577     { false, "intranet" },
578     { false, "intranet." },
579     { false, "intranet.example" },
580     { false, "host.intranet.example" },
581     // gTLDs under discussion, but not yet assigned.
582     { false, "intranet.corp" },
583     { false, "intranet.internal" },
584     // Invalid host names are treated as unique - but expected to be
585     // filtered out before then.
586     { true, "junk)(£)$*!@~#" },
587     { true, "w$w.example.com" },
588     { true, "nocolonsallowed:example" },
589     { true, "[::4.5.6.9]" },
590 };
591 
592 class UrlUtilNonUniqueNameTest
593     : public testing::TestWithParam<NonUniqueNameTestData> {
594  public:
595   ~UrlUtilNonUniqueNameTest() override = default;
596 
597  protected:
IsUnique(const std::string & hostname)598   bool IsUnique(const std::string& hostname) {
599     return !IsHostnameNonUnique(hostname);
600   }
601 };
602 
603 // Test that internal/non-unique names are properly identified as such, but
604 // that IP addresses and hosts beneath registry-controlled domains are flagged
605 // as unique names.
TEST_P(UrlUtilNonUniqueNameTest,IsHostnameNonUnique)606 TEST_P(UrlUtilNonUniqueNameTest, IsHostnameNonUnique) {
607   const NonUniqueNameTestData& test_data = GetParam();
608 
609   EXPECT_EQ(test_data.is_unique, IsUnique(test_data.hostname));
610 }
611 
612 INSTANTIATE_TEST_SUITE_P(All,
613                          UrlUtilNonUniqueNameTest,
614                          testing::ValuesIn(kNonUniqueNameTestData));
615 
// Verifies which host strings HostStringIsLocalhost() accepts, and that
// IsLocalhost() recognizes a URL whose host is the IPv6 loopback literal.
// Each host is listed once, and the host is printed when an expectation fails.
TEST(UrlUtilTest, IsLocalhost) {
  // Hosts that must be treated as localhost: the "localhost" name in any
  // case, with or without a trailing dot, names under ".localhost",
  // 127.0.0.0/8 addresses and the IPv6 loopback address.
  static constexpr const char* kLocalhostNames[] = {
      "localhost",
      "localHosT",
      "localhost.",
      "localHost.",
      "127.0.0.1",
      "127.0.1.0",
      "127.1.0.0",
      "127.0.0.255",
      "127.0.255.0",
      "127.255.0.0",
      "::1",
      "0:0:0:0:0:0:0:1",
      "foo.localhost",
      "foo.localhost.",
      "foo.localhoST",
      "foo.localhoST.",
  };

  // Hosts that must NOT be treated as localhost.
  static constexpr const char* kNonLocalhostNames[] = {
      // "localdomain" / "localhost6" style aliases.
      "localhost.localdomain",
      "localhost.localDOMain",
      "localhost.localdomain.",
      "localhost6",
      "localhost6.",
      "localhost6.localdomain6",
      "localhost6.localdomain6.",

      // Near misses and malformed addresses.
      "localhostx",
      "localhost.x",
      "foo.localdomain",
      "foo.localdomain.x",
      "localhost6x",
      "localhost.localdomain6",
      "localhost6.localdomain",
      "127.0.0.1.1",
      ".127.0.0.255",
      "::2",
      "::1:1",
      "0:0:0:0:1:0:0:1",
      "0:0:0:0:0:0:0:0:1",
      "foo.localhost.com",
      "foo.localhoste",
      "foo.localhos",
      // A bracketed literal is URL syntax, not a bare host string.
      "[::1]",
  };

  for (const char* host : kLocalhostNames) {
    EXPECT_TRUE(HostStringIsLocalhost(host)) << host;
  }
  for (const char* host : kNonLocalhostNames) {
    EXPECT_FALSE(HostStringIsLocalhost(host)) << host;
  }

  // At the URL level the bracketed IPv6 loopback literal is localhost.
  GURL localhost6("http://[::1]/");
  EXPECT_TRUE(IsLocalhost(localhost6));
}
664 
665 class UrlUtilTypedTest : public ::testing::TestWithParam<bool> {
666  public:
UrlUtilTypedTest()667   UrlUtilTypedTest()
668       : use_standard_compliant_non_special_scheme_url_parsing_(GetParam()) {
669     if (use_standard_compliant_non_special_scheme_url_parsing_) {
670       scoped_feature_list_.InitAndEnableFeature(
671           url::kStandardCompliantNonSpecialSchemeURLParsing);
672     } else {
673       scoped_feature_list_.InitAndDisableFeature(
674           url::kStandardCompliantNonSpecialSchemeURLParsing);
675     }
676   }
677 
678  protected:
679   bool use_standard_compliant_non_special_scheme_url_parsing_;
680 
681  private:
682   base::test::ScopedFeatureList scoped_feature_list_;
683 };
684 
685 INSTANTIATE_TEST_SUITE_P(All, UrlUtilTypedTest, ::testing::Bool());
686 
// Verifies that SimplifyUrlForRequest() strips the reference (fragment) and
// the username/password from URLs with http, https and ftp schemes.
TEST(UrlUtilTest, SimplifyUrlForRequest) {
  struct {
    const char* const input_url;
    const char* const expected_simplified_url;
  } tests[] = {
      {
          // Reference section should be stripped.
          "http://www.google.com:78/foobar?query=1#hash",
          "http://www.google.com:78/foobar?query=1",
      },
      {
          // Reference section can itself contain #.
          "http://192.168.0.1?query=1#hash#10#11#13#14",
          "http://192.168.0.1?query=1",
      },
      {
          // Strip username/password.
          "http://user:pass@google.com",
          "http://google.com/",
      },
      {
          // Strip both the reference and the username/password.
          "http://user:pass@google.com:80/sup?yo#X#X",
          "http://google.com/sup?yo",
      },
      {
          // Try an HTTPS URL -- strip both the reference and the
          // username/password.
          "https://user:pass@google.com:80/sup?yo#X#X",
          "https://google.com:80/sup?yo",
      },
      {
          // Try an FTP URL -- strip both the reference and the
          // username/password.
          "ftp://user:pass@google.com:80/sup?yo#X#X",
          "ftp://google.com:80/sup?yo",
      },
  };

  for (const auto& test : tests) {
    // Attach the input to any failure below so the offending row is obvious.
    SCOPED_TRACE(test.input_url);
    const GURL input_url(test.input_url);
    // Compare as GURLs rather than strings so both sides are canonicalized
    // the same way.
    const GURL expected_url(test.expected_simplified_url);
    EXPECT_EQ(expected_url, SimplifyUrlForRequest(input_url));
  }
}
726 
// Verifies SimplifyUrlForRequest() on a non-special scheme. The expectation
// depends on the fixture parameter: with standard-compliant parsing the
// credentials are stripped along with the reference; without it only the
// reference is stripped.
TEST_P(UrlUtilTypedTest, SimplifyUrlForRequest) {
  static constexpr struct {
    const char* const input_url;
    const char* const expected_when_compliant;
    const char* const expected_when_non_compliant;
  } tests[] = {
      {
          // Try a non-special URL
          "foobar://user:pass@google.com:80/sup?yo#X#X",
          "foobar://google.com:80/sup?yo",
          "foobar://user:pass@google.com:80/sup?yo",
      },
  };

  for (const auto& test : tests) {
    // Attach the input to any failure below so the offending row is obvious.
    SCOPED_TRACE(test.input_url);
    const GURL simplified = SimplifyUrlForRequest(GURL(test.input_url));
    // Pick the expectation that matches the feature state of this run.
    const char* const expected =
        use_standard_compliant_non_special_scheme_url_parsing_
            ? test.expected_when_compliant
            : test.expected_when_non_compliant;
    EXPECT_EQ(simplified, GURL(expected));
  }
}
751 
// Verifies that ChangeWebSocketSchemeToHttpScheme() maps ws -> http and
// wss -> https while leaving host, port, path and query as they were.
TEST(UrlUtilTest, ChangeWebSocketSchemeToHttpScheme) {
  static constexpr struct {
    const char* const websocket_url;
    const char* const http_url;
  } kCases[] = {
      {"ws://google.com:78/path?query=1", "http://google.com:78/path?query=1"},
      {"wss://google.com:441/path?q=1", "https://google.com:441/path?q=1"},
  };

  for (const auto& test_case : kCases) {
    const GURL expected(test_case.http_url);
    const GURL converted =
        ChangeWebSocketSchemeToHttpScheme(GURL(test_case.websocket_url));
    EXPECT_EQ(expected, converted);
  }
}
766 
// Verifies IsStandardSchemeWithNetworkHost() for built-in schemes and for
// custom schemes registered, for the duration of this test, with each
// url::SchemeType.
TEST(UrlUtilTest, SchemeHasNetworkHost) {
  const char kCustomSchemeWithHostPortAndUserInformation[] = "foo";
  const char kCustomSchemeWithHostAndPort[] = "bar";
  const char kCustomSchemeWithHost[] = "baz";
  const char kCustomSchemeWithoutAuthority[] = "qux";
  // Intentionally never registered.
  const char kNonStandardScheme[] = "not-registered";

  // Declared before the AddStandardScheme() calls so the registrations below
  // are scoped to this test.
  url::ScopedSchemeRegistryForTests scheme_registry;
  AddStandardScheme(kCustomSchemeWithHostPortAndUserInformation,
                    url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
  AddStandardScheme(kCustomSchemeWithHostAndPort,
                    url::SCHEME_WITH_HOST_AND_PORT);
  AddStandardScheme(kCustomSchemeWithHost, url::SCHEME_WITH_HOST);
  AddStandardScheme(kCustomSchemeWithoutAuthority,
                    url::SCHEME_WITHOUT_AUTHORITY);

  // Schemes expected to be reported as standard with a network host.
  const char* const kWithNetworkHost[] = {
      url::kHttpScheme,
      url::kHttpsScheme,
      url::kWsScheme,
      url::kWssScheme,
      url::kFtpScheme,
      url::kFileScheme,
      kCustomSchemeWithHostPortAndUserInformation,
      kCustomSchemeWithHostAndPort,
  };
  for (const char* scheme : kWithNetworkHost) {
    EXPECT_TRUE(IsStandardSchemeWithNetworkHost(scheme)) << scheme;
  }

  // Schemes expected not to be.
  const char* const kWithoutNetworkHost[] = {
      url::kFileSystemScheme,
      kCustomSchemeWithHost,
      kCustomSchemeWithoutAuthority,
      kNonStandardScheme,
  };
  for (const char* scheme : kWithoutNetworkHost) {
    EXPECT_FALSE(IsStandardSchemeWithNetworkHost(scheme)) << scheme;
  }
}
798 
TEST(UrlUtilTest,GetIdentityFromURL)799 TEST(UrlUtilTest, GetIdentityFromURL) {
800   struct {
801     const char* const input_url;
802     const char* const expected_username;
803     const char* const expected_password;
804   } tests[] = {
805       {
806           "http://username:[email protected]",
807           "username",
808           "password",
809       },
810       {
811           // Test for http://crbug.com/19200
812           "http://username:p@[email protected]",
813           "username",
814           "p@ssword",
815       },
816       {
817           // Special URL characters should be unescaped.
818           "http://username:p%3fa%26s%2fs%[email protected]",
819           "username",
820           "p?a&s/s#",
821       },
822       {
823           // Username contains %20, password %25.
824           "http://use rname:password%[email protected]",
825           "use rname",
826           "password%",
827       },
828       {
829           // Username and password contain forward / backward slashes.
830           "http://username%2F:password%[email protected]",
831           "username/",
832           "password\\",
833       },
834       {
835           // Keep %00 and %01 as-is, and ignore other escaped characters when
836           // present.
837           "http://use%00rname%20:pass%01word%[email protected]",
838           "use%00rname%20",
839           "pass%01word%25",
840       },
841       {
842           // Keep CR and LF as-is.
843           "http://use%0Arname:pass%[email protected]",
844           "use%0Arname",
845           "pass%0Dword",
846       },
847       {
848           // Use a '+' in the username.
849           "http://use+rname:[email protected]",
850           "use+rname",
851           "password",
852       },
853       {
854           // Use a '&' in the password.
855           "http://username:p&[email protected]",
856           "username",
857           "p&ssword",
858       },
859       {
860           // These UTF-8 characters are considered unsafe to unescape by
861           // UnescapeURLComponent, but raise no special concerns as part of the
862           // identity portion of a URL.
863           "http://%F0%9F%94%92:%E2%80%[email protected]",
864           "\xF0\x9F\x94\x92",
865           "\xE2\x80\x82",
866       },
867       {
868           // Leave invalid UTF-8 alone, and leave valid UTF-8 characters alone
869           // if there's also an invalid character in the string - strings should
870           // not be partially unescaped.
871           "http://%81:%E2%80%82%E2%[email protected]",
872           "%81",
873           "%E2%80%82%E2%80",
874       },
875   };
876   for (const auto& test : tests) {
877     SCOPED_TRACE(test.input_url);
878     GURL url(test.input_url);
879 
880     std::u16string username, password;
881     GetIdentityFromURL(url, &username, &password);
882 
883     EXPECT_EQ(base::UTF8ToUTF16(test.expected_username), username);
884     EXPECT_EQ(base::UTF8ToUTF16(test.expected_password), password);
885   }
886 }
887 
888 // Try extracting a username which was encoded with UTF8.
TEST(UrlUtilTest,GetIdentityFromURL_UTF8)889 TEST(UrlUtilTest, GetIdentityFromURL_UTF8) {
890   GURL url(u"http://foo:\x4f60\[email protected]");
891 
892   EXPECT_EQ("foo", url.username());
893   EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
894 
895   // Extract the unescaped identity.
896   std::u16string username, password;
897   GetIdentityFromURL(url, &username, &password);
898 
899   // Verify that it was decoded as UTF8.
900   EXPECT_EQ(u"foo", username);
901   EXPECT_EQ(u"\x4f60\x597d", password);
902 }
903 
// Table-driven check of HasGoogleHost(). The rows expect hosts of the form
// "<anything>.<google-owned domain>" (including an empty leading label) to be
// accepted, and bare domains or look-alike names to be rejected.
TEST(UrlUtilTest, GoogleHost) {
  struct {
    GURL url;
    bool expected_output;
  } google_host_cases[] = {
      {GURL("http://.google.com"), true},
      {GURL("http://.youtube.com"), true},
      {GURL("http://.gmail.com"), true},
      {GURL("http://.doubleclick.net"), true},
      {GURL("http://.gstatic.com"), true},
      {GURL("http://.googlevideo.com"), true},
      {GURL("http://.googleusercontent.com"), true},
      {GURL("http://.googlesyndication.com"), true},
      {GURL("http://.google-analytics.com"), true},
      {GURL("http://.googleadservices.com"), true},
      {GURL("http://.googleapis.com"), true},
      {GURL("http://a.google.com"), true},
      {GURL("http://b.youtube.com"), true},
      {GURL("http://c.gmail.com"), true},
      {GURL("http://google.com"), false},
      {GURL("http://youtube.com"), false},
      {GURL("http://gmail.com"), false},
      {GURL("http://google.coma"), false},
      {GURL("http://agoogle.com"), false},
      {GURL("http://oogle.com"), false},
      {GURL("http://google.co"), false},
      {GURL("http://oggole.com"), false},
  };

  for (const auto& test_case : google_host_cases) {
    // EXPECT_EQ on two bools only prints true/false; record the URL so a
    // failure names the offending row.
    SCOPED_TRACE(test_case.url.possibly_invalid_spec());
    EXPECT_EQ(test_case.expected_output, HasGoogleHost(test_case.url));
  }
}
937 
// Checks IsLocalHostname() against names that are expected to count as
// localhost (any letter case, optional trailing dot, optional subdomain
// labels) and against near-misses that are expected to be rejected.
TEST(UrlUtilTest, IsLocalHostname) {
  const char* const kLocalNames[] = {
      "localhost",      "localhost.",      "LOCALhost",
      "LOCALhost.",     "abc.localhost",   "abc.localhost.",
      "abc.LOCALhost",  "abc.LOCALhost.",  "abc.def.localhost",
  };
  for (const char* name : kLocalNames) {
    SCOPED_TRACE(name);
    EXPECT_TRUE(IsLocalHostname(name));
  }

  const char* const kNonLocalNames[] = {
      "localhost.actuallynot", "notlocalhost",         "notlocalhost.",
      "still.notlocalhost",    "localhostjustkidding",
  };
  for (const char* name : kNonLocalNames) {
    SCOPED_TRACE(name);
    EXPECT_FALSE(IsLocalHostname(name));
  }
}
955 
// Table-driven check of IsGoogleHostWithAlpnH3(). The rows expect google.com
// and www.google.com to match in any letter case, and other google.* hosts
// listed here to be rejected.
TEST(UrlUtilTest, GoogleHostWithAlpnH3) {
  struct {
    std::string_view host;
    bool expected_output;
  } test_cases[] = {
      {"google.com", true},        {"www.google.com", true},
      {"google.CoM", true},        {"www.Google.cOm", true},
      {"www.google.cat", false},   {"www.google.co.in", false},
      {"www.google.co.jp", false},
  };

  for (const auto& test_case : test_cases) {
    // EXPECT_EQ on two bools only prints true/false; record the host so a
    // failure names the offending row.
    SCOPED_TRACE(test_case.host);
    EXPECT_EQ(test_case.expected_output,
              IsGoogleHostWithAlpnH3(test_case.host));
  }
}
971 
972 }  // namespace
973 }  // namespace net
974