xref: /aosp_15_r20/external/cronet/url/gurl_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/gurl.h"
6 
7 #include <stddef.h>
8 
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/test/scoped_feature_list.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 #include "url/gurl_abstract_tests.h"
14 #include "url/origin.h"
15 #include "url/url_canon.h"
16 #include "url/url_features.h"
17 #include "url/url_test_utils.h"
18 
19 namespace url {
20 
21 namespace {
22 
23 // Returns the canonicalized string for the given URL string for the
24 // GURLTest.Types test.
TypesTestCase(const char * src)25 std::string TypesTestCase(const char* src) {
26   GURL gurl(src);
27   return gurl.possibly_invalid_spec();
28 }
29 
30 }  // namespace
31 
32 // Different types of URLs should be handled differently, and handed off to
33 // different canonicalizers.
TEST(GURLTest,Types)34 TEST(GURLTest, Types) {
35   // URLs with unknown schemes should be treated as path URLs, even when they
36   // have things like "://".
37   EXPECT_EQ("something:///HOSTNAME.com/",
38             TypesTestCase("something:///HOSTNAME.com/"));
39 
40   // Conversely, URLs with known schemes should always trigger standard URL
41   // handling.
42   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
43   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
44   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
45   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
46 
47 #ifdef WIN32
48   // URLs that look like Windows absolute path specs.
49   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
50   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
51   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
52   EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
53 #endif
54 }
55 
56 // Test the basic creation and querying of components in a GURL. We assume that
57 // the parser is already tested and works, so we are mostly interested if the
58 // object does the right thing with the results.
TEST(GURLTest,Components)59 TEST(GURLTest, Components) {
60   GURL empty_url(u"");
61   EXPECT_TRUE(empty_url.is_empty());
62   EXPECT_FALSE(empty_url.is_valid());
63 
64   GURL url(u"http://user:[email protected]:99/foo;bar?q=a#ref");
65   EXPECT_FALSE(url.is_empty());
66   EXPECT_TRUE(url.is_valid());
67   EXPECT_TRUE(url.SchemeIs("http"));
68   EXPECT_FALSE(url.SchemeIsFile());
69 
70   // This is the narrow version of the URL, which should match the wide input.
71   EXPECT_EQ("http://user:[email protected]:99/foo;bar?q=a#ref", url.spec());
72 
73   EXPECT_EQ("http", url.scheme());
74   EXPECT_EQ("user", url.username());
75   EXPECT_EQ("pass", url.password());
76   EXPECT_EQ("google.com", url.host());
77   EXPECT_EQ("99", url.port());
78   EXPECT_EQ(99, url.IntPort());
79   EXPECT_EQ("/foo;bar", url.path());
80   EXPECT_EQ("q=a", url.query());
81   EXPECT_EQ("ref", url.ref());
82 
83   // Test parsing userinfo with special characters.
84   GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345");
85   EXPECT_TRUE(url_special_pass.is_valid());
86   // GURL canonicalizes some delimiters.
87   EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
88   EXPECT_EQ("google.com", url_special_pass.host());
89   EXPECT_EQ("12345", url_special_pass.port());
90 }
91 
// A default-constructed GURL must be invalid, with an empty spec, empty
// string components and an unspecified port.
TEST(GURLTest, Empty) {
  const GURL default_url;
  const std::string no_value;

  EXPECT_FALSE(default_url.is_valid());
  EXPECT_EQ(no_value, default_url.spec());

  // Every component accessor reports "nothing here".
  EXPECT_EQ(no_value, default_url.scheme());
  EXPECT_EQ(no_value, default_url.username());
  EXPECT_EQ(no_value, default_url.password());
  EXPECT_EQ(no_value, default_url.host());
  EXPECT_EQ(no_value, default_url.port());
  EXPECT_EQ(PORT_UNSPECIFIED, default_url.IntPort());
  EXPECT_EQ(no_value, default_url.path());
  EXPECT_EQ(no_value, default_url.query());
  EXPECT_EQ(no_value, default_url.ref());
}
107 
// Copy construction must preserve validity, the spec and every component, for
// both a valid and an invalid source URL.
TEST(GURLTest, Copy) {
  // The userinfo must be "user:pass@" for the username/password/host
  // expectations below to hold.
  GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");

  GURL url2(url);
  EXPECT_TRUE(url2.is_valid());

  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
  EXPECT_EQ("http", url2.scheme());
  EXPECT_EQ("user", url2.username());
  EXPECT_EQ("pass", url2.password());
  EXPECT_EQ("google.com", url2.host());
  EXPECT_EQ("99", url2.port());
  EXPECT_EQ(99, url2.IntPort());
  EXPECT_EQ("/foo;bar", url2.path());
  EXPECT_EQ("q=a", url2.query());
  EXPECT_EQ("ref", url2.ref());

  // Copying of invalid URL should be invalid
  GURL invalid;
  GURL invalid2(invalid);
  EXPECT_FALSE(invalid2.is_valid());
  EXPECT_EQ("", invalid2.spec());
  EXPECT_EQ("", invalid2.scheme());
  EXPECT_EQ("", invalid2.username());
  EXPECT_EQ("", invalid2.password());
  EXPECT_EQ("", invalid2.host());
  EXPECT_EQ("", invalid2.port());
  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
  EXPECT_EQ("", invalid2.path());
  EXPECT_EQ("", invalid2.query());
  EXPECT_EQ("", invalid2.ref());
}
140 
// Copy assignment must preserve validity, the spec and every component, for
// both a valid and an invalid source URL.
TEST(GURLTest, Assign) {
  // The userinfo must be "user:pass@" for the username/password/host
  // expectations below to hold.
  GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");

  GURL url2;
  url2 = url;
  EXPECT_TRUE(url2.is_valid());

  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
  EXPECT_EQ("http", url2.scheme());
  EXPECT_EQ("user", url2.username());
  EXPECT_EQ("pass", url2.password());
  EXPECT_EQ("google.com", url2.host());
  EXPECT_EQ("99", url2.port());
  EXPECT_EQ(99, url2.IntPort());
  EXPECT_EQ("/foo;bar", url2.path());
  EXPECT_EQ("q=a", url2.query());
  EXPECT_EQ("ref", url2.ref());

  // Assignment of invalid URL should be invalid
  GURL invalid;
  GURL invalid2;
  invalid2 = invalid;
  EXPECT_FALSE(invalid2.is_valid());
  EXPECT_EQ("", invalid2.spec());
  EXPECT_EQ("", invalid2.scheme());
  EXPECT_EQ("", invalid2.username());
  EXPECT_EQ("", invalid2.password());
  EXPECT_EQ("", invalid2.host());
  EXPECT_EQ("", invalid2.port());
  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
  EXPECT_EQ("", invalid2.path());
  EXPECT_EQ("", invalid2.query());
  EXPECT_EQ("", invalid2.ref());
}
175 
176 // This is a regression test for http://crbug.com/309975.
TEST(GURLTest,SelfAssign)177 TEST(GURLTest, SelfAssign) {
178   GURL a("filesystem:http://example.com/temporary/");
179   // This should not crash.
180   a = *&a;  // The *& defeats Clang's -Wself-assign warning.
181 }
182 
// Copying a filesystem: URL must carry over both the outer URL's components
// and the inner URL reachable through inner_url().
TEST(GURLTest, CopyFileSystem) {
  // The input carries userinfo ("user:pass@"); the expectations below show it
  // is absent from both the canonical spec and the inner URL.
  GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");

  GURL url2(url);
  EXPECT_TRUE(url2.is_valid());

  EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
  EXPECT_EQ("filesystem", url2.scheme());
  EXPECT_EQ("", url2.username());
  EXPECT_EQ("", url2.password());
  EXPECT_EQ("", url2.host());
  EXPECT_EQ("", url2.port());
  EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort());
  EXPECT_EQ("/foo;bar", url2.path());
  EXPECT_EQ("q=a", url2.query());
  EXPECT_EQ("ref", url2.ref());

  // The authority and the leading "/t" path segment belong to the inner URL.
  // ASSERT (not EXPECT) so a null pointer is never dereferenced below.
  const GURL* inner = url2.inner_url();
  ASSERT_TRUE(inner);
  EXPECT_EQ("https", inner->scheme());
  EXPECT_EQ("", inner->username());
  EXPECT_EQ("", inner->password());
  EXPECT_EQ("google.com", inner->host());
  EXPECT_EQ("99", inner->port());
  EXPECT_EQ(99, inner->IntPort());
  EXPECT_EQ("/t", inner->path());
  EXPECT_EQ("", inner->query());
  EXPECT_EQ("", inner->ref());
}
212 
// Checks is_valid() against a list of inputs that must parse as valid URLs and
// a list that must not.
TEST(GURLTest, IsValid) {
  const char* valid_cases[] = {
      "http://google.com",
      "unknown://google.com",
      "http://user:pass@google.com",
      "http://google.com:12345",
      "http://google.com:0",  // 0 is a valid port
      "http://google.com/path",
      "http://google.com//path",
      "http://google.com?k=v#fragment",
      "http://user:pass@google.com:12345/path?k=v#fragment",
      "http:/path",
      "http:path",
  };
  for (const char* valid_case : valid_cases) {
    EXPECT_TRUE(GURL(valid_case).is_valid()) << "Case: " << valid_case;
  }

  const char* invalid_cases[] = {
      "http://?k=v",
      "http:://google.com",
      "http//google.com",
      "http://google.com:12three45",
      "file://server:123",  // file: URLs cannot have a port
      "file://server:0",
      "://google.com",
      "path",
  };
  for (const char* invalid_case : invalid_cases) {
    EXPECT_FALSE(GURL(invalid_case).is_valid()) << "Case: " << invalid_case;
  }
}
247 
TEST(GURLTest, ExtraSlashesBeforeAuthority) {
  // RFC3986 says a URI with an authority has exactly two slashes in front of
  // it. GURL deliberately tolerates more than two: the surplus slashes are
  // ignored and what follows is still parsed as the authority.
  const GURL triple_slash_url("http:///host");
  EXPECT_EQ("host", triple_slash_url.host());
  EXPECT_EQ("/", triple_slash_url.path());
}
256 
257 // Given invalid URLs, we should still get most of the components.
TEST(GURLTest,ComponentGettersWorkEvenForInvalidURL)258 TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
259   constexpr struct InvalidURLTestExpectations {
260     const char* url;
261     const char* spec;
262     const char* scheme;
263     const char* host;
264     const char* port;
265     const char* path;
266     // Extend as needed...
267   } expectations[] = {
268       {
269           "http:google.com:foo",
270           "http://google.com:foo/",
271           "http",
272           "google.com",
273           "foo",
274           "/",
275       },
276       {
277           "https:google.com:foo",
278           "https://google.com:foo/",
279           "https",
280           "google.com",
281           "foo",
282           "/",
283       },
284   };
285 
286   for (const auto& e : expectations) {
287     const GURL url(e.url);
288     EXPECT_FALSE(url.is_valid());
289     EXPECT_EQ(e.spec, url.possibly_invalid_spec());
290     EXPECT_EQ(e.scheme, url.scheme());
291     EXPECT_EQ("", url.username());
292     EXPECT_EQ("", url.password());
293     EXPECT_EQ(e.host, url.host());
294     EXPECT_EQ(e.port, url.port());
295     EXPECT_EQ(PORT_INVALID, url.IntPort());
296     EXPECT_EQ(e.path, url.path());
297     EXPECT_EQ("", url.query());
298     EXPECT_EQ("", url.ref());
299   }
300 }
301 
TEST(GURLTest,Resolve)302 TEST(GURLTest, Resolve) {
303   // The tricky cases for relative URL resolving are tested in the
304   // canonicalizer unit test. Here, we just test that the GURL integration
305   // works properly.
306   struct ResolveCase {
307     const char* base;
308     const char* relative;
309     bool expected_valid;
310     const char* expected;
311   } resolve_cases[] = {
312       {"http://www.google.com/", "foo.html", true,
313        "http://www.google.com/foo.html"},
314       {"http://www.google.com/foo/", "bar", true,
315        "http://www.google.com/foo/bar"},
316       {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
317       {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
318       {"http://www.google.com/", "http://images.google.com/foo.html", true,
319        "http://images.google.com/foo.html"},
320       {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
321        true, "http://images.google.com/foo.html"},
322       {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
323        true, "http://www.google.com/hello/world.html?a#b"},
324       {"http://www.google.com/foo#bar", "#com", true,
325        "http://www.google.com/foo#com"},
326       {"http://www.google.com/", "Https:images.google.com", true,
327        "https://images.google.com/"},
328       // An opaque path URL can be replaced with a special absolute URL.
329       {"data:blahblah", "http://google.com/", true, "http://google.com/"},
330       {"data:blahblah", "http:google.com", true, "http://google.com/"},
331       {"data:blahblah", "https:google.com", true, "https://google.com/"},
332       // An opaque path URL can not be replaced with a relative URL.
333       {"git:opaque", "", false, ""},
334       {"git:opaque", "path", false, ""},
335       // A non-special URL which doesn't have a host can be replaced with a
336       // relative URL.
337       {"git:/a", "b", true, "git:/b"},
338       // Filesystem URLs have different paths to test.
339       {"filesystem:http://www.google.com/type/", "foo.html", true,
340        "filesystem:http://www.google.com/type/foo.html"},
341       {"filesystem:http://www.google.com/type/", "../foo.html", true,
342        "filesystem:http://www.google.com/type/foo.html"},
343       // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
344       // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
345       {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
346        "file:///some/dir/10.0.0.7:8080/foo.html"},
347       {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
348       {"file:///some/dir/", "x1://host", true, "x1://host"},
349       {"file:///some/dir/", "X1://host", true, "x1://host"},
350       {"file:///some/dir/", "x.://host", true, "x.://host"},
351       {"file:///some/dir/", "x+://host", true, "x+://host"},
352       {"file:///some/dir/", "x-://host", true, "x-://host"},
353       {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
354       {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
355   };
356 
357   for (size_t i = 0; i < std::size(resolve_cases); i++) {
358     // 8-bit code path.
359     GURL input(resolve_cases[i].base);
360     GURL output = input.Resolve(resolve_cases[i].relative);
361     EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
362     EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
363     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
364 
365     // Wide code path.
366     GURL inputw(base::UTF8ToUTF16(resolve_cases[i].base));
367     GURL outputw =
368         input.Resolve(base::UTF8ToUTF16(resolve_cases[i].relative));
369     EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
370     EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
371     EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL);
372   }
373 }
374 
375 class GURLTypedTest : public ::testing::TestWithParam<bool> {
376  public:
GURLTypedTest()377   GURLTypedTest()
378       : use_standard_compliant_non_special_scheme_url_parsing_(GetParam()) {
379     if (use_standard_compliant_non_special_scheme_url_parsing_) {
380       scoped_feature_list_.InitAndEnableFeature(
381           kStandardCompliantNonSpecialSchemeURLParsing);
382     } else {
383       scoped_feature_list_.InitAndDisableFeature(
384           kStandardCompliantNonSpecialSchemeURLParsing);
385     }
386   }
387 
388  protected:
389   struct ResolveCase {
390     std::string_view base;
391     std::string_view relative;
392     std::optional<std::string_view> expected;
393   };
394 
395   using ApplyReplacementsFunc = GURL(const GURL&);
396 
397   struct ReplaceCase {
398     std::string_view base;
399     ApplyReplacementsFunc* apply_replacements;
400     std::string_view expected;
401   };
402 
403   struct ReplaceHostCase {
404     std::string_view base;
405     std::string_view replacement_host;
406     std::string_view expected;
407   };
408 
409   struct ReplacePathCase {
410     std::string_view base;
411     std::string_view replacement_path;
412     std::string_view expected;
413   };
414 
TestResolve(const ResolveCase & resolve_case)415   void TestResolve(const ResolveCase& resolve_case) {
416     SCOPED_TRACE(testing::Message() << "base: " << resolve_case.base
417                                     << ", relative: " << resolve_case.relative);
418     GURL input(resolve_case.base);
419     GURL output = input.Resolve(resolve_case.relative);
420     if (resolve_case.expected) {
421       ASSERT_TRUE(output.is_valid());
422       EXPECT_EQ(output.spec(), *resolve_case.expected);
423     } else {
424       EXPECT_FALSE(output.is_valid());
425     }
426   }
427 
TestReplace(const ReplaceCase & replace)428   void TestReplace(const ReplaceCase& replace) {
429     GURL output = replace.apply_replacements(GURL(replace.base));
430     EXPECT_EQ(output.spec(), replace.expected);
431   }
432 
TestReplaceHost(const ReplaceHostCase & replace)433   void TestReplaceHost(const ReplaceHostCase& replace) {
434     GURL url(replace.base);
435     GURL::Replacements replacements;
436     replacements.SetHostStr(replace.replacement_host);
437     GURL output = url.ReplaceComponents(replacements);
438     EXPECT_EQ(output.spec(), replace.expected);
439   }
440 
TestReplacePath(const ReplacePathCase & replace)441   void TestReplacePath(const ReplacePathCase& replace) {
442     GURL url(replace.base);
443     GURL::Replacements replacements;
444     replacements.SetPathStr(replace.replacement_path);
445     GURL output = url.ReplaceComponents(replacements);
446     EXPECT_EQ(output.spec(), replace.expected);
447   }
448 
449   bool use_standard_compliant_non_special_scheme_url_parsing_;
450 
451  private:
452   base::test::ScopedFeatureList scoped_feature_list_;
453 };
454 
TEST_P(GURLTypedTest,Resolve)455 TEST_P(GURLTypedTest, Resolve) {
456   // Test flag-dependent behaviors.
457   // Existing tests in GURLTest::Resolve cover common cases.
458   if (use_standard_compliant_non_special_scheme_url_parsing_) {
459     ResolveCase cases[] = {
460         // Non-special base URLs whose paths are empty.
461         {"git://host", "", "git://host"},
462         {"git://host", ".", "git://host/"},
463         {"git://host", "..", "git://host/"},
464         {"git://host", "a", "git://host/a"},
465         {"git://host", "/a", "git://host/a"},
466 
467         // Non-special base URLs whose paths are "/".
468         {"git://host/", "", "git://host/"},
469         {"git://host/", ".", "git://host/"},
470         {"git://host/", "..", "git://host/"},
471         {"git://host/", "a", "git://host/a"},
472         {"git://host/", "/a", "git://host/a"},
473 
474         // Non-special base URLs whose hosts and paths are non-empty.
475         {"git://host/b", "a", "git://host/a"},
476         {"git://host/b/c", "a", "git://host/b/a"},
477         {"git://host/b/c", "../a", "git://host/a"},
478 
479         // An opaque path can be specified.
480         {"git://host", "git:opaque", "git:opaque"},
481         {"git://host/path#ref", "git:opaque", "git:opaque"},
482         {"git:/path", "git:opaque", "git:opaque"},
483         {"https://host/path", "git:opaque", "git:opaque"},
484 
485         // Path-only base URLs should remain path-only URLs unless a host is
486         // specified.
487         {"git:/", "", "git:/"},
488         {"git:/", ".", "git:/"},
489         {"git:/", "..", "git:/"},
490         {"git:/", "a", "git:/a"},
491         {"git:/", "/a", "git:/a"},
492         {"git:/#ref", "", "git:/"},
493         {"git:/#ref", "a", "git:/a"},
494 
495         // Non-special base URLs whose hosts and path are both empty. The
496         // result's host should remain empty unless a relative URL specify a
497         // host.
498         {"git://", "", "git://"},
499         {"git://", ".", "git:///"},
500         {"git://", "..", "git:///"},
501         {"git://", "a", "git:///a"},
502         {"git://", "/a", "git:///a"},
503 
504         // Non-special base URLs whose hosts are empty, but with non-empty path.
505         {"git:///", "", "git:///"},
506         {"git:///", ".", "git:///"},
507         {"git:///", "..", "git:///"},
508         {"git:///", "a", "git:///a"},
509         {"git:///", "/a", "git:///a"},
510         {"git:///#ref", "", "git:///"},
511         {"git:///#ref", "a", "git:///a"},
512 
513         // Relative URLs can specify empty hosts for non-special base URLs.
514         // e.g. "///path"
515         {"git://host/", "//", "git://"},
516         {"git://host/", "//a", "git://a"},
517         {"git://host/", "///", "git:///"},
518         {"git://host/", "////", "git:////"},
519         {"git://host/", "////..", "git:///"},
520         {"git://host/", "////../..", "git:///"},
521         {"git://host/", "////../../..", "git:///"},
522     };
523     for (const auto& i : cases) {
524       TestResolve(i);
525     }
526   } else {
527     ResolveCase cases[] = {
528         {"git:/", "", "git:/"},
529         {"git:/", "a", "git:/a"},
530         {"git:/path", "a", "git:/a"},
531         // All non-special base URLs which don't start with a *single* slash
532         // can not be resolved with a relative URL.
533         {"git:", "", std::nullopt},
534         {"git://host", "", std::nullopt},
535         {"git://host", "a", std::nullopt},
536         {"git://", "", std::nullopt},
537         {"git:///", "", std::nullopt},
538     };
539     for (const auto& i : cases) {
540       TestResolve(i);
541     }
542   }
543 }
544 
545 INSTANTIATE_TEST_SUITE_P(All, GURLTypedTest, ::testing::Bool());
546 
// Checks DeprecatedGetOriginAsURL(): the expected results keep only scheme,
// host and port (of the inner URL for filesystem: URLs), and are empty for
// javascript: and blob: inputs.
TEST(GURLTest, GetOrigin) {
  struct TestCase {
    const char* input;
    const char* expected;
  } cases[] = {
      {"http://www.google.com", "http://www.google.com/"},
      {"javascript:window.alert(\"hello,world\");", ""},
      {"http://user:pass@www.google.com:21/blah#baz",
       "http://www.google.com:21/"},
      {"http://user@www.google.com", "http://www.google.com/"},
      {"http://:pass@www.google.com", "http://www.google.com/"},
      {"http://:@www.google.com", "http://www.google.com/"},
      {"filesystem:http://www.google.com/temp/foo?q#b",
       "http://www.google.com/"},
      {"filesystem:http://user:pass@google.com:21/blah#baz",
       "http://google.com:21/"},
      {"blob:null/guid-goes-here", ""},
      {"blob:http://origin/guid-goes-here", "" /* should be http://origin/ */},
  };
  for (const TestCase& test_case : cases) {
    GURL url(test_case.input);
    GURL origin = url.DeprecatedGetOriginAsURL();
    EXPECT_EQ(test_case.expected, origin.spec())
        << "Case: " << test_case.input;
  }
}
572 
// Checks GetAsReferrer(): the expected results drop the userinfo and the ref,
// and are empty for inputs that are not http(s) URLs.
TEST(GURLTest, GetAsReferrer) {
  struct TestCase {
    const char* input;
    const char* expected;
  } cases[] = {
    {"http://www.google.com", "http://www.google.com/"},
    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"},
    {"http://user@www.google.com", "http://www.google.com/"},
    {"http://:pass@www.google.com", "http://www.google.com/"},
    {"http://:@www.google.com", "http://www.google.com/"},
    {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"},
    {"not a url", ""},
    {"unknown-scheme://foo.html", ""},
    {"file:///tmp/test.html", ""},
    {"https://www.google.com", "https://www.google.com/"},
  };
  for (const TestCase& test_case : cases) {
    GURL url(test_case.input);
    GURL referrer = url.GetAsReferrer();
    EXPECT_EQ(test_case.expected, referrer.spec())
        << "Case: " << test_case.input;
  }
}
595 
// Table-driven check of GetWithEmptyPath(); each row pairs an input URL with
// the spec the call is expected to produce.
TEST(GURLTest, GetWithEmptyPath) {
  struct TestCase {
    const char* input;
    const char* expected;
  };
  const TestCase kCases[] = {
    {"http://www.google.com", "http://www.google.com/"},
    {"javascript:window.alert(\"hello, world\");", ""},
    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
    {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
  };

  for (const TestCase& test_case : kCases) {
    const GURL original(test_case.input);
    const GURL stripped = original.GetWithEmptyPath();
    EXPECT_EQ(test_case.expected, stripped.spec());
  }
}
614 
// Checks GetWithoutFilename(): the expected results cut the URL after the last
// slash of the path (dropping query and ref), and are empty for
// non-hierarchical or invalid inputs.
TEST(GURLTest, GetWithoutFilename) {
  struct TestCase {
    const char* input;
    const char* expected;
  } cases[] = {
    // Common Standard URLs.
    {"https://www.google.com",                    "https://www.google.com/"},
    {"https://www.google.com/",                   "https://www.google.com/"},
    {"https://www.google.com/maps.htm",           "https://www.google.com/"},
    {"https://www.google.com/maps/",              "https://www.google.com/maps/"},
    {"https://www.google.com/index.html",         "https://www.google.com/"},
    {"https://www.google.com/index.html?q=maps",  "https://www.google.com/"},
    {"https://www.google.com/index.html#maps/",   "https://www.google.com/"},
    {"https://foo:bar@www.google.com/maps.htm",   "https://foo:bar@www.google.com/"},
    {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"},
    {"https://www.google.com/maps/au/north",      "https://www.google.com/maps/au/"},
    {"https://www.google.com/maps/au/north/",     "https://www.google.com/maps/au/north/"},
    {"https://www.google.com/maps/au/index.html?q=maps#fragment/",     "https://www.google.com/maps/au/"},
    {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"},
    {"https://www.google.com/maps/au/north/?q=maps#fragment",          "https://www.google.com/maps/au/north/"},
    {"https://www.google.com/maps/au/north?q=maps#fragment",           "https://www.google.com/maps/au/"},
    // Less common standard URLs.
    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
    {"file:///temporary/bar.html?baz=22","file:///temporary/"},
    {"ftp://foo/test/index.html",        "ftp://foo/test/"},
    {"gopher://foo/test/index.html",     "gopher://foo/test/"},
    {"ws://foo/test/index.html",         "ws://foo/test/"},
    // Non-standard, hierarchical URLs.
    {"chrome://foo/bar.html", "chrome://foo/"},
    {"httpa://foo/test/index.html", "httpa://foo/test/"},
    // Non-standard, non-hierarchical URLs.
    {"blob:https://foo.bar/test/index.html", ""},
    {"about:blank", ""},
    {"data:foobar", ""},
    {"scheme:opaque_data", ""},
    // Invalid URLs.
    {"foobar", ""},
  };

  for (const TestCase& test_case : cases) {
    GURL url(test_case.input);
    GURL without_filename = url.GetWithoutFilename();
    EXPECT_EQ(test_case.expected, without_filename.spec())
        << "Case: " << test_case.input;
  }
}
660 
TEST(GURLTest,GetWithoutRef)661 TEST(GURLTest, GetWithoutRef) {
662   struct TestCase {
663     const char* input;
664     const char* expected;
665   } cases[] = {
666       // Common Standard URLs.
667       {"https://www.google.com/index.html",
668        "https://www.google.com/index.html"},
669       {"https://www.google.com/index.html#maps/",
670        "https://www.google.com/index.html"},
671 
672       {"https://foo:[email protected]/maps.htm",
673        "https://foo:[email protected]/maps.htm"},
674       {"https://foo:[email protected]/maps.htm#fragment",
675        "https://foo:[email protected]/maps.htm"},
676 
677       {"https://www.google.com/maps/au/index.html?q=maps",
678        "https://www.google.com/maps/au/index.html?q=maps"},
679       {"https://www.google.com/maps/au/index.html?q=maps#fragment/",
680        "https://www.google.com/maps/au/index.html?q=maps"},
681 
682       {"http://www.google.com:8000/maps/au/index.html?q=maps",
683        "http://www.google.com:8000/maps/au/index.html?q=maps"},
684       {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
685        "http://www.google.com:8000/maps/au/index.html?q=maps"},
686 
687       {"https://www.google.com/maps/au/north/?q=maps",
688        "https://www.google.com/maps/au/north/?q=maps"},
689       {"https://www.google.com/maps/au/north?q=maps#fragment",
690        "https://www.google.com/maps/au/north?q=maps"},
691 
692       // Less common standard URLs.
693       {"filesystem:http://www.google.com/temporary/bar.html?baz=22",
694        "filesystem:http://www.google.com/temporary/bar.html?baz=22"},
695       {"file:///temporary/bar.html?baz=22#fragment",
696        "file:///temporary/bar.html?baz=22"},
697 
698       {"ftp://foo/test/index.html", "ftp://foo/test/index.html"},
699       {"ftp://foo/test/index.html#fragment", "ftp://foo/test/index.html"},
700 
701       {"gopher://foo/test/index.html", "gopher://foo/test/index.html"},
702       {"gopher://foo/test/index.html#fragment", "gopher://foo/test/index.html"},
703 
704       {"ws://foo/test/index.html", "ws://foo/test/index.html"},
705       {"ws://foo/test/index.html#fragment", "ws://foo/test/index.html"},
706 
707       // Non-standard, hierarchical URLs.
708       {"chrome://foo/bar.html", "chrome://foo/bar.html"},
709       {"chrome://foo/bar.html#fragment", "chrome://foo/bar.html"},
710 
711       {"httpa://foo/test/index.html", "httpa://foo/test/index.html"},
712       {"httpa://foo/test/index.html#fragment", "httpa://foo/test/index.html"},
713 
714       // Non-standard, non-hierarchical URLs.
715       {"blob:https://foo.bar/test/index.html",
716        "blob:https://foo.bar/test/index.html"},
717       {"blob:https://foo.bar/test/index.html#fragment",
718        "blob:https://foo.bar/test/index.html"},
719 
720       {"about:blank", "about:blank"},
721       {"about:blank#ref", "about:blank"},
722 
723       {"data:foobar", "data:foobar"},
724       {"scheme:opaque_data", "scheme:opaque_data"},
725       // Invalid URLs.
726       {"foobar", ""},
727   };
728 
729   for (size_t i = 0; i < std::size(cases); i++) {
730     GURL url(cases[i].input);
731     GURL without_ref = url.GetWithoutRef();
732     EXPECT_EQ(cases[i].expected, without_ref.spec());
733   }
734 }
735 
TEST(GURLTest,Replacements)736 TEST(GURLTest, Replacements) {
737   // The URL canonicalizer replacement test will handle most of these case.
738   // The most important thing to do here is to check that the proper
739   // canonicalizer gets called based on the scheme of the input.
740   struct ReplaceCase {
741     using ApplyReplacementsFunc = GURL(const GURL&);
742 
743     const char* base;
744     ApplyReplacementsFunc* apply_replacements;
745     const char* expected;
746   } replace_cases[] = {
747       {.base = "http://www.google.com/foo/bar.html?foo#bar",
748        .apply_replacements =
749            +[](const GURL& url) {
750              GURL::Replacements replacements;
751              replacements.SetPathStr("/");
752              replacements.ClearQuery();
753              replacements.ClearRef();
754              return url.ReplaceComponents(replacements);
755            },
756        .expected = "http://www.google.com/"},
757       {.base = "file:///C:/foo/bar.txt",
758        .apply_replacements =
759            +[](const GURL& url) {
760              GURL::Replacements replacements;
761              replacements.SetSchemeStr("http");
762              replacements.SetHostStr("www.google.com");
763              replacements.SetPortStr("99");
764              replacements.SetPathStr("/foo");
765              replacements.SetQueryStr("search");
766              replacements.SetRefStr("ref");
767              return url.ReplaceComponents(replacements);
768            },
769        .expected = "http://www.google.com:99/foo?search#ref"},
770 #ifdef WIN32
771       {.base = "http://www.google.com/foo/bar.html?foo#bar",
772        .apply_replacements =
773            +[](const GURL& url) {
774              GURL::Replacements replacements;
775              replacements.SetSchemeStr("file");
776              replacements.ClearUsername();
777              replacements.ClearPassword();
778              replacements.ClearHost();
779              replacements.ClearPort();
780              replacements.SetPathStr("c:\\");
781              replacements.ClearQuery();
782              replacements.ClearRef();
783              return url.ReplaceComponents(replacements);
784            },
785        .expected = "file:///C:/"},
786 #endif
787       {.base = "filesystem:http://www.google.com/foo/bar.html?foo#bar",
788        .apply_replacements =
789            +[](const GURL& url) {
790              GURL::Replacements replacements;
791              replacements.SetPathStr("/");
792              replacements.ClearQuery();
793              replacements.ClearRef();
794              return url.ReplaceComponents(replacements);
795            },
796        .expected = "filesystem:http://www.google.com/foo/"},
797       // Lengthen the URL instead of shortening it, to test creation of
798       // inner_url.
799       {.base = "filesystem:http://www.google.com/foo/",
800        .apply_replacements =
801            +[](const GURL& url) {
802              GURL::Replacements replacements;
803              replacements.SetPathStr("bar.html");
804              replacements.SetQueryStr("foo");
805              replacements.SetRefStr("bar");
806              return url.ReplaceComponents(replacements);
807            },
808        .expected = "filesystem:http://www.google.com/foo/bar.html?foo#bar"},
809   };
810 
811   for (const ReplaceCase& c : replace_cases) {
812     GURL output = c.apply_replacements(GURL(c.base));
813 
814     EXPECT_EQ(c.expected, output.spec());
815 
816     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
817     if (output.SchemeIsFileSystem()) {
818       // TODO(mmenke): inner_url()->spec() is currently the same as the spec()
819       // for the GURL itself.  This should be fixed.
820       // See https://crbug.com/619596
821       EXPECT_EQ(c.expected, output.inner_url()->spec());
822     }
823   }
824 }
825 
TEST_P(GURLTypedTest,Replacements)826 TEST_P(GURLTypedTest, Replacements) {
827   // Test flag-dependent behavior.
828   // Existing tests in GURLTest::Replacements cover common cases.
829 
830   if (use_standard_compliant_non_special_scheme_url_parsing_) {
831     ReplaceCase replace_cases[] = {
832         {.base = "git://a1/a2?a3=a4#a5",
833          .apply_replacements =
834              +[](const GURL& url) {
835                GURL::Replacements replacements;
836                replacements.SetHostStr("b1");
837                replacements.SetPortStr("99");
838                replacements.SetPathStr("b2");
839                replacements.SetQueryStr("b3=b4");
840                replacements.SetRefStr("b5");
841                return url.ReplaceComponents(replacements);
842              },
843          .expected = "git://b1:99/b2?b3=b4#b5"},
844         // URL Standard: https://url.spec.whatwg.org/#dom-url-username
845         // > 1. If this’s URL cannot have a username/password/port, then return.
846         {.base = "git:///",
847          .apply_replacements =
848              +[](const GURL& url) {
849                GURL::Replacements replacements;
850                replacements.SetUsernameStr("x");
851                return url.ReplaceComponents(replacements);
852              },
853          .expected = "git:///"},
854         // URL Standard: https://url.spec.whatwg.org/#dom-url-password
855         // > 1. If this’s URL cannot have a username/password/port, then return.
856         {.base = "git:///",
857          .apply_replacements =
858              +[](const GURL& url) {
859                GURL::Replacements replacements;
860                replacements.SetPasswordStr("x");
861                return url.ReplaceComponents(replacements);
862              },
863          .expected = "git:///"},
864         // URL Standard: https://url.spec.whatwg.org/#dom-url-port
865         // > 1. If this’s URL cannot have a username/password/port, then return.
866         {.base = "git:///",
867          .apply_replacements =
868              +[](const GURL& url) {
869                GURL::Replacements replacements;
870                replacements.SetPortStr("80");
871                return url.ReplaceComponents(replacements);
872              },
873          .expected = "git:///"}};
874 
875     for (const ReplaceCase& c : replace_cases) {
876       TestReplace(c);
877     }
878 
879     ReplaceHostCase replace_host_cases[] = {
880         {"git:/", "host", "git://host/"},
881         {"git:/a", "host", "git://host/a"},
882         {"git://", "host", "git://host"},
883         {"git:///", "host", "git://host/"},
884         {"git://h/a", "host", "git://host/a"},
885         // The following behavior is different from Web-facing URL APIs
886         // because DOMURLUtils::setHostname disallows setting an empty host.
887         //
888         // Web-facing URL API behavior is:
889         // > const url = new URL("git://u:p@h:80/");
890         // > url.hostname = "";
891         // > assertEquals(url.href, "git://u:p@h:80/");
892         {"git://u:p@h:80/", "", "git:///"}};
893     for (const ReplaceHostCase& c : replace_host_cases) {
894       TestReplaceHost(c);
895     }
896 
897     ReplacePathCase replace_path_cases[] = {
898         {"git:/", "a", "git:/a"},
899         {"git:/", "", "git:/"},
900         {"git:/", "//a", "git:/.//a"},
901         {"git:/", "/.//a", "git:/.//a"},
902         {"git://", "a", "git:///a"},
903         {"git:///", "a", "git:///a"},
904         {"git://host", "a", "git://host/a"},
905         {"git://host/b", "a", "git://host/a"}};
906     for (const ReplacePathCase& c : replace_path_cases) {
907       TestReplacePath(c);
908     }
909   } else {
910     // Non-compliant behaviors.
911     ReplaceHostCase replace_host_cases[] = {
912         {"git://host", "h2", "git://host"},
913     };
914     for (const ReplaceHostCase& c : replace_host_cases) {
915       TestReplaceHost(c);
916     }
917 
918     // Non-compliant behaviors.
919     ReplacePathCase replace_path_cases[] = {{"git://host", "path", "git:path"}};
920     for (const ReplacePathCase& c : replace_path_cases) {
921       TestReplacePath(c);
922     }
923   }
924 }
925 
// Regression test for http://crbug.com/291747: once the ref of a data URL is
// cleared, its spec may legitimately end in whitespace. Importing such a spec
// must not trip the Parsed importing validation DCHECK in GURL.
//
// NOTE(review): this is a plain TEST() registered under the GURLTypedTest
// name although it does not read the test parameter - confirm whether it was
// meant to live in GURLTest.
TEST(GURLTypedTest, ClearFragmentOnDataUrl) {
  const GURL original(" data: one # two ");
  EXPECT_TRUE(original.is_valid());
  // Parsing from a string strips the trailing whitespace.
  EXPECT_EQ("data: one #%20two", original.spec());

  // Dropping the ref leaves the whitespace that preceded '#' at the end of the
  // spec.
  GURL::Replacements clear_ref;
  clear_ref.ClearRef();
  const GURL without_ref = original.ReplaceComponents(clear_ref);
  EXPECT_TRUE(without_ref.is_valid());
  EXPECT_EQ("data: one ", without_ref.spec());

  // The (spec, parsed, is_valid) constructor overload imports the URL as-is,
  // so the trailing whitespace is retained.
  const GURL imported(without_ref.spec(),
                      without_ref.parsed_for_possibly_invalid_spec(),
                      without_ref.is_valid());
  EXPECT_TRUE(imported.is_valid());
  EXPECT_EQ(without_ref, imported);
  EXPECT_EQ("data: one ", imported.spec());
  EXPECT_EQ(" one ", imported.path());

  // For completeness: parsing the same spec from scratch, rather than
  // importing it, trims the trailing whitespace again.
  const GURL reparsed(without_ref.spec());
  EXPECT_TRUE(reparsed.is_valid());
  EXPECT_EQ("data: one", reparsed.spec());
}
959 
// Checks PathForRequest() / PathForRequestPiece() on the URL itself and, for
// URLs that have one, on the inner URL.
TEST(GURLTest, PathForRequest) {
  struct TestCase {
    const char* input;
    // Expected request path of the URL built from |input|.
    const char* expected;
    // Expected request path of inner_url(), or nullptr when the URL is
    // expected to have no inner URL.
    const char* inner_expected;
  } cases[] = {
      {"http://www.google.com", "/", nullptr},
      {"http://www.google.com/", "/", nullptr},
      {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22",
       nullptr},
      {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr},
      {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query",
       nullptr},
      {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref",
       "/foo/bar.html?query", "/temporary"},
      {"filesystem:http://www.google.com/temporary/foo/bar.html?query",
       "/foo/bar.html?query", "/temporary"},
  };

  for (const TestCase& test : cases) {
    // Identify the failing table row in any assertion output below.
    SCOPED_TRACE(test.input);

    GURL url(test.input);
    EXPECT_EQ(test.expected, url.PathForRequest());
    EXPECT_EQ(test.expected, url.PathForRequestPiece());

    // An inner URL must be present exactly when an inner path is expected.
    const GURL* inner_url = url.inner_url();
    EXPECT_EQ(test.inner_expected == nullptr, inner_url == nullptr);
    if (inner_url && test.inner_expected) {
      EXPECT_EQ(test.inner_expected, inner_url->PathForRequest());
      EXPECT_EQ(test.inner_expected, inner_url->PathForRequestPiece());
    }
  }
}
991 
// Checks EffectiveIntPort() for URLs with explicit ports, with no port, and
// with schemes for which PORT_UNSPECIFIED is expected.
TEST(GURLTest, EffectiveIntPort) {
  struct PortTest {
    const char* spec;
    int expected_int_port;
  };
  const PortTest kPortTests[] = {
      // http
      {"http://www.google.com/", 80},
      {"http://www.google.com:80/", 80},
      {"http://www.google.com:443/", 443},

      // https
      {"https://www.google.com/", 443},
      {"https://www.google.com:443/", 443},
      {"https://www.google.com:80/", 80},

      // ftp
      {"ftp://www.google.com/", 21},
      {"ftp://www.google.com:21/", 21},
      {"ftp://www.google.com:80/", 80},

      // file - no port
      {"file://www.google.com/", PORT_UNSPECIFIED},
      {"file://www.google.com:443/", PORT_UNSPECIFIED},

      // data - no port
      {"data:www.google.com:90", PORT_UNSPECIFIED},
      {"data:www.google.com", PORT_UNSPECIFIED},

      // filesystem - no port
      {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED},
      {"filesystem:file:///t/foo", PORT_UNSPECIFIED},
  };

  for (const PortTest& port_test : kPortTests) {
    const GURL parsed(port_test.spec);
    EXPECT_EQ(port_test.expected_int_port, parsed.EffectiveIntPort());
  }
}
1030 
// Checks HostIsIPAddress() against IPv4-like, IPv6-like and non-IP hosts, as
// well as inputs that are not URLs at all.
TEST(GURLTest, IPAddress) {
  struct IPTest {
    const char* spec;
    bool expected_ip;
  };
  const IPTest kIPTests[] = {
      {"http://www.google.com/", false},
      {"http://192.168.9.1/", true},
      {"http://192.168.9.1.2/", false},
      {"http://192.168.m.1/", false},
      {"http://2001:db8::1/", false},
      {"http://[2001:db8::1]/", true},
      {"", false},
      {"some random input!", false},
  };

  for (const IPTest& ip_test : kIPTests) {
    const GURL parsed(ip_test.spec);
    EXPECT_EQ(ip_test.expected_ip, parsed.HostIsIPAddress());
  }
}
1051 
// Compares host() with HostNoBrackets() / HostNoBracketsPiece() for regular
// hosts, bracketed IPv6 literals, and malformed bracket usage.
TEST(GURLTest, HostNoBrackets) {
  struct TestCase {
    const char* input;
    const char* expected_host;
    const char* expected_plainhost;
  };
  const TestCase kCases[] = {
      {"http://www.google.com", "www.google.com", "www.google.com"},
      {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
      {"http://[::]/", "[::]", "::"},

      // Don't require a valid URL, but don't crash either.
      {"http://[]/", "[]", ""},
      {"http://[x]/", "[x]", "x"},
      {"http://[x/", "[x", "[x"},
      {"http://x]/", "x]", "x]"},
      {"http://[/", "[", "["},
      {"http://]/", "]", "]"},
      {"", "", ""},
  };

  for (const TestCase& test : kCases) {
    const GURL parsed(test.input);
    EXPECT_EQ(test.expected_host, parsed.host());
    EXPECT_EQ(test.expected_plainhost, parsed.HostNoBrackets());
    EXPECT_EQ(test.expected_plainhost, parsed.HostNoBracketsPiece());
  }
}
1078 
// Exercises GURL::DomainIs() for matching and non-matching hosts, an invalid
// URL, and a host containing a comma.
TEST(GURLTest, DomainIs) {
  // Exact host match.
  EXPECT_TRUE(GURL("http://google.com/foo").DomainIs("google.com"));

  // Subdomain and port are ignored.
  EXPECT_TRUE(GURL("http://www.google.com:99/foo").DomainIs("google.com"));

  // Different top-level domain.
  EXPECT_FALSE(GURL("http://www.google.com.cn/foo").DomainIs("google.com"));

  // Different host name.
  EXPECT_FALSE(GURL("http://www.iamnotgoogle.com/foo").DomainIs("google.com"));

  // The input must be lower-cased otherwise DomainIs returns false.
  EXPECT_FALSE(GURL("http://www.google.com/foo").DomainIs("Google.com"));

  // If the URL is invalid, DomainIs returns false.
  const GURL schemeless("google.com");
  EXPECT_FALSE(schemeless.is_valid());
  EXPECT_FALSE(schemeless.DomainIs("google.com"));

  const GURL comma_host("https://www.,.test");
  EXPECT_TRUE(comma_host.is_valid());
  EXPECT_EQ(comma_host.host(), "www.,.test");
  EXPECT_TRUE(comma_host.DomainIs(",.test"));
}
1109 
// Checks how GURL::DomainIs() treats a trailing dot on the host and on the
// queried domain.
TEST(GURLTest, DomainIsTerminatingDotBehavior) {
  // A host that ends with a dot matches the queried domain whether or not
  // that domain carries the trailing dot too.
  const GURL dotted_host("http://www.google.com./foo");
  for (const char* domain : {"google.com", "google.com.", ".com", ".com."}) {
    EXPECT_TRUE(dotted_host.DomainIs(domain)) << domain;
  }

  // The reverse does not hold: a host without a trailing dot is not matched
  // by a domain that has one.
  const GURL plain_host("http://google.com/foo");
  EXPECT_FALSE(plain_host.DomainIs("google.com."));

  // If the URL ends with two dots, it doesn't match.
  const GURL double_dotted_host("http://www.google.com../foo");
  EXPECT_FALSE(double_dotted_host.DomainIs("google.com"));
}
1128 
// Checks GURL::DomainIs() on filesystem: URLs wrapping an http URL.
TEST(GURLTest, DomainIsWithFilesystemScheme) {
  const GURL matching("filesystem:http://www.google.com:99/foo/");
  const GURL other("filesystem:http://www.iamnotgoogle.com/foo/");

  EXPECT_TRUE(matching.DomainIs("google.com"));
  EXPECT_FALSE(other.DomainIs("google.com"));
}
1136 
1137 // Newlines should be stripped from inputs.
TEST(GURLTest,Newlines)1138 TEST(GURLTest, Newlines) {
1139   // Constructor.
1140   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
1141   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
1142   EXPECT_FALSE(
1143       url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
1144 
1145   // Relative path resolver.
1146   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
1147   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
1148   EXPECT_FALSE(
1149       url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
1150 
1151   // Constructor.
1152   GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n ");
1153   EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec());
1154   EXPECT_TRUE(
1155       url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
1156 
1157   // Relative path resolver.
1158   GURL url_4 = url_1.Resolve(" \n /fo\to<\r ");
1159   EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec());
1160   EXPECT_TRUE(
1161       url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
1162 
1163   // Note that newlines are NOT stripped from ReplaceComponents.
1164 }
1165 
// Checks GURL::IsStandard() for an http URL and for several URLs whose
// schemes are expected to be reported as non-standard.
TEST(GURLTest, IsStandard) {
  EXPECT_TRUE(GURL("http:foo/bar").IsStandard());

  EXPECT_FALSE(GURL("foo:bar/baz").IsStandard());
  EXPECT_FALSE(GURL("foo://bar/baz").IsStandard());
  EXPECT_FALSE(GURL("cid:bar@baz").IsStandard());
}
1179 
// Checks SchemeIsHTTPOrHTTPS(), including an upper-case input.
TEST(GURLTest, SchemeIsHTTPOrHTTPS) {
  const GURL http_url("http://bar/");
  const GURL upper_case_https_url("HTTPS://BAR");
  const GURL ftp_url("ftp://bar/");

  EXPECT_TRUE(http_url.SchemeIsHTTPOrHTTPS());
  EXPECT_TRUE(upper_case_https_url.SchemeIsHTTPOrHTTPS());
  EXPECT_FALSE(ftp_url.SchemeIsHTTPOrHTTPS());
}
1185 
// Checks SchemeIsWSOrWSS(), including an upper-case input.
TEST(GURLTest, SchemeIsWSOrWSS) {
  const GURL upper_case_ws_url("WS://BAR/");
  const GURL wss_url("wss://bar/");
  const GURL http_url("http://bar/");

  EXPECT_TRUE(upper_case_ws_url.SchemeIsWSOrWSS());
  EXPECT_TRUE(wss_url.SchemeIsWSOrWSS());
  EXPECT_FALSE(http_url.SchemeIsWSOrWSS());
}
1191 
// Checks the member SchemeIsCryptographic() for https/wss in various letter
// cases and for their plaintext counterparts.
TEST(GURLTest, SchemeIsCryptographic) {
  static constexpr const char* kCryptographicSpecs[] = {
      "https://foo.bar.com/", "HTTPS://foo.bar.com/", "HtTpS://foo.bar.com/",
      "wss://foo.bar.com/",   "WSS://foo.bar.com/",   "WsS://foo.bar.com/",
  };
  for (const char* spec : kCryptographicSpecs) {
    EXPECT_TRUE(GURL(spec).SchemeIsCryptographic()) << spec;
  }

  static constexpr const char* kPlaintextSpecs[] = {
      "http://foo.bar.com/",
      "ws://foo.bar.com/",
  };
  for (const char* spec : kPlaintextSpecs) {
    EXPECT_FALSE(GURL(spec).SchemeIsCryptographic()) << spec;
  }
}
1204 
// Checks the static GURL::SchemeIsCryptographic() overload that takes a bare
// scheme string.
TEST(GURLTest, SchemeIsCryptographicStatic) {
  static constexpr const char* kCryptographicSchemes[] = {"https", "wss"};
  for (const char* scheme : kCryptographicSchemes) {
    EXPECT_TRUE(GURL::SchemeIsCryptographic(scheme)) << scheme;
  }

  static constexpr const char* kOtherSchemes[] = {"http", "ws", "ftp"};
  for (const char* scheme : kOtherSchemes) {
    EXPECT_FALSE(GURL::SchemeIsCryptographic(scheme)) << scheme;
  }
}
1212 
// Checks SchemeIsBlob(), including an upper-case input.
TEST(GURLTest, SchemeIsBlob) {
  const GURL upper_case_blob_url("BLOB://BAR/");
  const GURL blob_url("blob://bar/");
  const GURL http_url("http://bar/");

  EXPECT_TRUE(upper_case_blob_url.SchemeIsBlob());
  EXPECT_TRUE(blob_url.SchemeIsBlob());
  EXPECT_FALSE(http_url.SchemeIsBlob());
}
1218 
// Checks SchemeIsLocal(): blob, data, about and filesystem URLs are expected
// to be local in either letter case; http and file URLs are not.
TEST(GURLTest, SchemeIsLocal) {
  static constexpr const char* kLocalSpecs[] = {
      "BLOB://BAR/",
      "blob://bar/",
      "DATA:TEXT/HTML,BAR",
      "data:text/html,bar",
      "ABOUT:BAR",
      "about:bar",
      "FILESYSTEM:HTTP://FOO.EXAMPLE/BAR",
      "filesystem:http://foo.example/bar",
  };
  for (const char* spec : kLocalSpecs) {
    EXPECT_TRUE(GURL(spec).SchemeIsLocal()) << spec;
  }

  static constexpr const char* kNonLocalSpecs[] = {
      "http://bar/",
      "file:///bar",
  };
  for (const char* spec : kNonLocalSpecs) {
    EXPECT_FALSE(GURL(spec).SchemeIsLocal()) << spec;
  }
}
1232 
1233 // Tests that the 'content' of the URL is properly extracted. This can be
1234 // complex in cases such as multiple schemes (view-source:http:) or for
1235 // javascript URLs. See GURL::GetContent for more details.
TEST(GURLTest,ContentForNonStandardURLs)1236 TEST(GURLTest, ContentForNonStandardURLs) {
1237   struct TestCase {
1238     const char* url;
1239     const char* expected;
1240   } cases[] = {
1241       {"null", ""},
1242       {"not-a-standard-scheme:this is arbitrary content",
1243        "this is arbitrary content"},
1244 
1245       // When there are multiple schemes, only the first is excluded from the
1246       // content. Note also that for e.g. 'http://', the '//' is part of the
1247       // content not the scheme.
1248       {"view-source:http://example.com/path", "http://example.com/path"},
1249       {"blob:http://example.com/GUID", "http://example.com/GUID"},
1250       {"blob:http://user:[email protected]/GUID",
1251        "http://user:[email protected]/GUID"},
1252 
1253       // The octothorpe character ('#') marks the end of the URL content, and
1254       // the start of the fragment. It should not be included in the content.
1255       {"http://www.example.com/GUID#ref", "www.example.com/GUID"},
1256       {"http://me:[email protected]/GUID/#ref", "me:[email protected]/GUID/"},
1257       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
1258        "text/html,Question?%3Cdiv%20style=%22color:%20"},
1259 
1260       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
1261       {"filesystem:http://example.com/path", "/"},
1262 
1263       // Javascript URLs include '#' symbols in their content.
1264       {"javascript:#", "#"},
1265       {"javascript:alert('#');", "alert('#');"},
1266   };
1267 
1268   for (const auto& test : cases) {
1269     GURL url(test.url);
1270     EXPECT_EQ(test.expected, url.GetContent()) << test.url;
1271     EXPECT_EQ(test.expected, url.GetContentPiece()) << test.url;
1272   }
1273 }
1274 
// Checks GetContent() / GetContentPiece() for URLs whose expected content
// depends on whether standard-compliant parsing of non-special scheme URLs is
// enabled.
TEST_P(GURLTypedTest, ContentForNonStandardURLs) {
  struct TestCase {
    const std::string_view url;
    // Expected content when standard-compliant parsing is enabled.
    const std::string_view compliant_expected;
    // Expected content otherwise.
    const std::string_view legacy_expected;
  };
  const TestCase kCases[] = {
      {"blob://http://example.com/GUID", "http//example.com/GUID",
       "//http://example.com/GUID"},
      {"git://host/path#fragment", "host/path", "//host/path"},
  };

  for (const TestCase& test : kCases) {
    const std::string_view expected =
        use_standard_compliant_non_special_scheme_url_parsing_
            ? test.compliant_expected
            : test.legacy_expected;
    const GURL parsed(test.url);
    EXPECT_EQ(parsed.GetContent(), expected) << test.url;
    EXPECT_EQ(parsed.GetContentPiece(), expected) << test.url;
  }
}
1303 
1304 // Tests that the URL path is properly extracted for unusual URLs. This can be
1305 // complex in cases such as multiple schemes (view-source:http:) or when
1306 // octothorpes ('#') are involved.
TEST(GURLTest,PathForNonStandardURLs)1307 TEST(GURLTest, PathForNonStandardURLs) {
1308   struct TestCase {
1309     const char* url;
1310     const char* expected;
1311   } cases[] = {
1312       {"null", ""},
1313       {"not-a-standard-scheme:this is arbitrary content",
1314        "this is arbitrary content"},
1315       {"view-source:http://example.com/path", "http://example.com/path"},
1316       {"blob:http://example.com/GUID", "http://example.com/GUID"},
1317       {"blob:http://user:[email protected]/GUID",
1318        "http://user:[email protected]/GUID"},
1319 
1320       {"http://www.example.com/GUID#ref", "/GUID"},
1321       {"http://me:[email protected]/GUID/#ref", "/GUID/"},
1322       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
1323        "text/html,Question"},
1324 
1325       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
1326       {"filesystem:http://example.com/path", "/"},
1327   };
1328 
1329   for (const auto& test : cases) {
1330     GURL url(test.url);
1331     EXPECT_EQ(test.expected, url.path()) << test.url;
1332   }
1333 }
1334 
// Checks path() for URLs whose expected path depends on whether
// standard-compliant parsing of non-special scheme URLs is enabled.
TEST_P(GURLTypedTest, PathForNonStandardURLs) {
  struct TestCase {
    const std::string_view url;
    // Expected path when standard-compliant parsing is enabled.
    const std::string_view compliant_expected;
    // Expected path otherwise.
    const std::string_view legacy_expected;
  };
  const TestCase kCases[] = {
      {"blob://http://example.com/GUID", "//example.com/GUID",
       "//http://example.com/GUID"},
      {"git://host/path#fragment", "/path", "//host/path"},
  };

  for (const TestCase& test : kCases) {
    const std::string_view expected =
        use_standard_compliant_non_special_scheme_url_parsing_
            ? test.compliant_expected
            : test.legacy_expected;
    const GURL parsed(test.url);
    EXPECT_EQ(parsed.path(), expected) << test.url;
  }
}
1361 
TEST(GURLTest,EqualsIgnoringRef)1362 TEST(GURLTest, EqualsIgnoringRef) {
1363   const struct {
1364     const char* url_a;
1365     const char* url_b;
1366     bool are_equals;
1367   } kTestCases[] = {
1368       // No ref.
1369       {"http://a.com", "http://a.com", true},
1370       {"http://a.com", "http://b.com", false},
1371 
1372       // Same Ref.
1373       {"http://a.com#foo", "http://a.com#foo", true},
1374       {"http://a.com#foo", "http://b.com#foo", false},
1375 
1376       // Different Refs.
1377       {"http://a.com#foo", "http://a.com#bar", true},
1378       {"http://a.com#foo", "http://b.com#bar", false},
1379 
1380       // One has a ref, the other doesn't.
1381       {"http://a.com#foo", "http://a.com", true},
1382       {"http://a.com#foo", "http://b.com", false},
1383 
1384       // Empty refs.
1385       {"http://a.com#", "http://a.com#", true},
1386       {"http://a.com#", "http://a.com", true},
1387 
1388       // URLs that differ only by their last character.
1389       {"http://aaa", "http://aab", false},
1390       {"http://aaa#foo", "http://aab#foo", false},
1391 
1392       // Different size of the part before the ref.
1393       {"http://123#a", "http://123456#a", false},
1394 
1395       // Blob URLs
1396       {"blob:http://a.com#foo", "blob:http://a.com#foo", true},
1397       {"blob:http://a.com#foo", "blob:http://a.com#bar", true},
1398       {"blob:http://a.com#foo", "blob:http://b.com#bar", false},
1399 
1400       // Filesystem URLs
1401       {"filesystem:http://a.com#foo", "filesystem:http://a.com#foo", true},
1402       {"filesystem:http://a.com#foo", "filesystem:http://a.com#bar", true},
1403       {"filesystem:http://a.com#foo", "filesystem:http://b.com#bar", false},
1404 
1405       // Data URLs
1406       {"data:text/html,a#foo", "data:text/html,a#bar", true},
1407       {"data:text/html,a#foo", "data:text/html,a#foo", true},
1408       {"data:text/html,a#foo", "data:text/html,b#foo", false},
1409   };
1410 
1411   for (const auto& test_case : kTestCases) {
1412     SCOPED_TRACE(testing::Message()
1413                  << std::endl
1414                  << "url_a = " << test_case.url_a << std::endl
1415                  << "url_b = " << test_case.url_b << std::endl);
1416     // A versus B.
1417     EXPECT_EQ(test_case.are_equals,
1418               GURL(test_case.url_a).EqualsIgnoringRef(GURL(test_case.url_b)));
1419     // B versus A.
1420     EXPECT_EQ(test_case.are_equals,
1421               GURL(test_case.url_b).EqualsIgnoringRef(GURL(test_case.url_a)));
1422   }
1423 }
1424 
// Checks that DEBUG_ALIAS_FOR_GURL() yields a C string equal to the URL's
// spec.
TEST(GURLTest, DebugAlias) {
  static constexpr char kSpec[] = "https://foo.com/bar";

  const GURL aliased_url(kSpec);
  DEBUG_ALIAS_FOR_GURL(alias_buffer, aliased_url);
  EXPECT_STREQ(kSpec, alias_buffer);
}
1430 
// Checks how an unparseable host is represented in an invalid GURL.
TEST(GURLTest, InvalidHost) {
  // The host combines an invalid percent escape (%T%) with a valid percent
  // escape that is not 7-bit ASCII (%Ae), so the unescaped host contains both
  // an invalid percent escape and invalid UTF-8.
  const GURL bad_host_url("http://%T%Ae");

  EXPECT_FALSE(bad_host_url.is_valid());
  EXPECT_TRUE(bad_host_url.SchemeIs(url::kHttpScheme));

  // The invalid percent escape becomes an escaped percent sign (%25), and the
  // invalid UTF-8 character becomes the REPLACEMENT CHARACTER (U+FFFD),
  // encoded as UTF-8 and percent-escaped.
  EXPECT_EQ(bad_host_url.host_piece(), "%25t%EF%BF%BD");
}
1445 
// Checks that an explicit port 0 is accepted by GURL, SchemeHostPort and
// Origin, and that it is distinct from the default HTTP port.
TEST(GURLTest, PortZero) {
  const GURL zero_port_url("http://127.0.0.1:0/blah");

  // https://url.spec.whatwg.org/#port-state says that the port 1) consists of
  // ASCII digits (this excludes negative numbers) and 2) cannot be greater than
  // 2^16-1.  This means that port=0 should be valid.
  EXPECT_TRUE(zero_port_url.is_valid());
  EXPECT_EQ("0", zero_port_url.port());
  EXPECT_EQ("127.0.0.1", zero_port_url.host());
  EXPECT_EQ("http", zero_port_url.scheme());

  // https://crbug.com/1065532: SchemeHostPort would previously incorrectly
  // consider port=0 to be invalid.
  const SchemeHostPort zero_port_tuple(zero_port_url);
  EXPECT_TRUE(zero_port_tuple.IsValid());
  EXPECT_EQ(zero_port_url.scheme(), zero_port_tuple.scheme());
  EXPECT_EQ(zero_port_url.host(), zero_port_tuple.host());
  EXPECT_EQ(zero_port_url.port(),
            base::NumberToString(zero_port_tuple.port()));

  // https://crbug.com/1065532: The SchemeHostPort problem above would lead to
  // bizarre results below - resolved origin would incorrectly be returned as an
  // opaque origin derived from |unrelated_origin|.
  const url::Origin unrelated_origin =
      url::Origin::Create(GURL("http://other.com"));
  const url::Origin zero_port_origin =
      url::Origin::Resolve(zero_port_url, unrelated_origin);
  EXPECT_FALSE(zero_port_origin.opaque());
  EXPECT_EQ(zero_port_url.scheme(), zero_port_origin.scheme());
  EXPECT_EQ(zero_port_url.host(), zero_port_origin.host());
  EXPECT_EQ(zero_port_url.port(),
            base::NumberToString(zero_port_origin.port()));

  // port=0 and default HTTP port are different.
  const GURL default_port_url("http://127.0.0.1/foo");
  EXPECT_EQ(0, SchemeHostPort(zero_port_url).port());
  EXPECT_EQ(80, SchemeHostPort(default_port_url).port());
  const url::Origin default_port_origin =
      url::Origin::Create(default_port_url);
  EXPECT_FALSE(default_port_origin.IsSameOriginWith(zero_port_origin));
}
1484 
1485 class GURLTestTraits {
1486  public:
1487   using UrlType = GURL;
1488 
CreateUrlFromString(std::string_view s)1489   static UrlType CreateUrlFromString(std::string_view s) { return GURL(s); }
IsAboutBlank(const UrlType & url)1490   static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
IsAboutSrcdoc(const UrlType & url)1491   static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
1492 
1493   // Only static members.
1494   GURLTestTraits() = delete;
1495 };
1496 
1497 INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);
1498 
1499 }  // namespace url
1500