1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
6 
7 #include "base/strings/utf_string_conversions.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 #include "url/buildflags.h"
10 #include "url/gurl.h"
11 #include "url/origin.h"
12 
13 namespace {
14 
15 namespace test1 {
16 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-reversed-inc.cc"
17 }
18 namespace test2 {
19 #include "net/base/registry_controlled_domains/effective_tld_names_unittest2-reversed-inc.cc"
20 }
21 namespace test3 {
22 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-reversed-inc.cc"
23 }
24 namespace test4 {
25 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-reversed-inc.cc"
26 }
27 namespace test5 {
28 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-reversed-inc.cc"
29 }
30 namespace test6 {
31 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-reversed-inc.cc"
32 }
33 
34 }  // namespace
35 
36 namespace net::registry_controlled_domains {
37 
38 namespace {
39 
GetDomainFromHost(const std::string & host)40 std::string GetDomainFromHost(const std::string& host) {
41   return GetDomainAndRegistry(host, EXCLUDE_PRIVATE_REGISTRIES);
42 }
43 
GetRegistryLengthFromURL(const std::string & url,UnknownRegistryFilter unknown_filter)44 size_t GetRegistryLengthFromURL(
45     const std::string& url,
46     UnknownRegistryFilter unknown_filter) {
47   return GetRegistryLength(GURL(url),
48                            unknown_filter,
49                            EXCLUDE_PRIVATE_REGISTRIES);
50 }
51 
GetRegistryLengthFromURLIncludingPrivate(const std::string & url,UnknownRegistryFilter unknown_filter)52 size_t GetRegistryLengthFromURLIncludingPrivate(
53     const std::string& url,
54     UnknownRegistryFilter unknown_filter) {
55   return GetRegistryLength(GURL(url),
56                            unknown_filter,
57                            INCLUDE_PRIVATE_REGISTRIES);
58 }
59 
PermissiveGetHostRegistryLength(std::string_view host)60 size_t PermissiveGetHostRegistryLength(std::string_view host) {
61   return PermissiveGetHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
62                                          EXCLUDE_PRIVATE_REGISTRIES);
63 }
64 
65 // Only called when using ICU (avoids unused static function error).
66 #if !BUILDFLAG(USE_PLATFORM_ICU_ALTERNATIVES)
PermissiveGetHostRegistryLength(std::u16string_view host)67 size_t PermissiveGetHostRegistryLength(std::u16string_view host) {
68   return PermissiveGetHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
69                                          EXCLUDE_PRIVATE_REGISTRIES);
70 }
71 #endif
72 
GetCanonicalHostRegistryLength(const std::string & host,UnknownRegistryFilter unknown_filter)73 size_t GetCanonicalHostRegistryLength(const std::string& host,
74                                       UnknownRegistryFilter unknown_filter) {
75   return GetCanonicalHostRegistryLength(host, unknown_filter,
76                                         EXCLUDE_PRIVATE_REGISTRIES);
77 }
78 
GetCanonicalHostRegistryLengthIncludingPrivate(const std::string & host)79 size_t GetCanonicalHostRegistryLengthIncludingPrivate(const std::string& host) {
80   return GetCanonicalHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
81                                         INCLUDE_PRIVATE_REGISTRIES);
82 }
83 
84 }  // namespace
85 
86 class RegistryControlledDomainTest : public testing::Test {
87  protected:
88   template <typename Graph>
UseDomainData(const Graph & graph)89   void UseDomainData(const Graph& graph) {
90     // This is undone in TearDown.
91     SetFindDomainGraphForTesting(graph, sizeof(Graph));
92   }
93 
CompareDomains(const std::string & url1,const std::string & url2)94   bool CompareDomains(const std::string& url1, const std::string& url2) {
95     SCOPED_TRACE(url1 + " " + url2);
96     GURL g1 = GURL(url1);
97     GURL g2 = GURL(url2);
98     url::Origin o1 = url::Origin::Create(g1);
99     url::Origin o2 = url::Origin::Create(g2);
100     EXPECT_EQ(SameDomainOrHost(o1, o2, EXCLUDE_PRIVATE_REGISTRIES),
101               SameDomainOrHost(g1, g2, EXCLUDE_PRIVATE_REGISTRIES));
102     return SameDomainOrHost(g1, g2, EXCLUDE_PRIVATE_REGISTRIES);
103   }
104 
TearDown()105   void TearDown() override { ResetFindDomainGraphForTesting(); }
106 };
107 
TEST_F(RegistryControlledDomainTest,TestGetDomainAndRegistry)108 TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) {
109   UseDomainData(test1::kDafsa);
110 
111   struct {
112     std::string url;
113     std::string expected_domain_and_registry;
114   } kTestCases[] = {
115       {"http://a.baz.jp/file.html", "baz.jp"},
116       {"http://a.baz.jp./file.html", "baz.jp."},
117       {"http://ac.jp", ""},
118       {"http://a.bar.jp", ""},
119       {"http://bar.jp", ""},
120       {"http://baz.bar.jp", ""},
121       {"http://a.b.baz.bar.jp", "a.b.baz.bar.jp"},
122 
123       {"http://baz.pref.bar.jp", "pref.bar.jp"},
124       {"http://a.b.bar.baz.com.", "b.bar.baz.com."},
125 
126       {"http://a.d.c", "a.d.c"},
127       {"http://.a.d.c", "a.d.c"},
128       {"http://..a.d.c", "a.d.c"},
129       {"http://a.b.c", "b.c"},
130       {"http://baz.com", "baz.com"},
131       {"http://baz.com.", "baz.com."},
132 
133       {"", ""},
134       {"http://", ""},
135       {"file:///C:/file.html", ""},
136       {"http://foo.com..", ""},
137       {"http://...", ""},
138       {"http://192.168.0.1", ""},
139       {"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/", ""},
140       {"http://localhost", ""},
141       {"http://localhost.", ""},
142       {"http:////Comment", ""},
143   };
144   for (const auto& test_case : kTestCases) {
145     const GURL url(test_case.url);
146     EXPECT_EQ(test_case.expected_domain_and_registry,
147               GetDomainAndRegistry(url, EXCLUDE_PRIVATE_REGISTRIES));
148     EXPECT_EQ(test_case.expected_domain_and_registry,
149               GetDomainAndRegistry(url::Origin::Create(url),
150                                    EXCLUDE_PRIVATE_REGISTRIES));
151   }
152 
153   // Test std::string version of GetDomainAndRegistry().  Uses the same
154   // underpinnings as the GURL version, so this is really more of a check of
155   // CanonicalizeHost().
156   EXPECT_EQ("baz.jp", GetDomainFromHost("a.baz.jp"));                  // 1
157   EXPECT_EQ("baz.jp.", GetDomainFromHost("a.baz.jp."));                // 1
158   EXPECT_EQ("", GetDomainFromHost("ac.jp"));                           // 2
159   EXPECT_EQ("", GetDomainFromHost("a.bar.jp"));                        // 3
160   EXPECT_EQ("", GetDomainFromHost("bar.jp"));                          // 3
161   EXPECT_EQ("", GetDomainFromHost("baz.bar.jp"));                      // 3 4
162   EXPECT_EQ("a.b.baz.bar.jp", GetDomainFromHost("a.b.baz.bar.jp"));    // 3 4
163   EXPECT_EQ("pref.bar.jp", GetDomainFromHost("baz.pref.bar.jp"));      // 5
164   EXPECT_EQ("b.bar.baz.com.", GetDomainFromHost("a.b.bar.baz.com."));  // 6
165   EXPECT_EQ("a.d.c", GetDomainFromHost("a.d.c"));                      // 7
166   EXPECT_EQ("a.d.c", GetDomainFromHost(".a.d.c"));                     // 7
167   EXPECT_EQ("a.d.c", GetDomainFromHost("..a.d.c"));                    // 7
168   EXPECT_EQ("b.c", GetDomainFromHost("a.b.c"));                        // 7 8
169   EXPECT_EQ("baz.com", GetDomainFromHost("baz.com"));                  // none
170   EXPECT_EQ("baz.com.", GetDomainFromHost("baz.com."));                // none
171 
172   EXPECT_EQ("", GetDomainFromHost(std::string()));
173   EXPECT_EQ("", GetDomainFromHost("foo.com.."));
174   EXPECT_EQ("", GetDomainFromHost("..."));
175   EXPECT_EQ("", GetDomainFromHost("192.168.0.1"));
176   EXPECT_EQ("", GetDomainFromHost("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"));
177   EXPECT_EQ("", GetDomainFromHost("localhost."));
178   EXPECT_EQ("", GetDomainFromHost(".localhost."));
179 }
180 
TEST_F(RegistryControlledDomainTest,TestGetRegistryLength)181 TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) {
182   UseDomainData(test1::kDafsa);
183 
184   // Test GURL version of GetRegistryLength().
185   EXPECT_EQ(2U, GetRegistryLengthFromURL("http://a.baz.jp/file.html",
186                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 1
187   EXPECT_EQ(3U, GetRegistryLengthFromURL("http://a.baz.jp./file.html",
188                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 1
189   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://ac.jp",
190                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 2
191   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://a.bar.jp",
192                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3
193   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://bar.jp",
194                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3
195   EXPECT_EQ(2U, GetRegistryLengthFromURL("http://xbar.jp",
196                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 1
197   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://baz.bar.jp",
198                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3 4
199   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://.baz.bar.jp",
200                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3 4
201   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://..baz.bar.jp",
202                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3 4
203   EXPECT_EQ(11U, GetRegistryLengthFromURL("http://foo..baz.bar.jp",
204                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 3 4
205   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://xbaz.bar.jp",
206                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 3
207   EXPECT_EQ(11U, GetRegistryLengthFromURL("http://x.xbaz.bar.jp",
208                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 3
209   EXPECT_EQ(12U, GetRegistryLengthFromURL("http://a.b.baz.bar.jp",
210                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 4
211   EXPECT_EQ(6U, GetRegistryLengthFromURL("http://baz.pref.bar.jp",
212                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 5
213   EXPECT_EQ(6U, GetRegistryLengthFromURL("http://z.baz.pref.bar.jp",
214                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 5
215   EXPECT_EQ(10U, GetRegistryLengthFromURL("http://p.ref.bar.jp",
216                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 5
217   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://xpref.bar.jp",
218                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 5
219   EXPECT_EQ(12U, GetRegistryLengthFromURL("http://baz.xpref.bar.jp",
220                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 5
221   EXPECT_EQ(6U, GetRegistryLengthFromURL("http://baz..pref.bar.jp",
222                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 5
223   EXPECT_EQ(11U, GetRegistryLengthFromURL("http://a.b.bar.baz.com",
224                                           EXCLUDE_UNKNOWN_REGISTRIES));  // 6
225   EXPECT_EQ(3U, GetRegistryLengthFromURL("http://a.d.c",
226                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 7
227   EXPECT_EQ(3U, GetRegistryLengthFromURL("http://.a.d.c",
228                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 7
229   EXPECT_EQ(3U, GetRegistryLengthFromURL("http://..a.d.c",
230                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 7
231   EXPECT_EQ(1U, GetRegistryLengthFromURL("http://a.b.c",
232                                          EXCLUDE_UNKNOWN_REGISTRIES));  // 7 8
233   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://baz.com",
234                                          EXCLUDE_UNKNOWN_REGISTRIES));  // none
235   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://baz.com.",
236                                          EXCLUDE_UNKNOWN_REGISTRIES));  // none
237   EXPECT_EQ(3U, GetRegistryLengthFromURL("http://baz.com",
238                                          INCLUDE_UNKNOWN_REGISTRIES));  // none
239   EXPECT_EQ(4U, GetRegistryLengthFromURL("http://baz.com.",
240                                          INCLUDE_UNKNOWN_REGISTRIES));  // none
241 
242   EXPECT_EQ(std::string::npos,
243       GetRegistryLengthFromURL(std::string(), EXCLUDE_UNKNOWN_REGISTRIES));
244   EXPECT_EQ(std::string::npos,
245       GetRegistryLengthFromURL("http://", EXCLUDE_UNKNOWN_REGISTRIES));
246   EXPECT_EQ(std::string::npos,
247       GetRegistryLengthFromURL("file:///C:/file.html",
248                                EXCLUDE_UNKNOWN_REGISTRIES));
249   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://foo.com..",
250                                          EXCLUDE_UNKNOWN_REGISTRIES));
251   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://...",
252                                          EXCLUDE_UNKNOWN_REGISTRIES));
253   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://192.168.0.1",
254                                          EXCLUDE_UNKNOWN_REGISTRIES));
255   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://localhost",
256                                          EXCLUDE_UNKNOWN_REGISTRIES));
257   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://localhost",
258                                          INCLUDE_UNKNOWN_REGISTRIES));
259   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://localhost.",
260                                          EXCLUDE_UNKNOWN_REGISTRIES));
261   EXPECT_EQ(0U, GetRegistryLengthFromURL("http://localhost.",
262                                          INCLUDE_UNKNOWN_REGISTRIES));
263   EXPECT_EQ(0U, GetRegistryLengthFromURL("http:////Comment",
264                                          EXCLUDE_UNKNOWN_REGISTRIES));
265 
266   // Test std::string version of GetRegistryLength().  Uses the same
267   // underpinnings as the GURL version, so this is really more of a check of
268   // CanonicalizeHost().
269   EXPECT_EQ(2U, GetCanonicalHostRegistryLength(
270                     "a.baz.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 1
271   EXPECT_EQ(3U, GetCanonicalHostRegistryLength(
272                     "a.baz.jp.", EXCLUDE_UNKNOWN_REGISTRIES));  // 1
273   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
274                     "ac.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 2
275   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
276                     "a.bar.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 3
277   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
278                     "bar.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 3
279   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
280                     "baz.bar.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 3 4
281   EXPECT_EQ(12U, GetCanonicalHostRegistryLength(
282                      "a.b.baz.bar.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 4
283   EXPECT_EQ(6U, GetCanonicalHostRegistryLength(
284                     "baz.pref.bar.jp", EXCLUDE_UNKNOWN_REGISTRIES));  // 5
285   EXPECT_EQ(11U, GetCanonicalHostRegistryLength(
286                      "a.b.bar.baz.com", EXCLUDE_UNKNOWN_REGISTRIES));  // 6
287   EXPECT_EQ(3U, GetCanonicalHostRegistryLength(
288                     "a.d.c", EXCLUDE_UNKNOWN_REGISTRIES));  // 7
289   EXPECT_EQ(3U, GetCanonicalHostRegistryLength(
290                     ".a.d.c", EXCLUDE_UNKNOWN_REGISTRIES));  // 7
291   EXPECT_EQ(3U, GetCanonicalHostRegistryLength(
292                     "..a.d.c", EXCLUDE_UNKNOWN_REGISTRIES));  // 7
293   EXPECT_EQ(1U, GetCanonicalHostRegistryLength(
294                     "a.b.c", EXCLUDE_UNKNOWN_REGISTRIES));  // 7 8
295   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
296                     "baz.com", EXCLUDE_UNKNOWN_REGISTRIES));  // none
297   EXPECT_EQ(0U, GetCanonicalHostRegistryLength(
298                     "baz.com.", EXCLUDE_UNKNOWN_REGISTRIES));  // none
299   EXPECT_EQ(3U, GetCanonicalHostRegistryLength(
300                     "baz.com", INCLUDE_UNKNOWN_REGISTRIES));  // none
301   EXPECT_EQ(4U, GetCanonicalHostRegistryLength(
302                     "baz.com.", INCLUDE_UNKNOWN_REGISTRIES));  // none
303 
304   EXPECT_EQ(std::string::npos, GetCanonicalHostRegistryLength(
305                                    std::string(), EXCLUDE_UNKNOWN_REGISTRIES));
306   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("foo.com..",
307                                                EXCLUDE_UNKNOWN_REGISTRIES));
308   EXPECT_EQ(0U,
309             GetCanonicalHostRegistryLength("..", EXCLUDE_UNKNOWN_REGISTRIES));
310   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("192.168.0.1",
311                                                EXCLUDE_UNKNOWN_REGISTRIES));
312   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("localhost",
313                                                EXCLUDE_UNKNOWN_REGISTRIES));
314   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("localhost",
315                                                INCLUDE_UNKNOWN_REGISTRIES));
316   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("localhost.",
317                                                EXCLUDE_UNKNOWN_REGISTRIES));
318   EXPECT_EQ(0U, GetCanonicalHostRegistryLength("localhost.",
319                                                INCLUDE_UNKNOWN_REGISTRIES));
320 
321   // IDN case.
322   EXPECT_EQ(10U, GetCanonicalHostRegistryLength("foo.xn--fiqs8s",
323                                                 EXCLUDE_UNKNOWN_REGISTRIES));
324 }
325 
// Checks HostHasRegistryControlledDomain() for invalid, non-canonical and
// regular hosts, with unknown and private registries excluded.
TEST_F(RegistryControlledDomainTest, HostHasRegistryControlledDomain) {
  UseDomainData(test1::kDafsa);

  struct HostCase {
    const char* host;
    bool expect_registry_controlled_domain;
  };
  const HostCase kHostCases[] = {
      // Invalid hosts.
      {"", false},
      {"%00asdf", false},

      // Invalid host but valid R.C.D.
      {"%00foo.jp", true},

      // Valid R.C.D. when canonicalized, even with an invalid prefix and an
      // escaped dot.
      {"%00foo.Google%2EjP", true},

      // Regular, no match.
      {"bar.notatld", false},

      // Regular, match.
      {"www.Google.Jp", true},
  };

  for (const HostCase& host_case : kHostCases) {
    SCOPED_TRACE(host_case.host);
    const bool has_domain = HostHasRegistryControlledDomain(
        host_case.host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
    EXPECT_EQ(host_case.expect_registry_controlled_domain, has_domain);
  }
}
353 
// Checks SameDomainOrHost() (via CompareDomains()) for pairs of URLs against
// the test2 data set.
TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) {
  UseDomainData(test2::kDafsa);

  struct UrlPair {
    const char* first;
    const char* second;
    bool expect_same;
  };
  // The trailing comments are the domain/host annotations carried over from
  // the original assertions.
  const UrlPair kUrlPairs[] = {
      // b.bar.jp
      {"http://a.b.bar.jp/file.html", "http://a.b.bar.jp/file.html", true},
      // b.bar.jp
      {"http://a.b.bar.jp/file.html", "http://b.b.bar.jp/file.html", true},
      // foo.jp vs. not.jp
      {"http://a.foo.jp/file.html", "http://a.not.jp/file.html", false},
      // foo.jp vs. foo.jp.
      {"http://a.foo.jp/file.html", "http://a.foo.jp./file.html", false},
      // a.com vs. b.com
      {"http://a.com/file.html", "http://b.com/file.html", false},
      // x.com
      {"http://a.x.com/file.html", "http://b.x.com/file.html", true},
      // x.com
      {"http://a.x.com/file.html", "http://.x.com/file.html", true},
      // x.com
      {"http://a.x.com/file.html", "http://..b.x.com/file.html", true},
      // intranet
      {"http://intranet/file.html", "http://intranet/file.html", true},
      // intranet
      {"http://intranet1/file.html", "http://intranet2/file.html", false},
      // intranet
      {"http://intranet1.corp.example.com/file.html",
       "http://intranet2.corp.example.com/file.html", true},
      // 127.0.0.1
      {"http://127.0.0.1/file.html", "http://127.0.0.1/file.html", true},
      // 192.168.0.1 vs. 127.0.0.1
      {"http://192.168.0.1/file.html", "http://127.0.0.1/file.html", false},
      // no host
      {"file:///C:/file.html", "file:///C:/file.html", false},

      // The trailing dot means different sites - see also
      // https://github.com/mikewest/sec-metadata/issues/15.
      {"https://foo.example.com", "https://foo.example.com.", false},
  };

  for (const UrlPair& url_pair : kUrlPairs) {
    // CompareDomains() adds its own SCOPED_TRACE naming both URLs.
    EXPECT_EQ(url_pair.expect_same,
              CompareDomains(url_pair.first, url_pair.second));
  }
}
392 
// Checks registry lengths against the default rules: this test deliberately
// does not call UseDomainData().
TEST_F(RegistryControlledDomainTest, TestDefaultData) {
  struct DefaultCase {
    const char* url;
    UnknownRegistryFilter unknown_filter;
    size_t expected_length;
  };
  const DefaultCase kDefaultCases[] = {
      {"http://google.com", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://stanford.edu", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://ustreas.gov", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://icann.net", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://ferretcentral.org", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://nowhere.notavaliddomain", EXCLUDE_UNKNOWN_REGISTRIES, 0U},
      {"http://nowhere.notavaliddomain", INCLUDE_UNKNOWN_REGISTRIES, 15U},
  };

  for (const DefaultCase& default_case : kDefaultCases) {
    SCOPED_TRACE(testing::Message()
                 << "url: " << default_case.url << ", unknown filter: "
                 << static_cast<int>(default_case.unknown_filter));
    EXPECT_EQ(default_case.expected_length,
              GetRegistryLengthFromURL(default_case.url,
                                       default_case.unknown_filter));
  }
}
410 
// Runs the same URLs with private registries excluded and then included.
// For the domain data used for this test, the private registries are
// 'priv.no' and 'private'.
TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) {
  UseDomainData(test1::kDafsa);

  struct PrivateCase {
    const char* url;
    UnknownRegistryFilter unknown_filter;
    size_t expected_excluding_private;
    size_t expected_including_private;
  };
  const PrivateCase kPrivateCases[] = {
      {"http://priv.no", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 0U},
      {"http://foo.priv.no", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 7U},
      {"http://foo.jp", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 2U},
      {"http://www.foo.jp", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 2U},
      {"http://private", EXCLUDE_UNKNOWN_REGISTRIES, 0U, 0U},
      {"http://foo.private", EXCLUDE_UNKNOWN_REGISTRIES, 0U, 7U},
      {"http://private", INCLUDE_UNKNOWN_REGISTRIES, 0U, 0U},
      {"http://foo.private", INCLUDE_UNKNOWN_REGISTRIES, 7U, 7U},
  };

  // First pass: private registries are excluded.
  for (const PrivateCase& private_case : kPrivateCases) {
    SCOPED_TRACE(testing::Message()
                 << "excluding private, url: " << private_case.url
                 << ", unknown filter: "
                 << static_cast<int>(private_case.unknown_filter));
    EXPECT_EQ(private_case.expected_excluding_private,
              GetRegistryLengthFromURL(private_case.url,
                                       private_case.unknown_filter));
  }

  // Second pass: private registries are included.
  for (const PrivateCase& private_case : kPrivateCases) {
    SCOPED_TRACE(testing::Message()
                 << "including private, url: " << private_case.url
                 << ", unknown filter: "
                 << static_cast<int>(private_case.unknown_filter));
    EXPECT_EQ(private_case.expected_including_private,
              GetRegistryLengthFromURLIncludingPrivate(
                  private_case.url, private_case.unknown_filter));
  }
}
463 
// Looks up keys in a DAFSA that needs two byte offsets.
TEST_F(RegistryControlledDomainTest, TestDafsaTwoByteOffsets) {
  UseDomainData(test3::kDafsa);

  // The labels in this DAFSA begin and end with unique characters, which makes
  // it impossible to merge labels. Each inner node is about 100 bytes, while a
  // one byte offset can add at most 64 bytes to the previous offset, so the
  // paths must go over two byte offsets.

  // Expected to match with private registries excluded.
  const std::string first_host =
      "a.b.6____________________________________________________"
      "________________________________________________6";
  // Expected to match only once private registries are included.
  const std::string second_host =
      "a.b.7____________________________________________________"
      "________________________________________________7";
  // Expected not to match with private registries excluded.
  const std::string third_host =
      "a.b.a____________________________________________________"
      "________________________________________________8";

  constexpr size_t kMatchedLength = 102U;
  constexpr size_t kNoMatch = 0U;

  EXPECT_EQ(kMatchedLength, GetCanonicalHostRegistryLength(
                                first_host, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(kNoMatch, GetCanonicalHostRegistryLength(
                          second_host, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(kMatchedLength,
            GetCanonicalHostRegistryLengthIncludingPrivate(second_host));
  EXPECT_EQ(kNoMatch, GetCanonicalHostRegistryLength(
                          third_host, EXCLUDE_UNKNOWN_REGISTRIES));
}
491 
// Looks up keys in a DAFSA that needs three byte offsets.
TEST_F(RegistryControlledDomainTest, TestDafsaThreeByteOffsets) {
  UseDomainData(test4::kDafsa);

  // The labels in this DAFSA begin and end with unique characters, which makes
  // it impossible to merge labels. The byte array has a size of ~54k, while a
  // two byte offset can add at most 8k to the previous offset. Since we can
  // skip only forward in memory, the nodes representing the return values must
  // be located near the end of the byte array. The probability that we can
  // reach a return value from an arbitrary inner node without using a three
  // byte offset is small (but not zero). The test is therefore repeated with
  // some different keys, so that with a reasonable probability at least one of
  // the tested paths has to go over a three byte offset.

  // Expected to match with private registries excluded.
  const std::string first_host =
      "a.b.z6___________________________________________________"
      "_________________________________________________z6";
  // Expected to match only once private registries are included.
  const std::string second_host =
      "a.b.z7___________________________________________________"
      "_________________________________________________z7";
  // Expected not to match with private registries excluded.
  const std::string third_host =
      "a.b.za___________________________________________________"
      "_________________________________________________z8";

  constexpr size_t kMatchedLength = 104U;
  constexpr size_t kNoMatch = 0U;

  EXPECT_EQ(kMatchedLength, GetCanonicalHostRegistryLength(
                                first_host, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(kNoMatch, GetCanonicalHostRegistryLength(
                          second_host, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(kMatchedLength,
            GetCanonicalHostRegistryLengthIncludingPrivate(second_host));
  EXPECT_EQ(kNoMatch, GetCanonicalHostRegistryLength(
                          third_host, EXCLUDE_UNKNOWN_REGISTRIES));
}
525 
// Looks up keys in a DAFSA with compressed prefixes.
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedPrefixes) {
  UseDomainData(test5::kDafsa);

  // This DAFSA is constructed from words with similar prefixes but distinct
  // suffixes. The DAFSA will then form a trie with the implicit source node
  // as root.

  struct HostCase {
    const char* host;
    size_t expected_length;
  };

  // Lookups with private registries excluded.
  const HostCase kExcludingPrivate[] = {
      {"a.b.ai", 2U},    {"a.b.bj", 0U},    {"a.b.aak", 3U},
      {"a.b.bbl", 0U},   {"a.b.aaaam", 5U}, {"a.b.bbbbn", 5U},
  };
  for (const HostCase& host_case : kExcludingPrivate) {
    SCOPED_TRACE(host_case.host);
    EXPECT_EQ(host_case.expected_length,
              GetCanonicalHostRegistryLength(host_case.host,
                                             EXCLUDE_UNKNOWN_REGISTRIES));
  }

  // Lookups with private registries included.
  const HostCase kIncludingPrivate[] = {
      {"a.b.bj", 2U},
      {"a.b.bbl", 3U},
      {"a.b.aaa", 0U},
      {"a.b.bbb", 0U},
  };
  for (const HostCase& host_case : kIncludingPrivate) {
    SCOPED_TRACE(host_case.host);
    EXPECT_EQ(host_case.expected_length,
              GetCanonicalHostRegistryLengthIncludingPrivate(host_case.host));
  }
}
560 
// Looks up keys in a DAFSA with compressed suffixes.
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedSuffixes) {
  UseDomainData(test6::kDafsa);

  // This DAFSA is constructed from words with similar suffixes but distinct
  // prefixes. The DAFSA will then form a trie with the implicit sink node as
  // root.

  struct HostCase {
    const char* host;
    size_t expected_length;
  };

  // Lookups with private registries excluded.
  const HostCase kExcludingPrivate[] = {
      {"a.b.ia", 2U},    {"a.b.jb", 0U},    {"a.b.kaa", 3U},
      {"a.b.lbb", 0U},   {"a.b.maaaa", 5U}, {"a.b.nbbbb", 5U},
  };
  for (const HostCase& host_case : kExcludingPrivate) {
    SCOPED_TRACE(host_case.host);
    EXPECT_EQ(host_case.expected_length,
              GetCanonicalHostRegistryLength(host_case.host,
                                             EXCLUDE_UNKNOWN_REGISTRIES));
  }

  // Lookups with private registries included.
  const HostCase kIncludingPrivate[] = {
      {"a.b.jb", 2U},
      {"a.b.lbb", 3U},
      {"a.b.aaa", 0U},
      {"a.b.bbb", 0U},
  };
  for (const HostCase& host_case : kIncludingPrivate) {
    SCOPED_TRACE(host_case.host);
    EXPECT_EQ(host_case.expected_length,
              GetCanonicalHostRegistryLengthIncludingPrivate(host_case.host));
  }
}
595 
TEST_F(RegistryControlledDomainTest,Permissive)596 TEST_F(RegistryControlledDomainTest, Permissive) {
597   UseDomainData(test1::kDafsa);
598 
599   EXPECT_EQ(std::string::npos, PermissiveGetHostRegistryLength(""));
600 
601   // Regular non-canonical host name.
602   EXPECT_EQ(2U, PermissiveGetHostRegistryLength("Www.Google.Jp"));
603   EXPECT_EQ(3U, PermissiveGetHostRegistryLength("Www.Google.Jp."));
604 
605   // Empty returns npos.
606   EXPECT_EQ(std::string::npos, PermissiveGetHostRegistryLength(""));
607 
608   // Trailing spaces are counted as part of the hostname, meaning this will
609   // not match a known registry.
610   EXPECT_EQ(0U, PermissiveGetHostRegistryLength("Www.Google.Jp "));
611 
612   // Invalid characters at the beginning are OK if the suffix still matches.
613   EXPECT_EQ(2U, PermissiveGetHostRegistryLength("*%00#?.Jp"));
614 
615   // Escaped period, this will add new components.
616   EXPECT_EQ(4U, PermissiveGetHostRegistryLength("Www.Googl%45%2e%4Ap"));
617 
618 // IDN cases (not supported when not linking ICU).
619 #if !BUILDFLAG(USE_PLATFORM_ICU_ALTERNATIVES)
620   EXPECT_EQ(10U, PermissiveGetHostRegistryLength("foo.xn--fiqs8s"));
621   EXPECT_EQ(11U, PermissiveGetHostRegistryLength("foo.xn--fiqs8s."));
622   EXPECT_EQ(18U, PermissiveGetHostRegistryLength("foo.%E4%B8%AD%E5%9B%BD"));
623   EXPECT_EQ(19U, PermissiveGetHostRegistryLength("foo.%E4%B8%AD%E5%9B%BD."));
624   EXPECT_EQ(6U,
625             PermissiveGetHostRegistryLength("foo.\xE4\xB8\xAD\xE5\x9B\xBD"));
626   EXPECT_EQ(7U,
627             PermissiveGetHostRegistryLength("foo.\xE4\xB8\xAD\xE5\x9B\xBD."));
628   // UTF-16 IDN.
629   EXPECT_EQ(2U, PermissiveGetHostRegistryLength(u"foo.\x4e2d\x56fd"));
630 
631   // Fullwidth dot (u+FF0E) that will get canonicalized to a dot.
632   EXPECT_EQ(2U, PermissiveGetHostRegistryLength("Www.Google\xEF\xBC\x8Ejp"));
633   // Same but also ending in a fullwidth dot.
634   EXPECT_EQ(5U, PermissiveGetHostRegistryLength(
635                     "Www.Google\xEF\xBC\x8Ejp\xEF\xBC\x8E"));
636   // Escaped UTF-8, also with an escaped fullwidth "Jp".
637   // "Jp" = U+FF2A, U+FF50, UTF-8 = EF BC AA EF BD 90
638   EXPECT_EQ(27U, PermissiveGetHostRegistryLength(
639                      "Www.Google%EF%BC%8E%EF%BC%AA%EF%BD%90%EF%BC%8E"));
640   // UTF-16 (ending in a dot).
641   EXPECT_EQ(3U, PermissiveGetHostRegistryLength(
642                     u"Www.Google\xFF0E\xFF2A\xFF50\xFF0E"));
643 #endif
644 }
645 
646 }  // namespace net::registry_controlled_domains
647