1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
6
7 #include "base/strings/utf_string_conversions.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 #include "url/buildflags.h"
10 #include "url/gurl.h"
11 #include "url/origin.h"
12
13 namespace {
14
15 namespace test1 {
16 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-reversed-inc.cc"
17 }
18 namespace test2 {
19 #include "net/base/registry_controlled_domains/effective_tld_names_unittest2-reversed-inc.cc"
20 }
21 namespace test3 {
22 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-reversed-inc.cc"
23 }
24 namespace test4 {
25 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-reversed-inc.cc"
26 }
27 namespace test5 {
28 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-reversed-inc.cc"
29 }
30 namespace test6 {
31 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-reversed-inc.cc"
32 }
33
34 } // namespace
35
36 namespace net::registry_controlled_domains {
37
38 namespace {
39
GetDomainFromHost(const std::string & host)40 std::string GetDomainFromHost(const std::string& host) {
41 return GetDomainAndRegistry(host, EXCLUDE_PRIVATE_REGISTRIES);
42 }
43
GetRegistryLengthFromURL(const std::string & url,UnknownRegistryFilter unknown_filter)44 size_t GetRegistryLengthFromURL(
45 const std::string& url,
46 UnknownRegistryFilter unknown_filter) {
47 return GetRegistryLength(GURL(url),
48 unknown_filter,
49 EXCLUDE_PRIVATE_REGISTRIES);
50 }
51
GetRegistryLengthFromURLIncludingPrivate(const std::string & url,UnknownRegistryFilter unknown_filter)52 size_t GetRegistryLengthFromURLIncludingPrivate(
53 const std::string& url,
54 UnknownRegistryFilter unknown_filter) {
55 return GetRegistryLength(GURL(url),
56 unknown_filter,
57 INCLUDE_PRIVATE_REGISTRIES);
58 }
59
PermissiveGetHostRegistryLength(std::string_view host)60 size_t PermissiveGetHostRegistryLength(std::string_view host) {
61 return PermissiveGetHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
62 EXCLUDE_PRIVATE_REGISTRIES);
63 }
64
65 // Only called when using ICU (avoids unused static function error).
66 #if !BUILDFLAG(USE_PLATFORM_ICU_ALTERNATIVES)
PermissiveGetHostRegistryLength(std::u16string_view host)67 size_t PermissiveGetHostRegistryLength(std::u16string_view host) {
68 return PermissiveGetHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
69 EXCLUDE_PRIVATE_REGISTRIES);
70 }
71 #endif
72
GetCanonicalHostRegistryLength(const std::string & host,UnknownRegistryFilter unknown_filter)73 size_t GetCanonicalHostRegistryLength(const std::string& host,
74 UnknownRegistryFilter unknown_filter) {
75 return GetCanonicalHostRegistryLength(host, unknown_filter,
76 EXCLUDE_PRIVATE_REGISTRIES);
77 }
78
GetCanonicalHostRegistryLengthIncludingPrivate(const std::string & host)79 size_t GetCanonicalHostRegistryLengthIncludingPrivate(const std::string& host) {
80 return GetCanonicalHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES,
81 INCLUDE_PRIVATE_REGISTRIES);
82 }
83
84 } // namespace
85
86 class RegistryControlledDomainTest : public testing::Test {
87 protected:
88 template <typename Graph>
UseDomainData(const Graph & graph)89 void UseDomainData(const Graph& graph) {
90 // This is undone in TearDown.
91 SetFindDomainGraphForTesting(graph, sizeof(Graph));
92 }
93
CompareDomains(const std::string & url1,const std::string & url2)94 bool CompareDomains(const std::string& url1, const std::string& url2) {
95 SCOPED_TRACE(url1 + " " + url2);
96 GURL g1 = GURL(url1);
97 GURL g2 = GURL(url2);
98 url::Origin o1 = url::Origin::Create(g1);
99 url::Origin o2 = url::Origin::Create(g2);
100 EXPECT_EQ(SameDomainOrHost(o1, o2, EXCLUDE_PRIVATE_REGISTRIES),
101 SameDomainOrHost(g1, g2, EXCLUDE_PRIVATE_REGISTRIES));
102 return SameDomainOrHost(g1, g2, EXCLUDE_PRIVATE_REGISTRIES);
103 }
104
TearDown()105 void TearDown() override { ResetFindDomainGraphForTesting(); }
106 };
107
// Exercises GetDomainAndRegistry() through its GURL, url::Origin and host
// string entry points, using the test1 domain data.
TEST_F(RegistryControlledDomainTest, TestGetDomainAndRegistry) {
  UseDomainData(test1::kDafsa);

  struct UrlCase {
    std::string url;
    std::string expected_domain_and_registry;
  };
  const UrlCase kUrlCases[] = {
      {"http://a.baz.jp/file.html", "baz.jp"},
      {"http://a.baz.jp./file.html", "baz.jp."},
      {"http://ac.jp", ""},
      {"http://a.bar.jp", ""},
      {"http://bar.jp", ""},
      {"http://baz.bar.jp", ""},
      {"http://a.b.baz.bar.jp", "a.b.baz.bar.jp"},

      {"http://baz.pref.bar.jp", "pref.bar.jp"},
      {"http://a.b.bar.baz.com.", "b.bar.baz.com."},

      {"http://a.d.c", "a.d.c"},
      {"http://.a.d.c", "a.d.c"},
      {"http://..a.d.c", "a.d.c"},
      {"http://a.b.c", "b.c"},
      {"http://baz.com", "baz.com"},
      {"http://baz.com.", "baz.com."},

      {"", ""},
      {"http://", ""},
      {"file:///C:/file.html", ""},
      {"http://foo.com..", ""},
      {"http://...", ""},
      {"http://192.168.0.1", ""},
      {"http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/", ""},
      {"http://localhost", ""},
      {"http://localhost.", ""},
      {"http:////Comment", ""},
  };
  // Every URL is checked through both the GURL and the url::Origin overload.
  for (const UrlCase& url_case : kUrlCases) {
    const GURL parsed_url(url_case.url);
    EXPECT_EQ(url_case.expected_domain_and_registry,
              GetDomainAndRegistry(parsed_url, EXCLUDE_PRIVATE_REGISTRIES));
    EXPECT_EQ(url_case.expected_domain_and_registry,
              GetDomainAndRegistry(url::Origin::Create(parsed_url),
                                   EXCLUDE_PRIVATE_REGISTRIES));
  }

  // Test std::string version of GetDomainAndRegistry().  Uses the same
  // underpinnings as the GURL version, so this is really more of a check of
  // CanonicalizeHost(). The trailing numbers are the annotations carried by
  // the individual assertions this table replaces.
  struct HostCase {
    const char* host;
    const char* expected_domain_and_registry;
  };
  const HostCase kHostCases[] = {
      {"a.baz.jp", "baz.jp"},                  // 1
      {"a.baz.jp.", "baz.jp."},                // 1
      {"ac.jp", ""},                           // 2
      {"a.bar.jp", ""},                        // 3
      {"bar.jp", ""},                          // 3
      {"baz.bar.jp", ""},                      // 3 4
      {"a.b.baz.bar.jp", "a.b.baz.bar.jp"},    // 3 4
      {"baz.pref.bar.jp", "pref.bar.jp"},      // 5
      {"a.b.bar.baz.com.", "b.bar.baz.com."},  // 6
      {"a.d.c", "a.d.c"},                      // 7
      {".a.d.c", "a.d.c"},                     // 7
      {"..a.d.c", "a.d.c"},                    // 7
      {"a.b.c", "b.c"},                        // 7 8
      {"baz.com", "baz.com"},                  // none
      {"baz.com.", "baz.com."},                // none

      {"", ""},
      {"foo.com..", ""},
      {"...", ""},
      {"192.168.0.1", ""},
      {"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]", ""},
      {"localhost.", ""},
      {".localhost.", ""},
  };
  for (const HostCase& host_case : kHostCases) {
    SCOPED_TRACE(host_case.host);
    EXPECT_EQ(host_case.expected_domain_and_registry,
              GetDomainFromHost(host_case.host));
  }
}
180
// Checks GetRegistryLength() for URLs and GetCanonicalHostRegistryLength() for
// bare hosts against the test1 domain data. The trailing numbers on the rows
// are the annotations carried by the individual assertions these tables
// replace.
TEST_F(RegistryControlledDomainTest, TestGetRegistryLength) {
  UseDomainData(test1::kDafsa);

  constexpr UnknownRegistryFilter kExclude = EXCLUDE_UNKNOWN_REGISTRIES;
  constexpr UnknownRegistryFilter kInclude = INCLUDE_UNKNOWN_REGISTRIES;
  constexpr size_t kNpos = std::string::npos;

  struct LengthCase {
    const char* input;
    UnknownRegistryFilter unknown_filter;
    size_t expected_length;
  };

  // Test GURL version of GetRegistryLength().
  const LengthCase kUrlCases[] = {
      {"http://a.baz.jp/file.html", kExclude, 2U},   // 1
      {"http://a.baz.jp./file.html", kExclude, 3U},  // 1
      {"http://ac.jp", kExclude, 0U},                // 2
      {"http://a.bar.jp", kExclude, 0U},             // 3
      {"http://bar.jp", kExclude, 0U},               // 3
      {"http://xbar.jp", kExclude, 2U},              // 1
      {"http://baz.bar.jp", kExclude, 0U},           // 3 4
      {"http://.baz.bar.jp", kExclude, 0U},          // 3 4
      {"http://..baz.bar.jp", kExclude, 0U},         // 3 4
      {"http://foo..baz.bar.jp", kExclude, 11U},     // 3 4
      {"http://xbaz.bar.jp", kExclude, 0U},          // 3
      {"http://x.xbaz.bar.jp", kExclude, 11U},       // 3
      {"http://a.b.baz.bar.jp", kExclude, 12U},      // 4
      {"http://baz.pref.bar.jp", kExclude, 6U},      // 5
      {"http://z.baz.pref.bar.jp", kExclude, 6U},    // 5
      {"http://p.ref.bar.jp", kExclude, 10U},        // 5
      {"http://xpref.bar.jp", kExclude, 0U},         // 5
      {"http://baz.xpref.bar.jp", kExclude, 12U},    // 5
      {"http://baz..pref.bar.jp", kExclude, 6U},     // 5
      {"http://a.b.bar.baz.com", kExclude, 11U},     // 6
      {"http://a.d.c", kExclude, 3U},                // 7
      {"http://.a.d.c", kExclude, 3U},               // 7
      {"http://..a.d.c", kExclude, 3U},              // 7
      {"http://a.b.c", kExclude, 1U},                // 7 8
      {"http://baz.com", kExclude, 0U},              // none
      {"http://baz.com.", kExclude, 0U},             // none
      {"http://baz.com", kInclude, 3U},              // none
      {"http://baz.com.", kInclude, 4U},             // none

      {"", kExclude, kNpos},
      {"http://", kExclude, kNpos},
      {"file:///C:/file.html", kExclude, kNpos},
      {"http://foo.com..", kExclude, 0U},
      {"http://...", kExclude, 0U},
      {"http://192.168.0.1", kExclude, 0U},
      {"http://localhost", kExclude, 0U},
      {"http://localhost", kInclude, 0U},
      {"http://localhost.", kExclude, 0U},
      {"http://localhost.", kInclude, 0U},
      {"http:////Comment", kExclude, 0U},
  };
  for (const LengthCase& url_case : kUrlCases) {
    SCOPED_TRACE(url_case.input);
    EXPECT_EQ(url_case.expected_length,
              GetRegistryLengthFromURL(url_case.input,
                                       url_case.unknown_filter));
  }

  // Test std::string version of GetRegistryLength().  Uses the same
  // underpinnings as the GURL version, so this is really more of a check of
  // CanonicalizeHost().
  const LengthCase kHostCases[] = {
      {"a.baz.jp", kExclude, 2U},          // 1
      {"a.baz.jp.", kExclude, 3U},         // 1
      {"ac.jp", kExclude, 0U},             // 2
      {"a.bar.jp", kExclude, 0U},          // 3
      {"bar.jp", kExclude, 0U},            // 3
      {"baz.bar.jp", kExclude, 0U},        // 3 4
      {"a.b.baz.bar.jp", kExclude, 12U},   // 4
      {"baz.pref.bar.jp", kExclude, 6U},   // 5
      {"a.b.bar.baz.com", kExclude, 11U},  // 6
      {"a.d.c", kExclude, 3U},             // 7
      {".a.d.c", kExclude, 3U},            // 7
      {"..a.d.c", kExclude, 3U},           // 7
      {"a.b.c", kExclude, 1U},             // 7 8
      {"baz.com", kExclude, 0U},           // none
      {"baz.com.", kExclude, 0U},          // none
      {"baz.com", kInclude, 3U},           // none
      {"baz.com.", kInclude, 4U},          // none

      {"", kExclude, kNpos},
      {"foo.com..", kExclude, 0U},
      {"..", kExclude, 0U},
      {"192.168.0.1", kExclude, 0U},
      {"localhost", kExclude, 0U},
      {"localhost", kInclude, 0U},
      {"localhost.", kExclude, 0U},
      {"localhost.", kInclude, 0U},

      // IDN case.
      {"foo.xn--fiqs8s", kExclude, 10U},
  };
  for (const LengthCase& host_case : kHostCases) {
    SCOPED_TRACE(host_case.input);
    EXPECT_EQ(host_case.expected_length,
              GetCanonicalHostRegistryLength(host_case.input,
                                             host_case.unknown_filter));
  }
}
325
// Checks HostHasRegistryControlledDomain() on invalid, non-canonical and
// regular hosts, using the test1 domain data.
TEST_F(RegistryControlledDomainTest, HostHasRegistryControlledDomain) {
  UseDomainData(test1::kDafsa);

  // Every case below uses the same pair of filters.
  const auto has_rcd = [](const std::string& host) {
    return HostHasRegistryControlledDomain(host, EXCLUDE_UNKNOWN_REGISTRIES,
                                           EXCLUDE_PRIVATE_REGISTRIES);
  };

  // Invalid hosts.
  EXPECT_FALSE(has_rcd(std::string()));
  EXPECT_FALSE(has_rcd("%00asdf"));

  // Invalid host but valid R.C.D.
  EXPECT_TRUE(has_rcd("%00foo.jp"));

  // Valid R.C.D. when canonicalized, even with an invalid prefix and an
  // escaped dot.
  EXPECT_TRUE(has_rcd("%00foo.Google%2EjP"));

  // Regular, no match.
  EXPECT_FALSE(has_rcd("bar.notatld"));

  // Regular, match.
  EXPECT_TRUE(has_rcd("www.Google.Jp"));
}
353
// Runs CompareDomains() over pairs of URLs with the test2 domain data. The
// trailing comments are the annotations carried by the individual assertions
// this table replaces.
TEST_F(RegistryControlledDomainTest, TestSameDomainOrHost) {
  UseDomainData(test2::kDafsa);

  struct SameDomainCase {
    const char* url1;
    const char* url2;
    bool expected_same;
  };
  const SameDomainCase kCases[] = {
      // b.bar.jp
      {"http://a.b.bar.jp/file.html", "http://a.b.bar.jp/file.html", true},
      // b.bar.jp
      {"http://a.b.bar.jp/file.html", "http://b.b.bar.jp/file.html", true},
      // foo.jp vs. not.jp
      {"http://a.foo.jp/file.html", "http://a.not.jp/file.html", false},
      // foo.jp vs. foo.jp.
      {"http://a.foo.jp/file.html", "http://a.foo.jp./file.html", false},
      // a.com vs. b.com
      {"http://a.com/file.html", "http://b.com/file.html", false},
      // x.com
      {"http://a.x.com/file.html", "http://b.x.com/file.html", true},
      // x.com
      {"http://a.x.com/file.html", "http://.x.com/file.html", true},
      // x.com
      {"http://a.x.com/file.html", "http://..b.x.com/file.html", true},
      // intranet
      {"http://intranet/file.html", "http://intranet/file.html", true},
      // intranet
      {"http://intranet1/file.html", "http://intranet2/file.html", false},
      // intranet
      {"http://intranet1.corp.example.com/file.html",
       "http://intranet2.corp.example.com/file.html", true},
      // 127.0.0.1
      {"http://127.0.0.1/file.html", "http://127.0.0.1/file.html", true},
      // 192.168.0.1 vs. 127.0.0.1
      {"http://192.168.0.1/file.html", "http://127.0.0.1/file.html", false},
      // no host
      {"file:///C:/file.html", "file:///C:/file.html", false},

      // The trailing dot means different sites - see also
      // https://github.com/mikewest/sec-metadata/issues/15.
      {"https://foo.example.com", "https://foo.example.com.", false},
  };
  // CompareDomains() adds its own SCOPED_TRACE naming both URLs.
  for (const SameDomainCase& pair : kCases) {
    EXPECT_EQ(pair.expected_same, CompareDomains(pair.url1, pair.url2));
  }
}
392
// Checks a handful of URLs without calling UseDomainData(), i.e. against the
// default rules.
TEST_F(RegistryControlledDomainTest, TestDefaultData) {
  struct DefaultCase {
    const char* url;
    UnknownRegistryFilter unknown_filter;
    size_t expected_length;
  };
  // Note that no data is set: we're using the default rules.
  const DefaultCase kCases[] = {
      {"http://google.com", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://stanford.edu", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://ustreas.gov", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://icann.net", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://ferretcentral.org", EXCLUDE_UNKNOWN_REGISTRIES, 3U},
      {"http://nowhere.notavaliddomain", EXCLUDE_UNKNOWN_REGISTRIES, 0U},
      {"http://nowhere.notavaliddomain", INCLUDE_UNKNOWN_REGISTRIES, 15U},
  };
  for (const DefaultCase& entry : kCases) {
    SCOPED_TRACE(entry.url);
    EXPECT_EQ(entry.expected_length,
              GetRegistryLengthFromURL(entry.url, entry.unknown_filter));
  }
}
410
// Runs the same inputs through GetRegistryLength() once with
// EXCLUDE_PRIVATE_REGISTRIES and once with INCLUDE_PRIVATE_REGISTRIES.
TEST_F(RegistryControlledDomainTest, TestPrivateRegistryHandling) {
  UseDomainData(test1::kDafsa);

  // Testing the same dataset for INCLUDE_PRIVATE_REGISTRIES and
  // EXCLUDE_PRIVATE_REGISTRIES arguments.
  // For the domain data used for this test, the private registries are
  // 'priv.no' and 'private'.
  struct PrivateCase {
    const char* url;
    UnknownRegistryFilter unknown_filter;
    size_t expected_excluding_private;
    size_t expected_including_private;
  };
  const PrivateCase kCases[] = {
      {"http://priv.no", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 0U},
      {"http://foo.priv.no", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 7U},
      {"http://foo.jp", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 2U},
      {"http://www.foo.jp", EXCLUDE_UNKNOWN_REGISTRIES, 2U, 2U},
      {"http://private", EXCLUDE_UNKNOWN_REGISTRIES, 0U, 0U},
      {"http://foo.private", EXCLUDE_UNKNOWN_REGISTRIES, 0U, 7U},
      {"http://private", INCLUDE_UNKNOWN_REGISTRIES, 0U, 0U},
      {"http://foo.private", INCLUDE_UNKNOWN_REGISTRIES, 7U, 7U},
  };

  // Non-private registries.
  for (const PrivateCase& entry : kCases) {
    SCOPED_TRACE(entry.url);
    EXPECT_EQ(entry.expected_excluding_private,
              GetRegistryLengthFromURL(entry.url, entry.unknown_filter));
  }

  // Private registries.
  for (const PrivateCase& entry : kCases) {
    SCOPED_TRACE(entry.url);
    EXPECT_EQ(entry.expected_including_private,
              GetRegistryLengthFromURLIncludingPrivate(entry.url,
                                                       entry.unknown_filter));
  }
}
463
// Looks up long keys in the test3 domain data.
TEST_F(RegistryControlledDomainTest, TestDafsaTwoByteOffsets) {
  UseDomainData(test3::kDafsa);

  // Testing to lookup keys in a DAFSA with two byte offsets.
  // This DAFSA is constructed so that labels begin and end with unique
  // characters, which makes it impossible to merge labels. Each inner node
  // is about 100 bytes and a one byte offset can at most add 64 bytes to
  // previous offset. Thus the paths must go over two byte offsets.

  // Each key is "a.b." followed by a 102-character label: one leading
  // character, 100 underscores and one trailing character.
  const std::string filler(100, '_');
  const std::string key_6 = "a.b.6" + filler + "6";
  const std::string key_7 = "a.b.7" + filler + "7";
  const std::string key_a8 = "a.b.a" + filler + "8";

  EXPECT_EQ(102U,
            GetCanonicalHostRegistryLength(key_6, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetCanonicalHostRegistryLength(key_7, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(102U, GetCanonicalHostRegistryLengthIncludingPrivate(key_7));
  EXPECT_EQ(0U,
            GetCanonicalHostRegistryLength(key_a8, EXCLUDE_UNKNOWN_REGISTRIES));
}
491
// Looks up long keys in the test4 domain data.
TEST_F(RegistryControlledDomainTest, TestDafsaThreeByteOffsets) {
  UseDomainData(test4::kDafsa);

  // Testing to lookup keys in a DAFSA with three byte offsets.
  // This DAFSA is constructed so that labels begin and end with unique
  // characters, which makes it impossible to merge labels. The byte array
  // has a size of ~54k. A two byte offset can add at most 8k to the
  // previous offset. Since we can skip only forward in memory, the nodes
  // representing the return values must be located near the end of the byte
  // array. The probability that we can reach from an arbitrary inner node to
  // a return value without using a three byte offset is small (but not zero).
  // The test is repeated with some different keys and with a reasonable
  // probability at least one of the tested paths has to go over a three byte
  // offset.

  // Each key is "a.b." followed by a 104-character label: two leading
  // characters, 100 underscores and two trailing characters.
  const std::string filler(100, '_');
  const std::string key_z6 = "a.b.z6" + filler + "z6";
  const std::string key_z7 = "a.b.z7" + filler + "z7";
  const std::string key_za8 = "a.b.za" + filler + "z8";

  EXPECT_EQ(104U,
            GetCanonicalHostRegistryLength(key_z6, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(0U,
            GetCanonicalHostRegistryLength(key_z7, EXCLUDE_UNKNOWN_REGISTRIES));
  EXPECT_EQ(104U, GetCanonicalHostRegistryLengthIncludingPrivate(key_z7));
  EXPECT_EQ(0U, GetCanonicalHostRegistryLength(key_za8,
                                               EXCLUDE_UNKNOWN_REGISTRIES));
}
525
// Looks up keys with shared prefixes in the test5 domain data.
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedPrefixes) {
  UseDomainData(test5::kDafsa);

  // Testing to lookup keys in a DAFSA with compressed prefixes.
  // This DAFSA is constructed from words with similar prefixes but distinct
  // suffixes. The DAFSA will then form a trie with the implicit source node
  // as root.

  struct LookupCase {
    const char* host;
    // true: GetCanonicalHostRegistryLengthIncludingPrivate(host).
    // false: GetCanonicalHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES).
    bool include_private;
    size_t expected_length;
  };
  const LookupCase kCases[] = {
      {"a.b.ai", false, 2U},
      {"a.b.bj", false, 0U},
      {"a.b.bj", true, 2U},
      {"a.b.aak", false, 3U},
      {"a.b.bbl", false, 0U},
      {"a.b.bbl", true, 3U},
      {"a.b.aaa", true, 0U},
      {"a.b.bbb", true, 0U},
      {"a.b.aaaam", false, 5U},
      {"a.b.bbbbn", false, 5U},
  };
  for (const LookupCase& lookup : kCases) {
    SCOPED_TRACE(lookup.host);
    const size_t actual_length =
        lookup.include_private
            ? GetCanonicalHostRegistryLengthIncludingPrivate(lookup.host)
            : GetCanonicalHostRegistryLength(lookup.host,
                                             EXCLUDE_UNKNOWN_REGISTRIES);
    EXPECT_EQ(lookup.expected_length, actual_length);
  }
}
560
// Looks up keys with shared suffixes in the test6 domain data.
TEST_F(RegistryControlledDomainTest, TestDafsaJoinedSuffixes) {
  UseDomainData(test6::kDafsa);

  // Testing to lookup keys in a DAFSA with compressed suffixes.
  // This DAFSA is constructed from words with similar suffixes but distinct
  // prefixes. The DAFSA will then form a trie with the implicit sink node as
  // root.

  struct LookupCase {
    const char* host;
    // true: GetCanonicalHostRegistryLengthIncludingPrivate(host).
    // false: GetCanonicalHostRegistryLength(host, EXCLUDE_UNKNOWN_REGISTRIES).
    bool include_private;
    size_t expected_length;
  };
  const LookupCase kCases[] = {
      {"a.b.ia", false, 2U},
      {"a.b.jb", false, 0U},
      {"a.b.jb", true, 2U},
      {"a.b.kaa", false, 3U},
      {"a.b.lbb", false, 0U},
      {"a.b.lbb", true, 3U},
      {"a.b.aaa", true, 0U},
      {"a.b.bbb", true, 0U},
      {"a.b.maaaa", false, 5U},
      {"a.b.nbbbb", false, 5U},
  };
  for (const LookupCase& lookup : kCases) {
    SCOPED_TRACE(lookup.host);
    const size_t actual_length =
        lookup.include_private
            ? GetCanonicalHostRegistryLengthIncludingPrivate(lookup.host)
            : GetCanonicalHostRegistryLength(lookup.host,
                                             EXCLUDE_UNKNOWN_REGISTRIES);
    EXPECT_EQ(lookup.expected_length, actual_length);
  }
}
595
TEST_F(RegistryControlledDomainTest,Permissive)596 TEST_F(RegistryControlledDomainTest, Permissive) {
597 UseDomainData(test1::kDafsa);
598
599 EXPECT_EQ(std::string::npos, PermissiveGetHostRegistryLength(""));
600
601 // Regular non-canonical host name.
602 EXPECT_EQ(2U, PermissiveGetHostRegistryLength("Www.Google.Jp"));
603 EXPECT_EQ(3U, PermissiveGetHostRegistryLength("Www.Google.Jp."));
604
605 // Empty returns npos.
606 EXPECT_EQ(std::string::npos, PermissiveGetHostRegistryLength(""));
607
608 // Trailing spaces are counted as part of the hostname, meaning this will
609 // not match a known registry.
610 EXPECT_EQ(0U, PermissiveGetHostRegistryLength("Www.Google.Jp "));
611
612 // Invalid characters at the beginning are OK if the suffix still matches.
613 EXPECT_EQ(2U, PermissiveGetHostRegistryLength("*%00#?.Jp"));
614
615 // Escaped period, this will add new components.
616 EXPECT_EQ(4U, PermissiveGetHostRegistryLength("Www.Googl%45%2e%4Ap"));
617
618 // IDN cases (not supported when not linking ICU).
619 #if !BUILDFLAG(USE_PLATFORM_ICU_ALTERNATIVES)
620 EXPECT_EQ(10U, PermissiveGetHostRegistryLength("foo.xn--fiqs8s"));
621 EXPECT_EQ(11U, PermissiveGetHostRegistryLength("foo.xn--fiqs8s."));
622 EXPECT_EQ(18U, PermissiveGetHostRegistryLength("foo.%E4%B8%AD%E5%9B%BD"));
623 EXPECT_EQ(19U, PermissiveGetHostRegistryLength("foo.%E4%B8%AD%E5%9B%BD."));
624 EXPECT_EQ(6U,
625 PermissiveGetHostRegistryLength("foo.\xE4\xB8\xAD\xE5\x9B\xBD"));
626 EXPECT_EQ(7U,
627 PermissiveGetHostRegistryLength("foo.\xE4\xB8\xAD\xE5\x9B\xBD."));
628 // UTF-16 IDN.
629 EXPECT_EQ(2U, PermissiveGetHostRegistryLength(u"foo.\x4e2d\x56fd"));
630
631 // Fullwidth dot (u+FF0E) that will get canonicalized to a dot.
632 EXPECT_EQ(2U, PermissiveGetHostRegistryLength("Www.Google\xEF\xBC\x8Ejp"));
633 // Same but also ending in a fullwidth dot.
634 EXPECT_EQ(5U, PermissiveGetHostRegistryLength(
635 "Www.Google\xEF\xBC\x8Ejp\xEF\xBC\x8E"));
636 // Escaped UTF-8, also with an escaped fullwidth "Jp".
637 // "Jp" = U+FF2A, U+FF50, UTF-8 = EF BC AA EF BD 90
638 EXPECT_EQ(27U, PermissiveGetHostRegistryLength(
639 "Www.Google%EF%BC%8E%EF%BC%AA%EF%BD%90%EF%BC%8E"));
640 // UTF-16 (ending in a dot).
641 EXPECT_EQ(3U, PermissiveGetHostRegistryLength(
642 u"Www.Google\xFF0E\xFF2A\xFF50\xFF0E"));
643 #endif
644 }
645
646 } // namespace net::registry_controlled_domains
647