xref: /aosp_15_r20/external/cronet/net/base/lookup_string_in_fixed_set_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/lookup_string_in_fixed_set.h"
6 
7 #include <string.h>
8 
9 #include <algorithm>
10 #include <limits>
11 #include <ostream>
12 #include <utility>
13 #include <vector>
14 
15 #include "base/base_paths.h"
16 #include "base/files/file_path.h"
17 #include "base/files/file_util.h"
18 #include "base/path_service.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h"
21 #include "testing/gtest/include/gtest/gtest.h"
22 
23 namespace net {
24 namespace {
25 namespace test1 {
26 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
27 }
28 namespace test3 {
29 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
30 }
31 namespace test4 {
32 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
33 }
34 namespace test5 {
35 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
36 }
37 namespace test6 {
38 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
39 }
40 
41 struct Expectation {
42   const char* const key;
43   int value;
44 };
45 
PrintTo(const Expectation & expectation,std::ostream * os)46 void PrintTo(const Expectation& expectation, std::ostream* os) {
47   *os << "{\"" << expectation.key << "\", " << expectation.value << "}";
48 }
49 
50 class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> {
51  protected:
52   template <size_t N>
LookupInGraph(const unsigned char (& graph)[N],const char * key)53   int LookupInGraph(const unsigned char(&graph)[N], const char* key) {
54     return LookupStringInFixedSet(graph, N, key, strlen(key));
55   }
56 };
57 
58 class Dafsa1Test : public LookupStringInFixedSetTest {};
59 
TEST_P(Dafsa1Test,BasicTest)60 TEST_P(Dafsa1Test, BasicTest) {
61   const Expectation& param = GetParam();
62   EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
63 }
64 
65 const Expectation kBasicTestCases[] = {
66     {"", -1},      {"j", -1},          {"jp", 0}, {"jjp", -1}, {"jpp", -1},
67     {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2},  {"b.c", 1},  {"priv.no", 4},
68 };
69 
70 // Helper function for EnumerateDafsaLanaguage.
RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup & lookup,std::vector<char> * sequence,std::vector<std::string> * language)71 void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup,
72                                        std::vector<char>* sequence,
73                                        std::vector<std::string>* language) {
74   int result = lookup.GetResultForCurrentSequence();
75   if (result != kDafsaNotFound) {
76     std::string line(sequence->begin(), sequence->end());
77     line += base::StringPrintf(", %d", result);
78     language->emplace_back(std::move(line));
79   }
80   // Try appending each char value.
81   for (char c = std::numeric_limits<char>::min();; ++c) {
82     FixedSetIncrementalLookup continued_lookup = lookup;
83     if (continued_lookup.Advance(c)) {
84       sequence->push_back(c);
85       size_t saved_language_size = language->size();
86       RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
87       CHECK_LT(saved_language_size, language->size())
88           << "DAFSA includes a branch to nowhere at node: "
89           << std::string(sequence->begin(), sequence->end());
90       sequence->pop_back();
91     }
92     if (c == std::numeric_limits<char>::max())
93       break;
94   }
95 }
96 
97 // Uses FixedSetIncrementalLookup to build a vector of every string in the
98 // language of the DAFSA.
99 template <typename Graph>
EnumerateDafsaLanguage(const Graph & graph)100 std::vector<std::string> EnumerateDafsaLanguage(const Graph& graph) {
101   FixedSetIncrementalLookup query(graph, sizeof(Graph));
102   std::vector<char> sequence;
103   std::vector<std::string> language;
104   RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
105   return language;
106 }
107 
108 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
109                          Dafsa1Test,
110                          ::testing::ValuesIn(kBasicTestCases));
111 
112 class Dafsa3Test : public LookupStringInFixedSetTest {};
113 
114 // This DAFSA is constructed so that labels begin and end with unique
115 // characters, which makes it impossible to merge labels. Each inner node
116 // is about 100 bytes and a one byte offset can at most add 64 bytes to
117 // previous offset. Thus the paths must go over two byte offsets.
TEST_P(Dafsa3Test,TestDafsaTwoByteOffsets)118 TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) {
119   const Expectation& param = GetParam();
120   EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key));
121 }
122 
123 const Expectation kTwoByteOffsetTestCases[] = {
124     {"0________________________________________________________________________"
125      "____________________________0",
126      0},
127     {"7________________________________________________________________________"
128      "____________________________7",
129      4},
130     {"a________________________________________________________________________"
131      "____________________________8",
132      -1},
133 };
134 
135 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
136                          Dafsa3Test,
137                          ::testing::ValuesIn(kTwoByteOffsetTestCases));
138 
139 class Dafsa4Test : public LookupStringInFixedSetTest {};
140 
141 // This DAFSA is constructed so that labels begin and end with unique
142 // characters, which makes it impossible to merge labels. The byte array
143 // has a size of ~54k. A two byte offset can add at most add 8k to the
144 // previous offset. Since we can skip only forward in memory, the nodes
145 // representing the return values must be located near the end of the byte
146 // array. The probability that we can reach from an arbitrary inner node to
147 // a return value without using a three byte offset is small (but not zero).
148 // The test is repeated with some different keys and with a reasonable
149 // probability at least one of the tested paths has go over a three byte
150 // offset.
TEST_P(Dafsa4Test,TestDafsaThreeByteOffsets)151 TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) {
152   const Expectation& param = GetParam();
153   EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key));
154 }
155 
156 const Expectation kThreeByteOffsetTestCases[] = {
157     {"Z6_______________________________________________________________________"
158      "_____________________________Z6",
159      0},
160     {"Z7_______________________________________________________________________"
161      "_____________________________Z7",
162      4},
163     {"Za_______________________________________________________________________"
164      "_____________________________Z8",
165      -1},
166 };
167 
168 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
169                          Dafsa4Test,
170                          ::testing::ValuesIn(kThreeByteOffsetTestCases));
171 
172 class Dafsa5Test : public LookupStringInFixedSetTest {};
173 
174 // This DAFSA is constructed from words with similar prefixes but distinct
175 // suffixes. The DAFSA will then form a trie with the implicit source node
176 // as root.
TEST_P(Dafsa5Test,TestDafsaJoinedPrefixes)177 TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) {
178   const Expectation& param = GetParam();
179   EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key));
180 }
181 
182 const Expectation kJoinedPrefixesTestCases[] = {
183     {"ai", 0},   {"bj", 4},   {"aak", 0},   {"bbl", 4},
184     {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0},
185 };
186 
187 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
188                          Dafsa5Test,
189                          ::testing::ValuesIn(kJoinedPrefixesTestCases));
190 
191 class Dafsa6Test : public LookupStringInFixedSetTest {};
192 
193 // This DAFSA is constructed from words with similar suffixes but distinct
194 // prefixes. The DAFSA will then form a trie with the implicit sink node as
195 // root.
TEST_P(Dafsa6Test,TestDafsaJoinedSuffixes)196 TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) {
197   const Expectation& param = GetParam();
198   EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key));
199 }
200 
201 const Expectation kJoinedSuffixesTestCases[] = {
202     {"ia", 0},   {"jb", 4},   {"kaa", 0},   {"lbb", 4},
203     {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
204 };
205 
206 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
207                          Dafsa6Test,
208                          ::testing::ValuesIn(kJoinedSuffixesTestCases));
209 
210 // Validates that the generated DAFSA contains exactly the same information as
211 // effective_tld_names_unittest1.gperf.
TEST(LookupStringInFixedSetTest,Dafsa1EnumerateLanguage)212 TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) {
213   auto language = EnumerateDafsaLanguage(test1::kDafsa);
214 
215   // These are the lines of effective_tld_names_unittest1.gperf, in sorted
216   // order.
217   std::vector<std::string> expected_language = {
218       "ac.jp, 0",       "b.c, 1",     "bar.baz.com, 0", "bar.jp, 2",
219       "baz.bar.jp, 2",  "c, 2",       "jp, 0",          "no, 0",
220       "pref.bar.jp, 1", "priv.no, 4", "private, 4",     "xn--fiqs8s, 0",
221   };
222 
223   EXPECT_EQ(expected_language, language);
224 }
225 
226 // Validates that the generated DAFSA contains exactly the same information as
227 // effective_tld_names_unittest5.gperf.
TEST(LookupStringInFixedSetTest,Dafsa5EnumerateLanguage)228 TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) {
229   auto language = EnumerateDafsaLanguage(test5::kDafsa);
230 
231   std::vector<std::string> expected_language = {
232       "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4",
233   };
234 
235   EXPECT_EQ(expected_language, language);
236 }
237 
238 // Validates that the generated DAFSA contains exactly the same information as
239 // effective_tld_names_unittest6.gperf.
TEST(LookupStringInFixedSetTest,Dafsa6EnumerateLanguage)240 TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) {
241   auto language = EnumerateDafsaLanguage(test6::kDafsa);
242 
243   std::vector<std::string> expected_language = {
244       "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0",
245   };
246 
247   EXPECT_EQ(expected_language, language);
248 }
249 
250 }  // namespace
251 }  // namespace net
252