xref: /aosp_15_r20/external/pigweed/pw_tokenizer/token_database_test.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer/token_database.h"
16 
17 #include <cstring>
18 #include <string>
19 #include <string_view>
20 
21 #include "pw_unit_test/framework.h"
22 
23 namespace pw::tokenizer {
24 namespace {
25 
26 using namespace std::literals::string_view_literals;
27 
// A well-formed database image with three entries (tokens 1, 2 and 0xFF) whose
// strings are "hi!", "goodbye" and ":)".
//
// Layout: a 16-byte header ("TOKENS", two bytes that kBadVersion varies, a
// 4-byte entry count stored low byte first, four zero bytes), then one 8-byte
// record per entry (4-byte token followed by four more bytes, zero here), then
// the null-terminated strings in entry order. The final string is terminated
// by the literal's implicit null terminator.
//
// NOTE(review): this array carries no alignment specifier; confirm that
// TokenDatabase does not require aligned input before relying on that.
constexpr char kBasicData[] =
    // Header
    "TOKENS\0\0"
    "\x03\0\0\0"
    "\0\0\0\0"
    // Entries
    "\x01\0\0\0" "\0\0\0\0"
    "\x02\0\0\0" "\0\0\0\0"
    "\xFF\0\0\0" "\0\0\0\0"
    // Strings
    "hi!\0"
    "goodbye\0"
    ":)";
38 
// A database image that consists of a header only and declares zero entries.
// Only 15 bytes are spelled out; the literal's implicit null terminator
// supplies the 16th and final byte.
constexpr char kEmptyData[] =
    "TOKENS\0\0"
    "\0\0\0\0"  // Entry count of zero.
    "\0\0\0";   // Last byte is the null terminator.
41 
// A database image whose leading magic string is wrong: it ends in a lowercase
// 's' ("TOKENs") where the valid fixtures have "TOKENS".
constexpr char kBadMagic[] =
    // Header
    "TOKENs\0\0"
    "\x03\0\0\0"
    "\0\0\0\0"
    // One entry
    "\x01\0\0\0" "\0\0\0\0"
    // Strings
    "hi!\0";
46 
// A header whose two bytes after the magic string are {0, 1} instead of the
// {0, 0} used by the valid fixtures.
constexpr char kBadVersion[] =
    "TOKENS"
    "\0\1"
    "\0\0\0\0"
    "\0\0\0\0";
48 
// A header that declares 0xff entries although no entry or string data
// follows it.
constexpr char kBadEntryCount[] =
    "TOKENS\0\0"
    "\xff\0\0\0"
    "\0\0\0\0";
50 
// A database stored as signed char whose entry count (0x80 = 128) has the top
// bit of its low byte set. It is used to check that the entry count is read
// correctly, without per-byte sign extension.
//
// The image is the 16-byte header, 128 eight-byte entries ("TOKNdate") and 128
// empty strings. 127 string terminators are written out; the literal's
// implicit null terminator is the 128th.
constexpr signed char kSignedWithTopBit[] =
    // Header
    "TOKENS\0\0"
    "\x80\0\0\0"
    "\0\0\0\0"
    // Entries: eight per line; running totals are noted every fourth line.
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"  // 32
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"  // 64
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"  // 96
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdate"  // 128
    // Strings (empty): sixteen terminators per line.
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  16
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  32
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  48
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  64
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  80
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  //  96
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 112
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0";   // 128, with the implicit terminator
77 
78 constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
79 static_assert(kBasicDatabase.size() == 3u);
80 
// Checks that size() reports the entry count declared by each fixture.
TEST(TokenDatabase, EntryCount) {
  // Both the template-argument and the function-argument forms of Create() are
  // usable in constant expressions.
  static_assert(TokenDatabase::Create<kBasicData>().size() == 3u);
  static_assert(TokenDatabase::Create(kEmptyData).size() == 0u);

  // The signed-char fixture declares 0x80 entries; the count must not come
  // back sign-extended.
  const TokenDatabase signed_database =
      TokenDatabase::Create<kSignedWithTopBit>();
  EXPECT_EQ(signed_database.size(), 0x80u);
}
86 
// Exercises TokenDatabase::IsValid() with well-formed and malformed images.
TEST(TokenDatabase, ValidCheck) {
  // Runtime check, using a non-constexpr copy of the basic database.
  char runtime_copy[sizeof(kBasicData)];
  std::memcpy(runtime_copy, kBasicData, sizeof(runtime_copy));
  EXPECT_TRUE(TokenDatabase::IsValid(runtime_copy));

  // The well-formed fixtures are accepted in constant expressions.
  static_assert(TokenDatabase::IsValid(kBasicData));
  static_assert(TokenDatabase::IsValid(kEmptyData));
  static_assert(TokenDatabase::IsValid(kSignedWithTopBit));

  // The deliberately broken fixtures are rejected.
  static_assert(!TokenDatabase::IsValid(kBadMagic));
  static_assert(!TokenDatabase::IsValid(kBadVersion));
  static_assert(!TokenDatabase::IsValid(kBadEntryCount));

  // Malformed headers passed directly as string literals.
  static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0"));  // too short
  static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));

  // The remaining inputs are string_views, so no implicit null terminator is
  // included in the data.

  // One entry but no string table; this is one byte too short.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));

  // Add one byte for the string table.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));

  // Two entries with a single-byte string table are rejected.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                              "WXYZdate"
                              "WXYZdate"
                              "\0"sv));

  // Two entries with two null-terminated strings ("hi" and "") are accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0\0"sv));

  // Two entries with two non-empty strings are accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0hello\0"sv));
}
129 
// Walks kBasicDatabase from begin() to end(), checking every entry and the
// iterator's distance from begin() along the way.
TEST(TokenDatabase, Iterator) {
  auto cursor = kBasicDatabase.begin();

  // First entry, read through operator->.
  EXPECT_EQ(cursor->token, 1u);
  EXPECT_STREQ(cursor->string, "hi!");

  // Second entry.
  ++cursor;
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor->string, "goodbye");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 1);

  // Third entry, read through operator* this time.
  ++cursor;
  EXPECT_EQ((*cursor).token, 0xFFu);
  EXPECT_STREQ((*cursor).string, ":)");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 2);

  // One more step reaches end(); the distance travelled equals size().
  ++cursor;
  EXPECT_EQ(cursor, kBasicDatabase.end());
  EXPECT_EQ(static_cast<size_t>(cursor - kBasicDatabase.begin()),
            kBasicDatabase.size());
}
150 
151 static_assert(
__anon88474df30202null152     [] {
153       auto it1 = kBasicDatabase.begin();
154       auto it2 = it1;
155       ++it2;
156       return it1->token == 1u && it2->token == 2u;
157     }(),
158     "Iterators work in constant expression");
159 
160 static_assert(
__anon88474df30302null161     [] {
162       constexpr uint32_t expected[3] = {1, 2, 0xff};
163 
164       int i = 0;
165       for (const auto& entry : kBasicDatabase) {
166         if (entry.token != expected[i++]) {
167           return false;
168         }
169       }
170       return i == 3;
171     }(),
172     "Range based for loop iteration");
173 
// Pre-increment yields the already-advanced iterator.
TEST(TokenDatabase, Iterator_PreIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The result of ++cursor refers to the second entry...
  EXPECT_EQ((++cursor)->token, 2u);
  // ...and so does cursor itself.
  EXPECT_STREQ((*cursor).string, "goodbye");
}
179 
// Post-increment yields the iterator's previous position while still
// advancing the iterator.
TEST(TokenDatabase, Iterator_PostIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The result of cursor++ still refers to the first entry.
  EXPECT_EQ((cursor++)->token, 1u);

  // cursor itself has moved on to the second entry.
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ((*cursor).string, "goodbye");
}
187 
// Looking up the token of the first entry returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
  auto found = kBasicDatabase.Find(1);

  // Stop here if the result is not a single entry; it is indexed below.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "hi!");

  // The result can also be traversed with a range-based for loop.
  for (const auto& entry : found) {
    EXPECT_EQ(entry.token, 1u);
    EXPECT_STREQ(entry.string, "hi!");
  }
}
199 
// Looking up the token of the middle entry returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
  auto found = kBasicDatabase.Find(2);

  // Stop here if the result is not a single entry; it is indexed below.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "goodbye");
}
206 
// Looking up the token of the last entry returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
  auto found = kBasicDatabase.Find(0xff);

  // Stop here if the result is not a single entry; it is indexed below.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_STREQ(found[0].string, ":)");
  EXPECT_FALSE(found.empty());
}
213 
// Tokens that do not occur in kBasicDatabase (which holds 1, 2 and 0xFF)
// produce empty results.
TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
  // Below the smallest stored token.
  EXPECT_TRUE(kBasicDatabase.Find(0).empty());
  // Between stored tokens.
  EXPECT_TRUE(kBasicDatabase.Find(3).empty());
  // Above the largest stored token.
  EXPECT_TRUE(kBasicDatabase.Find(10239).empty());
  EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty());
}
220 
// A lookup without matches yields an empty range that is safe to iterate.
TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
  // Can also create the database at runtime.
  TokenDatabase runtime_database = TokenDatabase::Create(kBasicData);

  const auto found = runtime_database.Find(42);
  ASSERT_EQ(found.size(), 0u);
  EXPECT_TRUE(found.empty());

  for (const auto& entry : found) {
    FAIL();  // There were no matches, so this code should never execute.
    static_cast<void>(entry);
  }
}
233 
// A database image with five entries, three of which share token 1. Each
// entry is a 4-byte token followed by the four bytes "date". The string table
// holds "hi!", "goodbye", ":)" and two empty strings; the last of these is
// terminated by the literal's implicit null terminator.
constexpr char kCollisionsData[] =
    // Header
    "TOKENS\0\0"
    "\x05\0\0\0"
    "\0\0\0\0"
    // Entries
    "\x01\0\0\0" "date"
    "\x01\0\0\0" "date"
    "\x01\0\0\0" "date"
    "\x02\0\0\0" "date"
    "\xFF\0\0\0" "date"
    // Strings
    "hi!\0"
    "goodbye\0"
    ":)\0"
    "\0";
242 
243 constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
244 static_assert(kCollisions.size() == 5u);
245 
// A token that occurs several times yields every matching entry, in database
// order.
TEST(TokenDatabase, MultipleEntriesWithSameToken) {
  TokenDatabase::Entries match = kCollisions.Find(1);

  // Establish the size of the result before dereferencing its iterators or
  // indexing into it. ASSERT_EQ is fatal, so none of the accesses below run
  // against a range of unexpected size.
  ASSERT_EQ(match.size(), 3u);

  EXPECT_EQ(match.begin()->token, 1u);
  // end() is one past the last match. The three matches are followed by
  // further entries in kCollisions, so end() here refers to the entry with
  // token 2 and can be dereferenced.
  EXPECT_EQ(match.end()->token, 2u);

  EXPECT_STREQ(match[0].string, "hi!");
  EXPECT_STREQ(match[1].string, "goodbye");
  EXPECT_STREQ(match[2].string, ":)");

  for (const auto& entry : match) {
    EXPECT_EQ(entry.token, 1u);
  }
}
261 
// A database with a valid header and zero entries is ok(), has size 0, and
// produces empty results for lookups and iteration.
TEST(TokenDatabase, Empty) {
  constexpr TokenDatabase empty_db = TokenDatabase::Create<kEmptyData>();

  // Compile-time properties.
  static_assert(empty_db.ok(), "Database has no entries, but is valid");
  static_assert(empty_db.size() == 0u);
  static_assert(empty_db.end() == empty_db.begin());

  // Lookups never match.
  EXPECT_TRUE(empty_db.Find(0).empty());
  EXPECT_TRUE(empty_db.Find(123).empty());

  for (const auto& entry : empty_db) {
    FAIL();  // The database is empty; this should never execute.
    static_cast<void>(entry);
  }
}
276 
// A default-constructed database reports !ok() and size 0, and lookups on it
// return empty results.
TEST(TokenDatabase, DefaultConstructedDatabase) {
  constexpr TokenDatabase empty_db;  // No underlying data

  static_assert(!empty_db.ok());
  static_assert(empty_db.size() == 0u);

  EXPECT_TRUE(empty_db.Find(0).empty());
}
284 
// Creating a database from data that is too short to hold a header yields a
// database that reports !ok(); lookups on it return empty results.
TEST(TokenDatabase, InvalidData) {
  // Only the first eight bytes of a header, plus the literal's terminator.
  constexpr TokenDatabase bad_db = TokenDatabase::Create("TOKENS\0\0");
  static_assert(!bad_db.ok());

  EXPECT_TRUE(bad_db.Find(0).empty());
}
291 
// Creating a database from a std::string that does not hold a token database
// yields a database that reports !ok().
TEST(TokenDatabase, FromString) {
  // Keep the string in a named variable so that it outlives the database
  // created from it. A temporary passed directly to Create() would be
  // destroyed at the end of the declaration, before bad_db is used.
  const std::string not_a_database("wow!");
  TokenDatabase bad_db = TokenDatabase::Create(not_a_database);

  EXPECT_FALSE(bad_db.ok());
}
297 
298 }  // namespace
299 }  // namespace pw::tokenizer
300