1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 #include "pw_tokenizer/token_database.h"
16
17 #include <cstring>
18 #include <string>
19 #include <string_view>
20
21 #include "pw_unit_test/framework.h"
22
23 namespace pw::tokenizer {
24 namespace {
25
26 using namespace std::literals::string_view_literals;
27
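// Serialized token database used by most tests below. No alignas is applied to
// this array. NOTE(review): the database is parsed inside constant expressions
// in this file, so it is presumably read byte by byte and needs no particular
// alignment -- confirm against token_database.h.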
constexpr char kBasicData[] =
    // Header: magic and version bytes, entry count (3), four zero bytes.
    "TOKENS\0\0"
    "\x03\x00\x00\x00"
    "\0\0\0\0"
    // Entries: eight bytes each; the token occupies the first four bytes.
    "\x01\0\0\0" "\0\0\0\0"
    "\x02\0\0\0" "\0\0\0\0"
    "\xFF\0\0\0" "\0\0\0\0"
    // String table, in entry order. The last string is closed by the literal's
    // implicit null terminator.
    "hi!\0" "goodbye\0" ":)";
38
// Header only: the entry count is zero. Fifteen bytes are spelled out; the
// literal's implicit null terminator supplies the sixteenth.
constexpr char kEmptyData[] =
    "TOKENS\0\0"
    "\x00\x00\x00\x00"
    "\0\0\0";
41
// Laid out like a database with one entry, except that the magic string ends
// in a lowercase 's'.
constexpr char kBadMagic[] =
    "TOKENs\0\0"
    "\x03\x00\x00\x00"
    "\0\0\0\0"
    "\x01\0\0\0" "\0\0\0\0"
    "hi!\0";
46
// The two bytes after "TOKENS" are 0, 1 rather than 0, 0; the entry count is
// zero.
constexpr char kBadVersion[] =
    "TOKENS"
    "\0\1"
    "\x00\0\0\0"
    "\0\0\0\0";
48
// The header claims 0xff entries, but no entry data follows it.
constexpr char kBadEntryCount[] =
    "TOKENS\0\0"
    "\xff\x00\x00\x00"
    "\0\0\0\0";
50
// The element type is signed char and the low byte of the entry count is 0x80,
// so this data checks that the count is assembled without sign-extending the
// individual bytes.
constexpr signed char kSignedWithTopBit[] =
    // Header; the entry count is 0x80 (128).
    "TOKENS\0\0"
    "\x80\x00\x00\x00"
    "\0\0\0\0"
    // Entries: eight bytes each, four per line. Running totals on the right.
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 16
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 32
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 48
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 64
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 80
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 96
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 112
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdate"  // 128
    // String table: 128 empty strings, i.e. 128 zero bytes, sixteen per line.
    // The final zero byte is the literal's implicit null terminator.
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 16
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 32
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 48
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 64
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 80
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 96
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0"  // 112
    "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0";   // 128
77
78 constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
79 static_assert(kBasicDatabase.size() == 3u);
80
// size() must report the entry count stored in the header for both Create
// forms (array as template argument, array as function argument).
TEST(TokenDatabase, EntryCount) {
  constexpr TokenDatabase basic_db = TokenDatabase::Create<kBasicData>();
  static_assert(basic_db.size() == 3u);

  constexpr TokenDatabase no_entries_db = TokenDatabase::Create(kEmptyData);
  static_assert(no_entries_db.size() == 0u);

  // The count byte is 0x80 and is stored in a signed char array.
  const TokenDatabase signed_db = TokenDatabase::Create<kSignedWithTopBit>();
  EXPECT_EQ(signed_db.size(), 0x80u);
}
86
// Exercises TokenDatabase::IsValid on well-formed and malformed byte
// sequences. All checks except the first are evaluated at compile time.
TEST(TokenDatabase, ValidCheck) {
  // Run the check on a non-constant copy of the data as well.
  char runtime_copy[sizeof(kBasicData)];
  std::memcpy(runtime_copy, kBasicData, sizeof(runtime_copy));
  EXPECT_TRUE(TokenDatabase::IsValid(runtime_copy));

  // Well-formed databases.
  static_assert(TokenDatabase::IsValid(kBasicData));
  static_assert(TokenDatabase::IsValid(kEmptyData));
  static_assert(TokenDatabase::IsValid(kSignedWithTopBit));

  // Malformed databases defined at the top of this file.
  static_assert(!TokenDatabase::IsValid(kBadMagic));
  static_assert(!TokenDatabase::IsValid(kBadVersion));
  static_assert(!TokenDatabase::IsValid(kBadEntryCount));

  // Malformed headers given inline.
  static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0"));  // too short
  static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));

  // One entry with nothing after it: one byte short, since there is no string
  // table at all.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));

  // The same data plus a single string-table byte is accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));

  // Two entries followed by a one-byte string table are rejected.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                              "WXYZdate"
                              "WXYZdate"
                              "\0"sv));
  // Two entries with the strings "hi" and "" are accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0\0"sv));
  // Two entries with the strings "hi" and "hello" are accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0hello\0"sv));
}
129
// Steps through kBasicDatabase from begin() to end(), checking every entry and
// the iterator's distance from begin() after each step.
TEST(TokenDatabase, Iterator) {
  auto cursor = kBasicDatabase.begin();

  // First entry, read through operator->.
  EXPECT_EQ(cursor->token, 1u);
  EXPECT_STREQ(cursor->string, "hi!");

  // Second entry.
  ++cursor;
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor->string, "goodbye");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 1);

  // Third entry, read through operator*.
  ++cursor;
  EXPECT_EQ((*cursor).token, 0xFFu);
  EXPECT_STREQ((*cursor).string, ":)");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 2);

  // One more step reaches end(); the distance now equals size().
  ++cursor;
  EXPECT_EQ(cursor, kBasicDatabase.end());
  EXPECT_EQ(static_cast<size_t>(cursor - kBasicDatabase.begin()),
            kBasicDatabase.size());
}
150
151 static_assert(
__anon88474df30202null152 [] {
153 auto it1 = kBasicDatabase.begin();
154 auto it2 = it1;
155 ++it2;
156 return it1->token == 1u && it2->token == 2u;
157 }(),
158 "Iterators work in constant expression");
159
160 static_assert(
__anon88474df30302null161 [] {
162 constexpr uint32_t expected[3] = {1, 2, 0xff};
163
164 int i = 0;
165 for (const auto& entry : kBasicDatabase) {
166 if (entry.token != expected[i++]) {
167 return false;
168 }
169 }
170 return i == 3;
171 }(),
172 "Range based for loop iteration");
173
// Pre-increment yields the advanced iterator, so the value read through it is
// the second entry.
TEST(TokenDatabase, Iterator_PreIncrement) {
  auto cursor = kBasicDatabase.begin();
  EXPECT_EQ((++cursor)->token, 2u);
  EXPECT_STREQ((*cursor).string, "goodbye");
}
179
// Post-increment yields the iterator's previous position (first entry) while
// the iterator itself moves on to the second entry.
TEST(TokenDatabase, Iterator_PostIncrement) {
  auto cursor = kBasicDatabase.begin();
  EXPECT_EQ((cursor++)->token, 1u);

  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ((*cursor).string, "goodbye");
}
187
// Looking up the first token in the database returns exactly one entry, which
// is reachable both by index and by iteration.
TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
  auto found = kBasicDatabase.Find(1);
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "hi!");

  for (const auto& hit : found) {
    EXPECT_EQ(hit.token, 1u);
    EXPECT_STREQ(hit.string, "hi!");
  }
}
199
// Looking up the token of the middle entry returns that entry alone.
TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
  auto found = kBasicDatabase.Find(2);
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "goodbye");
}
206
// Looking up the token of the last entry returns that entry alone.
TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
  auto found = kBasicDatabase.Find(0xff);
  ASSERT_EQ(found.size(), 1u);
  EXPECT_STREQ(found[0].string, ":)");
  EXPECT_FALSE(found.empty());
}
213
// Tokens that are not in the database produce an empty result: one below the
// smallest token, one between stored tokens, and two above the largest.
TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
  EXPECT_TRUE(kBasicDatabase.Find(0).empty());
  EXPECT_TRUE(kBasicDatabase.Find(3).empty());
  EXPECT_TRUE(kBasicDatabase.Find(10239).empty());
  EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty());
}
220
// A lookup with no matches yields an empty range whose iteration body never
// runs.
TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
  // Build the database in a non-constexpr variable this time.
  TokenDatabase runtime_db = TokenDatabase::Create(kBasicData);
  const auto found = runtime_db.Find(42);
  ASSERT_EQ(found.size(), 0u);
  EXPECT_TRUE(found.empty());

  for (const auto& unexpected : found) {
    FAIL();  // Nothing matched, so the loop body must not be reached.
    static_cast<void>(unexpected);
  }
}
233
// Five entries, three of which share token 1. The string table holds "hi!",
// "goodbye", ":)" and two empty strings.
constexpr char kCollisionsData[] =
    // Header; the entry count is 5.
    "TOKENS\0\0"
    "\x05\0\0\0"
    "\0\0\0\0"
    // Entries: four token bytes followed by the four bytes "date".
    "\x01\0\0\0" "date"
    "\x01\0\0\0" "date"
    "\x01\0\0\0" "date"
    "\x02\0\0\0" "date"
    "\xFF\0\0\0" "date"
    // String table; the last empty string is closed by the literal's implicit
    // null terminator.
    "hi!\0"
    "goodbye\0"
    ":)\0"
    "\0";
242
243 constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
244 static_assert(kCollisions.size() == 5u);
245
// When several entries share a token, Find returns all of them as one range.
TEST(TokenDatabase, MultipleEntriesWithSameToken) {
  TokenDatabase::Entries collisions = kCollisions.Find(1);

  // The range starts at the first entry with token 1. Its end() refers to the
  // next database entry (token 2), which is still inside kCollisions.
  EXPECT_EQ(collisions.begin()->token, 1u);
  EXPECT_EQ(collisions.end()->token, 2u);
  ASSERT_EQ(collisions.size(), 3u);

  EXPECT_STREQ(collisions[0].string, "hi!");
  EXPECT_STREQ(collisions[1].string, "goodbye");
  EXPECT_STREQ(collisions[2].string, ":)");

  for (const auto& hit : collisions) {
    EXPECT_EQ(hit.token, 1u);
  }
}
261
// A database with a valid header and zero entries is ok(), has equal begin()
// and end(), and never finds anything.
TEST(TokenDatabase, Empty) {
  constexpr TokenDatabase no_entries_db = TokenDatabase::Create<kEmptyData>();
  static_assert(no_entries_db.size() == 0u);
  static_assert(no_entries_db.ok(), "Database has no entries, but is valid");
  static_assert(no_entries_db.end() == no_entries_db.begin());

  EXPECT_TRUE(no_entries_db.Find(0).empty());
  EXPECT_TRUE(no_entries_db.Find(123).empty());

  for (const auto& unexpected : no_entries_db) {
    FAIL();  // There are no entries, so the loop body must not be reached.
    static_cast<void>(unexpected);
  }
}
276
// A default-constructed database has no backing data: it reports size zero,
// is not ok(), and lookups return nothing.
TEST(TokenDatabase, DefaultConstructedDatabase) {
  constexpr TokenDatabase default_db;  // Not backed by any bytes.

  static_assert(default_db.size() == 0u);
  static_assert(!default_db.ok());
  EXPECT_TRUE(default_db.Find(0).empty());
}
284
// Creating a database from bytes that fail validation (here a truncated
// header) gives a database that is not ok() and finds nothing.
TEST(TokenDatabase, InvalidData) {
  constexpr TokenDatabase truncated_db = TokenDatabase::Create("TOKENS\0\0");

  static_assert(!truncated_db.ok());
  EXPECT_TRUE(truncated_db.Find(0).empty());
}
291
// Create also accepts a std::string; arbitrary text is not a valid database.
TEST(TokenDatabase, FromString) {
  TokenDatabase not_a_database = TokenDatabase::Create(std::string("wow!"));

  EXPECT_FALSE(not_a_database.ok());
}
297
298 } // namespace
299 } // namespace pw::tokenizer
300