1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/portable-file-backed-proto-log.h"
16
17 #include <cstdint>
18 #include <cstdlib>
19
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include "icing/document-builder.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/mock-filesystem.h"
25 #include "icing/portable/equals-proto.h"
26 #include "icing/proto/document.pb.h"
27 #include "icing/testing/common-matchers.h"
28 #include "icing/testing/tmp-directory.h"
29
30 namespace icing {
31 namespace lib {
32
33 namespace {
34
35 using ::icing::lib::portable_equals_proto::EqualsProto;
36 using ::testing::A;
37 using ::testing::Eq;
38 using ::testing::Gt;
39 using ::testing::HasSubstr;
40 using ::testing::Not;
41 using ::testing::NotNull;
42 using ::testing::Pair;
43 using ::testing::Return;
44
45 using Header = PortableFileBackedProtoLog<DocumentProto>::Header;
46
ReadHeader(Filesystem filesystem,const std::string & file_path)47 Header ReadHeader(Filesystem filesystem, const std::string& file_path) {
48 Header header;
49 filesystem.PRead(file_path.c_str(), &header, sizeof(Header),
50 /*offset=*/0);
51 return header;
52 }
53
WriteHeader(Filesystem filesystem,const std::string & file_path,Header & header)54 void WriteHeader(Filesystem filesystem, const std::string& file_path,
55 Header& header) {
56 filesystem.Write(file_path.c_str(), &header, sizeof(Header));
57 }
58
59 class PortableFileBackedProtoLogTest : public ::testing::Test {
60 protected:
61 // Adds a user-defined default construct because a const member variable may
62 // make the compiler accidentally delete the default constructor.
63 // https://stackoverflow.com/a/47368753
PortableFileBackedProtoLogTest()64 PortableFileBackedProtoLogTest() {}
65
SetUp()66 void SetUp() override {
67 file_path_ = GetTestTempDir() + "/proto_log";
68 filesystem_.DeleteFile(file_path_.c_str());
69 }
70
TearDown()71 void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
72
73 const Filesystem filesystem_;
74 std::string file_path_;
75 bool compress_ = true;
76 int32_t compression_level_ =
77 PortableFileBackedProtoLog<DocumentProto>::kDefaultCompressionLevel;
78 int64_t max_proto_size_ = 256 * 1024; // 256 KiB
79 };
80
// Creating a log on a fresh path succeeds without data loss; reopening the
// same file with a different compression setting is rejected.
TEST_F(PortableFileBackedProtoLogTest, Initialize) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult create_result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  EXPECT_THAT(create_result.proto_log, NotNull());
  EXPECT_FALSE(create_result.has_data_loss());
  EXPECT_FALSE(create_result.recalculated_checksum);

  // The existing file was created with compress_; flipping that flag must not
  // be accepted for the same file.
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(!compress_, max_proto_size_,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
99
// Create() must reject out-of-range option values with INVALID_ARGUMENT.
TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // max_proto_size must be greater than 0.
  const int zero_max_proto_size = 0;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, zero_max_proto_size,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // max_proto_size must be under 16 MiB, so exactly 16 MiB is invalid.
  const int oversized_max_proto_size = 16 * 1024 * 1024;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, oversized_max_proto_size,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // compression_level must be between 0 and 9 inclusive: -1 is too small.
  const int negative_compression_level = -1;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         negative_compression_level)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // compression_level must be between 0 and 9 inclusive: 10 is too large.
  const int excessive_compression_level = 10;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         excessive_compression_level)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
133
// A freshly created log occupies exactly the reserved header space on disk.
TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult create_result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));

  // No protos have been written, so the only bytes in the file are the ones
  // set aside for the header.
  const auto file_size = filesystem_.GetFileSize(file_path_.c_str());
  ASSERT_EQ(file_size, ProtoLog::kHeaderReservedBytes);
}
147
// A proto that is too large for the configured max_proto_size is rejected
// with INVALID_ARGUMENT.
TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // Use a 1-byte limit for this log instead of the fixture default.
  const int tiny_max_proto_size = 1;
  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult create_result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, tiny_max_proto_size,
                                         compression_level_)));
  auto proto_log = std::move(create_result.proto_log);
  ASSERT_FALSE(create_result.has_data_loss());

  const DocumentProto document =
      DocumentBuilder().SetKey("namespace", "uri").Build();

  // The document does not fit within the max proto size given to Options.
  ASSERT_THAT(proto_log->WriteProto(document),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
165
// Overwrites the magic at the start of a written proto entry and verifies
// that reading that entry back fails with INTERNAL.
TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult create_result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto proto_log = std::move(create_result.proto_log);
  ASSERT_FALSE(create_result.has_data_loss());

  // Write a proto
  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();

  ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
                             proto_log->WriteProto(document));

  // The 4 bytes of metadata that just doesn't have the same kProtoMagic
  // specified in portable-file-backed-proto-log.h
  uint32_t wrong_magic = 0x7E000000;

  // Write the wrong kProtoMagic in, kProtoMagics are stored at the beginning of
  // a proto entry. The path-based PWrite is used so that no file descriptor is
  // left open by this test, and the write is asserted so that a failed write
  // cannot be mistaken for a detected corruption.
  ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), file_offset, &wrong_magic,
                                 sizeof(wrong_magic)));

  ASSERT_THAT(proto_log->ReadProto(file_offset),
              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
197
// Writes and reads back protos with compression disabled, then reopens the
// log and checks that a new write lands after the previously written data.
TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // Offsets are held as int64_t, matching the int64_t offsets used elsewhere
  // in this file, so the value returned by WriteProto is never narrowed.
  int64_t last_offset = 0;
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/false,
                                           max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the first proto
    DocumentProto document1 =
        DocumentBuilder().SetKey("namespace1", "uri1").Build();

    ICING_ASSERT_OK_AND_ASSIGN(int64_t written_position,
                               proto_log->WriteProto(document1));

    const int64_t document1_offset = written_position;

    // Check that what we read is what we wrote
    ASSERT_THAT(proto_log->ReadProto(written_position),
                IsOkAndHolds(EqualsProto(document1)));

    // Write a second proto that's close to the max size. Leave some room for
    // the rest of the proto properties.
    std::string long_str(max_proto_size_ - 1024, 'a');
    DocumentProto document2 = DocumentBuilder()
                                  .SetKey("namespace2", "uri2")
                                  .AddStringProperty("long_str", long_str)
                                  .Build();

    ICING_ASSERT_OK_AND_ASSIGN(written_position,
                               proto_log->WriteProto(document2));

    const int64_t document2_offset = written_position;
    last_offset = written_position;
    ASSERT_GT(document2_offset, document1_offset);

    // Check the second proto
    ASSERT_THAT(proto_log->ReadProto(written_position),
                IsOkAndHolds(EqualsProto(document2)));

    ICING_ASSERT_OK(proto_log->PersistToDisk());
  }

  {
    // Make a new proto_log with the same file_path, and make sure we
    // can still write to the same underlying file.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/false,
                                           max_proto_size_,
                                           compression_level_)));
    auto recreated_proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write a third proto
    DocumentProto document3 =
        DocumentBuilder().SetKey("namespace3", "uri3").Build();

    ASSERT_THAT(recreated_proto_log->WriteProto(document3),
                IsOkAndHolds(Gt(last_offset)));
  }
}
265
// Writes and reads back protos with compression enabled, then reopens the
// log and checks that a new write lands after the previously written data.
TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // Offsets are held as int64_t, matching the int64_t offsets used elsewhere
  // in this file, so the value returned by WriteProto is never narrowed.
  int64_t last_offset = 0;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the first proto
    DocumentProto document1 =
        DocumentBuilder().SetKey("namespace1", "uri1").Build();

    ICING_ASSERT_OK_AND_ASSIGN(int64_t written_position,
                               proto_log->WriteProto(document1));

    const int64_t document1_offset = written_position;

    // Check that what we read is what we wrote
    ASSERT_THAT(proto_log->ReadProto(written_position),
                IsOkAndHolds(EqualsProto(document1)));

    // Write a second proto that's close to the max size. Leave some room for
    // the rest of the proto properties.
    std::string long_str(max_proto_size_ - 1024, 'a');
    DocumentProto document2 = DocumentBuilder()
                                  .SetKey("namespace2", "uri2")
                                  .AddStringProperty("long_str", long_str)
                                  .Build();

    ICING_ASSERT_OK_AND_ASSIGN(written_position,
                               proto_log->WriteProto(document2));

    const int64_t document2_offset = written_position;
    last_offset = written_position;
    ASSERT_GT(document2_offset, document1_offset);

    // Check the second proto
    ASSERT_THAT(proto_log->ReadProto(written_position),
                IsOkAndHolds(EqualsProto(document2)));

    ICING_ASSERT_OK(proto_log->PersistToDisk());
  }

  {
    // Make a new proto_log with the same file_path, and make sure we
    // can still write to the same underlying file.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           compression_level_)));
    auto recreated_proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write a third proto
    DocumentProto document3 =
        DocumentBuilder().SetKey("namespace3", "uri3").Build();

    ASSERT_THAT(recreated_proto_log->WriteProto(document3),
                IsOkAndHolds(Gt(last_offset)));
  }
}
334
// Reopens the same log file with compression levels 3, 9 and 0 in turn and
// checks that protos written at earlier levels stay readable while new
// protos can still be appended.
TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // Offsets are held as int64_t, matching the int64_t offsets used elsewhere
  // in this file, so the value returned by WriteProto is never narrowed.
  int64_t document1_offset = 0;
  int64_t document2_offset = 0;
  int64_t document3_offset = 0;

  // The first proto to write that's close to the max size. Leave some room for
  // the rest of the proto properties.
  std::string long_str(max_proto_size_ - 1024, 'a');
  DocumentProto document1 = DocumentBuilder()
                                .SetKey("namespace1", "uri1")
                                .AddStringProperty("long_str", long_str)
                                .Build();
  DocumentProto document2 =
      DocumentBuilder().SetKey("namespace2", "uri2").Build();
  DocumentProto document3 =
      DocumentBuilder().SetKey("namespace3", "uri3").Build();

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           /*compression_level_in=*/3)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the first proto
    ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
                               proto_log->WriteProto(document1));

    // Check that what we read is what we wrote
    ASSERT_THAT(proto_log->ReadProto(document1_offset),
                IsOkAndHolds(EqualsProto(document1)));

    ICING_ASSERT_OK(proto_log->PersistToDisk());
  }

  // Make a new proto_log with the same file_path but different compression
  // level, and make sure we can still read from and write to the same
  // underlying file.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           /*compression_level_in=*/9)));
    auto recreated_proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Check the first proto
    ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
                IsOkAndHolds(EqualsProto(document1)));

    // Write a second proto
    ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
                               recreated_proto_log->WriteProto(document2));

    ASSERT_GT(document2_offset, document1_offset);

    // Check the second proto
    ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
                IsOkAndHolds(EqualsProto(document2)));

    ICING_ASSERT_OK(recreated_proto_log->PersistToDisk());
  }

  // One more time but with 0 compression level
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           /*compression_level_in=*/0)));
    auto recreated_proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Check the first proto
    ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
                IsOkAndHolds(EqualsProto(document1)));

    // Check the second proto
    ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
                IsOkAndHolds(EqualsProto(document2)));

    // Write a third proto
    ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
                               recreated_proto_log->WriteProto(document3));

    ASSERT_GT(document3_offset, document2_offset);

    // Check the third proto
    ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset),
                IsOkAndHolds(EqualsProto(document3)));
  }
}
436
// Writes the same large document to a fresh log at compression level 3 and
// again at compression level 0, and checks that the level-0 file is larger.
TEST_F(PortableFileBackedProtoLogTest,
       WriteDifferentCompressionLevelDifferentSizes) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // File sizes are held as int64_t so that the value reported by GetFileSize
  // is not narrowed into an int.
  int64_t document_log_size_with_compression_3 = 0;
  int64_t document_log_size_with_compression_0 = 0;

  // The first proto to write that's close to the max size. Leave some room for
  // the rest of the proto properties.
  std::string long_str(max_proto_size_ - 1024, 'a');
  DocumentProto document1 = DocumentBuilder()
                                .SetKey("namespace1", "uri1")
                                .AddStringProperty("long_str", long_str)
                                .Build();

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           /*compression_level_in=*/3)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the proto
    ICING_ASSERT_OK(proto_log->WriteProto(document1));
    ICING_ASSERT_OK(proto_log->PersistToDisk());

    document_log_size_with_compression_3 =
        filesystem_.GetFileSize(file_path_.c_str());
    // Guard against comparing with a size that was never actually obtained.
    ASSERT_GT(document_log_size_with_compression_3, 0);
  }

  // Delete the proto_log so we can reuse the file_path
  filesystem_.DeleteFile(file_path_.c_str());

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(/*compress_in=*/true,
                                           max_proto_size_,
                                           /*compression_level_in=*/0)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the proto
    ICING_ASSERT_OK(proto_log->WriteProto(document1));
    ICING_ASSERT_OK(proto_log->PersistToDisk());

    document_log_size_with_compression_0 =
        filesystem_.GetFileSize(file_path_.c_str());

    // The file written at compression level 0 should be larger than the file
    // written at compression level 3.
    ASSERT_GT(document_log_size_with_compression_0,
              document_log_size_with_compression_3);
  }
}
496
// Overwrites the stored header checksum with a bogus value and verifies that
// reopening the log fails with INTERNAL ("Invalid header checksum").
TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    // Create the log, then let it go out of scope before touching the file.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_FALSE(create_result.has_data_loss());
  }

  // Replace the checksum in the on-disk header with an arbitrary value.
  int bogus_checksum = 24;
  Header on_disk_header = ReadHeader(filesystem_, file_path_);
  on_disk_header.SetHeaderChecksum(bogus_checksum);
  WriteHeader(filesystem_, file_path_, on_disk_header);

  {
    // Opening the same file again must now be refused.
    ASSERT_THAT(
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)),
        StatusIs(libtextclassifier3::StatusCode::INTERNAL,
                 HasSubstr("Invalid header checksum")));
  }
}
526
// Overwrites the header magic with an invalid value and verifies that
// reopening the log fails with INTERNAL ("Invalid header kMagic").
TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_FALSE(create_result.has_data_loss());

    // Pick a magic value and make sure it really differs from the valid one.
    int bogus_magic = -1;
    ASSERT_THAT(bogus_magic, Not(Eq(Header::kMagic)));

    // Store the bogus magic in the on-disk header. As in the original
    // sequence, this happens while the log object is still alive.
    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    on_disk_header.SetMagic(bogus_magic);
    WriteHeader(filesystem_, file_path_, on_disk_header);
  }

  {
    // Opening the same file again must now be refused.
    ASSERT_THAT(
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)),
        StatusIs(libtextclassifier3::StatusCode::INTERNAL,
                 HasSubstr("Invalid header kMagic")));
  }
}
558
// Corrupts persisted content without setting the dirty bit and verifies that
// reopening the log reports no data loss and keeps the content.
TEST_F(PortableFileBackedProtoLogTest,
       UnableToDetectCorruptContentWithoutDirtyBit) {
  // Not detecting the corruption here is deliberate: earlier corruption
  // detection is traded away for lower initialization latency. Skipping the
  // checksum calculation on initialization makes it much faster, at the cost
  // of noticing corruption at that point. Even if the corruption were
  // detected, nothing could be done beyond returning an error to clients.
  // That still happens, just later, when the log is accessed and a proto
  // can't actually be deserialized from it. See the description in
  // cl/374278280 for more details.
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_FALSE(create_result.has_data_loss());

    DocumentProto document =
        DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Write a document and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
                               proto_log->WriteProto(document));
    ICING_ASSERT_OK(proto_log->PersistToDisk());

    // "Corrupt" the log by overwriting the bytes at the document's offset
    // with the serialization of a modified document.
    document.set_uri("invalid");
    const std::string corrupt_bytes = document.SerializeAsString();
    ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), document_offset,
                                   corrupt_bytes.data(),
                                   corrupt_bytes.size()));
  }

  {
    // Reopening succeeds and reports no data loss.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_FALSE(create_result.has_data_loss());
    EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
    EXPECT_FALSE(create_result.recalculated_checksum);

    // The corrupted content is still in the file; nothing was thrown out.
    EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
                Gt(ProtoLog::kHeaderReservedBytes));
  }
}
616
// Corrupts the log content, marks the header dirty, and verifies that
// reopening the log reports COMPLETE data loss and shrinks the file back to
// the reserved header size.
TEST_F(PortableFileBackedProtoLogTest,
       DetectAndThrowOutCorruptContentWithDirtyBit) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    DocumentProto document =
        DocumentBuilder()
            .SetKey("namespace1", "uri1")
            .AddStringProperty("string_property", "foo", "bar")
            .Build();

    // Write the document.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
                               proto_log->WriteProto(document));

    // Reading it back must return exactly what was written.
    ASSERT_THAT(proto_log->ReadProto(document_offset),
                IsOkAndHolds(EqualsProto(document)));
  }

  {
    // "Corrupt" the content written in the log. The corrupt document is made
    // smaller than the original one so the write doesn't accidentally go past
    // the end of the file.
    DocumentProto corrupt_document =
        DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
    const std::string corrupt_bytes = corrupt_document.SerializeAsString();
    ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(),
                                   ProtoLog::kHeaderReservedBytes,
                                   corrupt_bytes.data(),
                                   corrupt_bytes.size()));

    Header on_disk_header = ReadHeader(filesystem_, file_path_);

    // Flag the log as dirty to reflect that something changed in it, and
    // refresh the header checksum to match the modified header.
    on_disk_header.SetDirtyFlag(true);
    on_disk_header.SetHeaderChecksum(on_disk_header.CalculateHeaderChecksum());

    WriteHeader(filesystem_, file_path_, on_disk_header);
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_TRUE(create_result.has_data_loss());
    EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));

    // Detecting the corruption required recalculating the checksum.
    EXPECT_TRUE(create_result.recalculated_checksum);

    // Everything was lost, so the file is back down to just the header.
    EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
                Eq(ProtoLog::kHeaderReservedBytes));

    // At least the log is no longer dirty.
    Header reopened_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(reopened_header.GetDirtyFlag());
  }
}
689
// Sets the dirty bit without changing any content and verifies that
// reopening the log recalculates the checksum, keeps the document, and
// clears the dirty bit.
TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto document =
      DocumentBuilder().SetKey("namespace1", "uri1").Build();
  int64_t document_offset;
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write the document and remember where it landed.
    ICING_ASSERT_OK_AND_ASSIGN(document_offset,
                               proto_log->WriteProto(document));

    // Reading it back must return exactly what was written.
    ASSERT_THAT(proto_log->ReadProto(document_offset),
                IsOkAndHolds(EqualsProto(document)));
  }

  {
    Header on_disk_header = ReadHeader(filesystem_, file_path_);

    // Simulate the dirty flag being set while no data has changed yet, e.g. a
    // crash between writing the dirty flag and erasing a proto.
    on_disk_header.SetDirtyFlag(true);
    on_disk_header.SetHeaderChecksum(on_disk_header.CalculateHeaderChecksum());

    WriteHeader(filesystem_, file_path_, on_disk_header);
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    EXPECT_FALSE(create_result.has_data_loss());

    // Nothing actually changed, but the false-alarm dirty bit should still
    // have forced a checksum recalculation.
    EXPECT_TRUE(create_result.recalculated_checksum);

    // The document is still there even though the dirty bit was set.
    EXPECT_THAT(proto_log->ReadProto(document_offset),
                IsOkAndHolds(EqualsProto(document)));

    Header reopened_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(reopened_header.GetDirtyFlag());
  }
}
746
// Appends junk bytes after the persisted log content to simulate a crash in
// the middle of a write, then verifies that reopening the log reports PARTIAL
// data loss, keeps both documents, and restores the earlier file size.
TEST_F(PortableFileBackedProtoLogTest,
       PersistToDiskKeepsPersistedDataAndTruncatesExtraData) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto document1 =
      DocumentBuilder().SetKey("namespace1", "uri1").Build();
  DocumentProto document2 =
      DocumentBuilder().SetKey("namespace2", "uri2").Build();
  // Offsets and sizes are held as int64_t, matching the int64_t offsets used
  // elsewhere in this file, so that nothing is narrowed into an int.
  int64_t document1_offset = 0;
  int64_t document2_offset = 0;
  int64_t log_size = 0;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Write and persist the first proto
    ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
                               proto_log->WriteProto(document1));
    ICING_ASSERT_OK(proto_log->PersistToDisk());

    // Write, but don't explicitly persist the second proto
    ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
                               proto_log->WriteProto(document2));

    // Check that what we read is what we wrote
    ASSERT_THAT(proto_log->ReadProto(document1_offset),
                IsOkAndHolds(EqualsProto(document1)));
    ASSERT_THAT(proto_log->ReadProto(document2_offset),
                IsOkAndHolds(EqualsProto(document2)));

    log_size = filesystem_.GetFileSize(file_path_.c_str());
    ASSERT_GT(log_size, 0);

    // PersistToDisk happens implicitly during the destructor.
  }

  {
    // The header rewind position and checksum aren't updated in this "system
    // crash" scenario.

    std::string bad_proto =
        "some incomplete proto that we didn't finish writing before the "
        "system crashed";
    // Assert the write so a failure is reported here, at its source.
    ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), log_size,
                                   bad_proto.data(), bad_proto.size()));

    // Double check that we actually wrote something to the underlying file
    ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
  }

  {
    // We can recover, but we have data loss
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_TRUE(create_result.has_data_loss());
    ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
    ASSERT_FALSE(create_result.recalculated_checksum);

    // Check that everything was persisted across instances
    ASSERT_THAT(proto_log->ReadProto(document1_offset),
                IsOkAndHolds(EqualsProto(document1)));
    ASSERT_THAT(proto_log->ReadProto(document2_offset),
                IsOkAndHolds(EqualsProto(document2)));

    // We correctly rewound to the last good state.
    ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
  }
}
824
// A log that was written to and then explicitly persisted must reopen with no
// data loss, no checksum recalculation, and a cleared dirty flag in the header
// read back from the file.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterPutAndPersistToDisk) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    // First instance: write a single document and persist it explicitly.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial_result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial_result.proto_log);
    ASSERT_FALSE(initial_result.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->PersistToDisk());

    // The document must read back exactly as it was written.
    ASSERT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));
  }

  {
    // Second instance over the same file. The result object (and the log it
    // owns) stays alive while the header is inspected.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened_result,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // Everything was persisted before, so nothing should need repairing.
    EXPECT_FALSE(reopened_result.has_data_loss());
    EXPECT_FALSE(reopened_result.recalculated_checksum);

    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(on_disk_header.GetDirtyFlag());
  }
}
867
// A log in which a document was written, erased, and then explicitly persisted
// must reopen with no data loss, no checksum recalculation, and a cleared
// dirty flag in the header read back from the file.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterDeleteAndPersistToDisk) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    // First instance: write, erase, then persist explicitly.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial_result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial_result.proto_log);
    ASSERT_FALSE(initial_result.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->EraseProto(doc_offset));
    ICING_ASSERT_OK(log->PersistToDisk());

    // Reading an erased document reports NOT_FOUND.
    ASSERT_THAT(log->ReadProto(doc_offset),
                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
  }

  {
    // Second instance over the same file. The result object (and the log it
    // owns) stays alive while the header is inspected.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened_result,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // Everything was persisted before, so nothing should need repairing.
    EXPECT_FALSE(reopened_result.has_data_loss());
    EXPECT_FALSE(reopened_result.recalculated_checksum);

    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(on_disk_header.GetDirtyFlag());
  }
}
911
// A log that was written to but never explicitly persisted must still reopen
// cleanly: the test relies on the log's destructor to persist the data and
// clear the dirty flag.
TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial_result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial_result.proto_log);
    ASSERT_FALSE(initial_result.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Write the document; note that PersistToDisk() is deliberately NOT called.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));

    // The document must read back exactly as it was written.
    ASSERT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));

    // Leaving this scope destroys the log; persistence (and clearing of the
    // dirty bit) is expected to happen as part of that destruction.
  }

  {
    // Second instance over the same file. The result object (and the log it
    // owns) stays alive while the header is inspected.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened_result,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // The destructor persisted everything, so nothing should need repairing.
    EXPECT_FALSE(reopened_result.has_data_loss());
    EXPECT_FALSE(reopened_result.recalculated_checksum);

    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(on_disk_header.GetDirtyFlag());
  }
}
955
// A log in which a document was written and erased, but never explicitly
// persisted, must still reopen cleanly: the test relies on the log's
// destructor to persist the data and clear the dirty flag.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterDeleteAndDestructor) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial_result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial_result.proto_log);
    ASSERT_FALSE(initial_result.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Write and erase the document; PersistToDisk() is deliberately NOT
    // called.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->EraseProto(doc_offset));

    // Reading an erased document reports NOT_FOUND.
    ASSERT_THAT(log->ReadProto(doc_offset),
                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));

    // Leaving this scope destroys the log; persistence (and clearing of the
    // dirty bit) is expected to happen as part of that destruction.
  }

  {
    // Second instance over the same file. The result object (and the log it
    // owns) stays alive while the header is inspected.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened_result,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // The destructor persisted everything, so nothing should need repairing.
    EXPECT_FALSE(reopened_result.has_data_loss());
    EXPECT_FALSE(reopened_result.recalculated_checksum);

    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(on_disk_header.GetDirtyFlag());
  }
}
1001
// Exercises the log iterator: advancing over an empty log reports
// OUT_OF_RANGE, and advancing over a populated log visits every document in
// write order before reporting OUT_OF_RANGE.
TEST_F(PortableFileBackedProtoLogTest, Iterator) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto first_doc =
      DocumentBuilder().SetKey("namespace", "uri1").Build();
  DocumentProto second_doc =
      DocumentBuilder().SetKey("namespace", "uri2").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto log = std::move(result.proto_log);
  ASSERT_FALSE(result.has_data_loss());

  {
    // Nothing has been written yet, so the very first Advance() must fail.
    auto it = log->GetIterator();
    ASSERT_THAT(it.Advance(),
                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  }

  {
    // Populate the log with two documents and walk over them.
    ICING_ASSERT_OK(log->WriteProto(first_doc));
    ICING_ASSERT_OK(log->WriteProto(second_doc));
    auto it = log->GetIterator();

    // First step lands on the first document.
    ICING_ASSERT_OK(it.Advance());
    ASSERT_THAT(log->ReadProto(it.GetOffset()),
                IsOkAndHolds(EqualsProto(first_doc)));

    // Second step lands on the second document.
    ICING_ASSERT_OK(it.Advance());
    ASSERT_THAT(log->ReadProto(it.GetOffset()),
                IsOkAndHolds(EqualsProto(second_doc)));

    // There is no third document to step onto.
    ASSERT_THAT(it.Advance(),
                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  }
}
1042
// Verifies the relationship between GetChecksum() and UpdateChecksum():
//   * both report the same value when the log has not changed in between,
//   * the value survives destroying the log and re-creating it over the same
//     file,
//   * PersistToDisk() does not alter it,
//   * appending a document changes it.
TEST_F(PortableFileBackedProtoLogTest, UpdateChecksum) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();

  // Checksum observed by the first instance. It is intentionally assigned
  // only once so that the second instance is compared against this value.
  Crc32 checksum;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    ICING_EXPECT_OK(proto_log->WriteProto(document));

    ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->GetChecksum());
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // Calling it twice with no changes should get us the same checksum
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult create_result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Checksum should be consistent across instances. Compare against the
    // value captured from the first instance rather than re-assigning it;
    // overwriting `checksum` here would reduce this to a self-comparison.
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // PersistToDisk shouldn't affect the checksum value
    ICING_EXPECT_OK(proto_log->PersistToDisk());
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // Check that modifying the log leads to a different checksum
    ICING_EXPECT_OK(proto_log->WriteProto(document));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Not(Eq(checksum))));
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Not(Eq(checksum))));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Not(Eq(checksum))));
  }
}
1093
// Verifies that EraseProto() overwrites the erased proto's bytes in the
// underlying file with zeros. Only one document is written, so the proto
// payload is expected to span from just after its metadata to end-of-file.
TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto document1 =
      DocumentBuilder().SetKey("namespace", "uri1").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult create_result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto proto_log = std::move(create_result.proto_log);
  ASSERT_FALSE(create_result.has_data_loss());

  // Writes and erases proto
  ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
                             proto_log->WriteProto(document1));
  ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));

  // Checks if the erased area is set to 0.
  const int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());

  // document1_offset + sizeof(int) is the start byte of the proto where
  // sizeof(int) is the size of the proto metadata.
  const int64_t proto_start =
      document1_offset + static_cast<int64_t>(sizeof(int));

  // Guard against a vacuous check: there must be at least one proto byte
  // between the end of the metadata and the end of the file.
  ASSERT_GT(file_size, proto_start);

  ICING_ASSERT_OK_AND_ASSIGN(
      MemoryMappedFile mmapped_file,
      MemoryMappedFile::Create(filesystem_, file_path_,
                               MemoryMappedFile::Strategy::READ_ONLY));

  // Map exactly [proto_start, file_size). Passing a length measured from the
  // start of the file here would extend the mapping past end-of-file and make
  // the loop below inspect bytes that are not part of the log.
  ICING_ASSERT_OK(mmapped_file.Remap(proto_start, file_size - proto_start));
  for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
    ASSERT_THAT(mmapped_file.region()[i], Eq(0));
  }
}
1127
// After erasing one of two written documents, reading the erased one reports
// NOT_FOUND while the other one is still returned intact.
TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto erased_doc =
      DocumentBuilder().SetKey("namespace", "uri1").Build();
  DocumentProto kept_doc =
      DocumentBuilder().SetKey("namespace", "uri2").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto log = std::move(result.proto_log);
  ASSERT_FALSE(result.has_data_loss());

  // Put both documents into the log, remembering where each one landed.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t erased_offset,
                             log->WriteProto(erased_doc));
  ICING_ASSERT_OK_AND_ASSIGN(int64_t kept_offset, log->WriteProto(kept_doc));

  // Remove only the first document.
  ICING_ASSERT_OK(log->EraseProto(erased_offset));

  // The erased document is gone...
  ASSERT_THAT(log->ReadProto(erased_offset),
              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
  // ...while the untouched one is still readable.
  ASSERT_THAT(log->ReadProto(kept_offset),
              IsOkAndHolds(EqualsProto(kept_doc)));
}
1159
// Checks the checksum reported by UpdateChecksum() against golden values while
// documents are erased across several successive log instances, and finally
// that the file can be re-opened without data loss. The golden CRC values
// depend on the exact sequence of writes and erases below, so the order of
// operations must not be changed.
TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  DocumentProto doc1 = DocumentBuilder().SetKey("namespace", "uri1").Build();
  DocumentProto doc2 = DocumentBuilder().SetKey("namespace", "uri2").Build();
  DocumentProto doc3 = DocumentBuilder().SetKey("namespace", "uri3").Build();
  DocumentProto doc4 = DocumentBuilder().SetKey("namespace", "uri4").Build();

  // Offsets that later instances need in order to erase doc2 and doc3.
  int64_t doc2_offset;
  int64_t doc3_offset;

  {
    // Instance 1: erase a document that was written by this same instance,
    // i.e. (per the original author's note) data located after the rewind
    // position, for which the checksum is not updated immediately.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(result.proto_log);
    ASSERT_FALSE(result.has_data_loss());

    // Three documents go in, in order.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc1_offset, log->WriteProto(doc1));
    ICING_ASSERT_OK_AND_ASSIGN(doc2_offset, log->WriteProto(doc2));
    ICING_ASSERT_OK_AND_ASSIGN(doc3_offset, log->WriteProto(doc3));

    // The first document is erased before any checksum update happened.
    ICING_ASSERT_OK(log->EraseProto(doc1_offset));

    EXPECT_THAT(log->UpdateChecksum(), IsOkAndHolds(Eq(Crc32(2175574628))));
  }  // The log is destroyed here.

  {
    // Instance 2: erase a document written by the previous instance, i.e.
    // (per the original author's note) data located before the rewind
    // position, for which the checksum is updated immediately.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(result.proto_log);
    ASSERT_FALSE(result.has_data_loss());

    ICING_ASSERT_OK(log->EraseProto(doc2_offset));

    EXPECT_THAT(log->UpdateChecksum(), IsOkAndHolds(Eq(Crc32(790877774))));
  }

  {
    // Instance 3: first append a new document, then erase one written by an
    // earlier instance.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(result.proto_log);
    ASSERT_FALSE(result.has_data_loss());

    // The appended document is new data for this instance.
    ICING_ASSERT_OK(log->WriteProto(doc4));

    // The third document predates this instance.
    ICING_ASSERT_OK(log->EraseProto(doc3_offset));

    EXPECT_THAT(log->UpdateChecksum(), IsOkAndHolds(Eq(Crc32(2344803210))));
  }  // The log is destroyed here, with the appended document included.

  {
    // Instance 4: creating the log again without reported data loss is taken
    // as evidence that the stored checksum matches the file contents.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(result.proto_log);
    EXPECT_FALSE(result.has_data_loss());
  }
}
1256
1257 } // namespace
1258 } // namespace lib
1259 } // namespace icing
1260