xref: /aosp_15_r20/external/icing/icing/file/portable-file-backed-proto-log_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/portable-file-backed-proto-log.h"
16 
17 #include <cstdint>
18 #include <cstdlib>
19 
20 #include "gmock/gmock.h"
21 #include "gtest/gtest.h"
22 #include "icing/document-builder.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/mock-filesystem.h"
25 #include "icing/portable/equals-proto.h"
26 #include "icing/proto/document.pb.h"
27 #include "icing/testing/common-matchers.h"
28 #include "icing/testing/tmp-directory.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 namespace {
34 
35 using ::icing::lib::portable_equals_proto::EqualsProto;
36 using ::testing::A;
37 using ::testing::Eq;
38 using ::testing::Gt;
39 using ::testing::HasSubstr;
40 using ::testing::Not;
41 using ::testing::NotNull;
42 using ::testing::Pair;
43 using ::testing::Return;
44 
45 using Header = PortableFileBackedProtoLog<DocumentProto>::Header;
46 
ReadHeader(Filesystem filesystem,const std::string & file_path)47 Header ReadHeader(Filesystem filesystem, const std::string& file_path) {
48   Header header;
49   filesystem.PRead(file_path.c_str(), &header, sizeof(Header),
50                    /*offset=*/0);
51   return header;
52 }
53 
WriteHeader(Filesystem filesystem,const std::string & file_path,Header & header)54 void WriteHeader(Filesystem filesystem, const std::string& file_path,
55                  Header& header) {
56   filesystem.Write(file_path.c_str(), &header, sizeof(Header));
57 }
58 
59 class PortableFileBackedProtoLogTest : public ::testing::Test {
60  protected:
61   // Adds a user-defined default construct because a const member variable may
62   // make the compiler accidentally delete the default constructor.
63   // https://stackoverflow.com/a/47368753
PortableFileBackedProtoLogTest()64   PortableFileBackedProtoLogTest() {}
65 
SetUp()66   void SetUp() override {
67     file_path_ = GetTestTempDir() + "/proto_log";
68     filesystem_.DeleteFile(file_path_.c_str());
69   }
70 
TearDown()71   void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
72 
73   const Filesystem filesystem_;
74   std::string file_path_;
75   bool compress_ = true;
76   int32_t compression_level_ =
77       PortableFileBackedProtoLog<DocumentProto>::kDefaultCompressionLevel;
78   int64_t max_proto_size_ = 256 * 1024;  // 256 KiB
79 };
80 
// Creating a log on a fresh path succeeds without data loss or checksum
// recalculation; creating it again on the same file with the compression flag
// flipped is rejected with INVALID_ARGUMENT.
TEST_F(PortableFileBackedProtoLogTest, Initialize) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  EXPECT_THAT(result.proto_log, NotNull());
  EXPECT_FALSE(result.has_data_loss());
  EXPECT_FALSE(result.recalculated_checksum);

  // The same file can't be reopened with different options.
  const bool flipped_compress = !compress_;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(flipped_compress, max_proto_size_,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
99 
// Each out-of-range option value below must make Create() fail with
// INVALID_ARGUMENT.
TEST_F(PortableFileBackedProtoLogTest, InitializeValidatesOptions) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  // max_proto_size must be greater than 0.
  int zero_max_proto_size = 0;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, zero_max_proto_size,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // max_proto_size must be under 16 MiB.
  int oversized_max_proto_size = 16 * 1024 * 1024;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, oversized_max_proto_size,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // compression_level must be between 0 and 9 inclusive: try just below.
  int negative_compression_level = -1;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         negative_compression_level)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // compression_level must be between 0 and 9 inclusive: try just above.
  int too_high_compression_level = 10;
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         too_high_compression_level)),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
133 
// A newly created log with no protos in it occupies exactly the reserved
// header space on disk.
TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));

  // Nothing has been written yet, so the file size should equal the number of
  // bytes reserved for the header.
  ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
            ProtoLog::kHeaderReservedBytes);
}
147 
// With the maximum proto size set to 1, writing a document with a namespace
// and uri must be rejected with INVALID_ARGUMENT.
TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  int tiny_max_proto_size = 1;
  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult result,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, tiny_max_proto_size,
                                         compression_level_)));
  auto log = std::move(result.proto_log);
  ASSERT_FALSE(result.has_data_loss());

  DocumentProto doc = DocumentBuilder().SetKey("namespace", "uri").Build();

  // The proto is larger than the configured maximum proto size.
  ASSERT_THAT(log->WriteProto(doc),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
165 
// Overwrites the first 4 bytes of a written proto entry with a value that
// isn't the expected proto magic, then checks that reading that entry fails
// with INTERNAL.
TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
  ICING_ASSERT_OK_AND_ASSIGN(
      PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
      PortableFileBackedProtoLog<DocumentProto>::Create(
          &filesystem_, file_path_,
          PortableFileBackedProtoLog<DocumentProto>::Options(
              compress_, max_proto_size_, compression_level_)));
  auto proto_log = std::move(create_result.proto_log);
  ASSERT_FALSE(create_result.has_data_loss());

  // Write a proto
  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();

  ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
                             proto_log->WriteProto(document));

  // 4 bytes of metadata that don't carry the kProtoMagic specified in
  // portable-file-backed-proto-log.h
  uint32_t wrong_magic = 0x7E000000;

  // Write the wrong kProtoMagic in; kProtoMagics are stored at the beginning
  // of a proto entry. The path-based PWrite is used so that no raw file
  // descriptor is opened (and left unclosed) by the test, and the write is
  // asserted so a failed corruption step can't go unnoticed.
  ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), file_offset, &wrong_magic,
                                 sizeof(wrong_magic)));

  ASSERT_THAT(proto_log->ReadProto(file_offset),
              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
197 
TEST_F(PortableFileBackedProtoLogTest,ReadWriteUncompressedProto)198 TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
199   int last_offset;
200   {
201     ICING_ASSERT_OK_AND_ASSIGN(
202         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
203         PortableFileBackedProtoLog<DocumentProto>::Create(
204             &filesystem_, file_path_,
205             PortableFileBackedProtoLog<DocumentProto>::Options(
206                 /*compress_in=*/false, max_proto_size_, compression_level_)));
207     auto proto_log = std::move(create_result.proto_log);
208     ASSERT_FALSE(create_result.has_data_loss());
209 
210     // Write the first proto
211     DocumentProto document1 =
212         DocumentBuilder().SetKey("namespace1", "uri1").Build();
213 
214     ICING_ASSERT_OK_AND_ASSIGN(int written_position,
215                                proto_log->WriteProto(document1));
216 
217     int document1_offset = written_position;
218 
219     // Check that what we read is what we wrote
220     ASSERT_THAT(proto_log->ReadProto(written_position),
221                 IsOkAndHolds(EqualsProto(document1)));
222 
223     // Write a second proto that's close to the max size. Leave some room for
224     // the rest of the proto properties.
225     std::string long_str(max_proto_size_ - 1024, 'a');
226     DocumentProto document2 = DocumentBuilder()
227                                   .SetKey("namespace2", "uri2")
228                                   .AddStringProperty("long_str", long_str)
229                                   .Build();
230 
231     ICING_ASSERT_OK_AND_ASSIGN(written_position,
232                                proto_log->WriteProto(document2));
233 
234     int document2_offset = written_position;
235     last_offset = written_position;
236     ASSERT_GT(document2_offset, document1_offset);
237 
238     // Check the second proto
239     ASSERT_THAT(proto_log->ReadProto(written_position),
240                 IsOkAndHolds(EqualsProto(document2)));
241 
242     ICING_ASSERT_OK(proto_log->PersistToDisk());
243   }
244 
245   {
246     // Make a new proto_log with the same file_path, and make sure we
247     // can still write to the same underlying file.
248     ICING_ASSERT_OK_AND_ASSIGN(
249         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
250         PortableFileBackedProtoLog<DocumentProto>::Create(
251             &filesystem_, file_path_,
252             PortableFileBackedProtoLog<DocumentProto>::Options(
253                 /*compress_in=*/false, max_proto_size_, compression_level_)));
254     auto recreated_proto_log = std::move(create_result.proto_log);
255     ASSERT_FALSE(create_result.has_data_loss());
256 
257     // Write a third proto
258     DocumentProto document3 =
259         DocumentBuilder().SetKey("namespace3", "uri3").Build();
260 
261     ASSERT_THAT(recreated_proto_log->WriteProto(document3),
262                 IsOkAndHolds(Gt(last_offset)));
263   }
264 }
265 
TEST_F(PortableFileBackedProtoLogTest,ReadWriteCompressedProto)266 TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
267   int last_offset;
268 
269   {
270     ICING_ASSERT_OK_AND_ASSIGN(
271         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
272         PortableFileBackedProtoLog<DocumentProto>::Create(
273             &filesystem_, file_path_,
274             PortableFileBackedProtoLog<DocumentProto>::Options(
275                 /*compress_in=*/true, max_proto_size_, compression_level_)));
276     auto proto_log = std::move(create_result.proto_log);
277     ASSERT_FALSE(create_result.has_data_loss());
278 
279     // Write the first proto
280     DocumentProto document1 =
281         DocumentBuilder().SetKey("namespace1", "uri1").Build();
282 
283     ICING_ASSERT_OK_AND_ASSIGN(int written_position,
284                                proto_log->WriteProto(document1));
285 
286     int document1_offset = written_position;
287 
288     // Check that what we read is what we wrote
289     ASSERT_THAT(proto_log->ReadProto(written_position),
290                 IsOkAndHolds(EqualsProto(document1)));
291 
292     // Write a second proto that's close to the max size. Leave some room for
293     // the rest of the proto properties.
294     std::string long_str(max_proto_size_ - 1024, 'a');
295     DocumentProto document2 = DocumentBuilder()
296                                   .SetKey("namespace2", "uri2")
297                                   .AddStringProperty("long_str", long_str)
298                                   .Build();
299 
300     ICING_ASSERT_OK_AND_ASSIGN(written_position,
301                                proto_log->WriteProto(document2));
302 
303     int document2_offset = written_position;
304     last_offset = written_position;
305     ASSERT_GT(document2_offset, document1_offset);
306 
307     // Check the second proto
308     ASSERT_THAT(proto_log->ReadProto(written_position),
309                 IsOkAndHolds(EqualsProto(document2)));
310 
311     ICING_ASSERT_OK(proto_log->PersistToDisk());
312   }
313 
314   {
315     // Make a new proto_log with the same file_path, and make sure we
316     // can still write to the same underlying file.
317     ICING_ASSERT_OK_AND_ASSIGN(
318         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
319         PortableFileBackedProtoLog<DocumentProto>::Create(
320             &filesystem_, file_path_,
321             PortableFileBackedProtoLog<DocumentProto>::Options(
322                 /*compress_in=*/true, max_proto_size_, compression_level_)));
323     auto recreated_proto_log = std::move(create_result.proto_log);
324     ASSERT_FALSE(create_result.has_data_loss());
325 
326     // Write a third proto
327     DocumentProto document3 =
328         DocumentBuilder().SetKey("namespace3", "uri3").Build();
329 
330     ASSERT_THAT(recreated_proto_log->WriteProto(document3),
331                 IsOkAndHolds(Gt(last_offset)));
332   }
333 }
334 
TEST_F(PortableFileBackedProtoLogTest,ReadWriteDifferentCompressionLevel)335 TEST_F(PortableFileBackedProtoLogTest, ReadWriteDifferentCompressionLevel) {
336   int document1_offset;
337   int document2_offset;
338   int document3_offset;
339 
340   // The first proto to write that's close to the max size. Leave some room for
341   // the rest of the proto properties.
342   std::string long_str(max_proto_size_ - 1024, 'a');
343   DocumentProto document1 = DocumentBuilder()
344                                 .SetKey("namespace1", "uri1")
345                                 .AddStringProperty("long_str", long_str)
346                                 .Build();
347   DocumentProto document2 =
348       DocumentBuilder().SetKey("namespace2", "uri2").Build();
349   DocumentProto document3 =
350       DocumentBuilder().SetKey("namespace3", "uri3").Build();
351 
352   {
353     ICING_ASSERT_OK_AND_ASSIGN(
354         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
355         PortableFileBackedProtoLog<DocumentProto>::Create(
356             &filesystem_, file_path_,
357             PortableFileBackedProtoLog<DocumentProto>::Options(
358                 /*compress_in=*/true, max_proto_size_,
359                 /*compression_level_in=*/3)));
360     auto proto_log = std::move(create_result.proto_log);
361     ASSERT_FALSE(create_result.has_data_loss());
362 
363     // Write the first proto
364     ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
365                                proto_log->WriteProto(document1));
366 
367     // Check that what we read is what we wrote
368     ASSERT_THAT(proto_log->ReadProto(document1_offset),
369                 IsOkAndHolds(EqualsProto(document1)));
370 
371     ICING_ASSERT_OK(proto_log->PersistToDisk());
372   }
373 
374   // Make a new proto_log with the same file_path but different compression
375   // level, and make sure we can still read from and write to the same
376   // underlying file.
377   {
378     ICING_ASSERT_OK_AND_ASSIGN(
379         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
380         PortableFileBackedProtoLog<DocumentProto>::Create(
381             &filesystem_, file_path_,
382             PortableFileBackedProtoLog<DocumentProto>::Options(
383                 /*compress_in=*/true, max_proto_size_,
384                 /*compression_level_in=*/9)));
385     auto recreated_proto_log = std::move(create_result.proto_log);
386     ASSERT_FALSE(create_result.has_data_loss());
387 
388     // Check the first proto
389     ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
390                 IsOkAndHolds(EqualsProto(document1)));
391 
392     // Write a second proto
393     ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
394                                recreated_proto_log->WriteProto(document2));
395 
396     ASSERT_GT(document2_offset, document1_offset);
397 
398     // Check the second proto
399     ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
400                 IsOkAndHolds(EqualsProto(document2)));
401 
402     ICING_ASSERT_OK(recreated_proto_log->PersistToDisk());
403   }
404 
405   // One more time but with 0 compression level
406   {
407     ICING_ASSERT_OK_AND_ASSIGN(
408         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
409         PortableFileBackedProtoLog<DocumentProto>::Create(
410             &filesystem_, file_path_,
411             PortableFileBackedProtoLog<DocumentProto>::Options(
412                 /*compress_in=*/true, max_proto_size_,
413                 /*compression_level=*/0)));
414     auto recreated_proto_log = std::move(create_result.proto_log);
415     ASSERT_FALSE(create_result.has_data_loss());
416 
417     // Check the first proto
418     ASSERT_THAT(recreated_proto_log->ReadProto(document1_offset),
419                 IsOkAndHolds(EqualsProto(document1)));
420 
421     // Check the second proto
422     ASSERT_THAT(recreated_proto_log->ReadProto(document2_offset),
423                 IsOkAndHolds(EqualsProto(document2)));
424 
425     // Write a third proto
426     ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
427                                recreated_proto_log->WriteProto(document3));
428 
429     ASSERT_GT(document3_offset, document2_offset);
430 
431     // Check the third proto
432     ASSERT_THAT(recreated_proto_log->ReadProto(document3_offset),
433                 IsOkAndHolds(EqualsProto(document3)));
434   }
435 }
436 
TEST_F(PortableFileBackedProtoLogTest,WriteDifferentCompressionLevelDifferentSizes)437 TEST_F(PortableFileBackedProtoLogTest,
438        WriteDifferentCompressionLevelDifferentSizes) {
439   int document_log_size_with_compression_3;
440   int document_log_size_with_no_compression;
441 
442   // The first proto to write that's close to the max size. Leave some room for
443   // the rest of the proto properties.
444   std::string long_str(max_proto_size_ - 1024, 'a');
445   DocumentProto document1 = DocumentBuilder()
446                                 .SetKey("namespace1", "uri1")
447                                 .AddStringProperty("long_str", long_str)
448                                 .Build();
449 
450   {
451     ICING_ASSERT_OK_AND_ASSIGN(
452         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
453         PortableFileBackedProtoLog<DocumentProto>::Create(
454             &filesystem_, file_path_,
455             PortableFileBackedProtoLog<DocumentProto>::Options(
456                 /*compress_in=*/true, max_proto_size_,
457                 /*compression_level_in=*/3)));
458     auto proto_log = std::move(create_result.proto_log);
459     ASSERT_FALSE(create_result.has_data_loss());
460 
461     // Write the proto
462     ICING_ASSERT_OK(proto_log->WriteProto(document1));
463     ICING_ASSERT_OK(proto_log->PersistToDisk());
464 
465     document_log_size_with_compression_3 =
466         filesystem_.GetFileSize(file_path_.c_str());
467   }
468 
469   // Delete the proto_log so we can reuse the file_path
470   filesystem_.DeleteFile(file_path_.c_str());
471 
472   {
473     ICING_ASSERT_OK_AND_ASSIGN(
474         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
475         PortableFileBackedProtoLog<DocumentProto>::Create(
476             &filesystem_, file_path_,
477             PortableFileBackedProtoLog<DocumentProto>::Options(
478                 /*compress_in=*/true, max_proto_size_,
479                 /*compression_level_in=*/0)));
480     auto proto_log = std::move(create_result.proto_log);
481     ASSERT_FALSE(create_result.has_data_loss());
482 
483     // Write the proto
484     ICING_ASSERT_OK(proto_log->WriteProto(document1));
485     ICING_ASSERT_OK(proto_log->PersistToDisk());
486 
487     document_log_size_with_no_compression =
488         filesystem_.GetFileSize(file_path_.c_str());
489 
490     // Uncompressed document file size should be larger than original compressed
491     // document file size
492     ASSERT_GT(document_log_size_with_no_compression,
493               document_log_size_with_compression_3);
494   }
495 }
496 
// Rewrites the stored header with a different header checksum value and
// expects the next Create() to fail with INTERNAL and a message mentioning
// "Invalid header checksum".
TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    // Create the log, then let it go out of scope before touching the file.
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto log = std::move(result.proto_log);
    EXPECT_FALSE(result.has_data_loss());
  }

  // Arbitrary checksum value used to corrupt the header.
  int bogus_checksum = 24;

  // Read the header back, swap in the bogus checksum, and write it out again.
  Header on_disk_header = ReadHeader(filesystem_, file_path_);
  on_disk_header.SetHeaderChecksum(bogus_checksum);
  WriteHeader(filesystem_, file_path_, on_disk_header);

  // Reinitializing the same proto log must now be refused.
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INTERNAL,
               HasSubstr("Invalid header checksum")));
}
526 
// Rewrites the stored header with a magic value different from Header::kMagic
// and expects the next Create() to fail with INTERNAL and a message mentioning
// "Invalid header kMagic".
TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto log = std::move(result.proto_log);
    EXPECT_FALSE(result.has_data_loss());

    // Pick a magic that differs from the real one; the magic is stored at the
    // beginning of the header.
    int bad_magic = -1;
    ASSERT_THAT(bad_magic, Not(Eq(Header::kMagic)));

    // Read the header back, swap in the bad magic, and write it out again.
    // This happens while the log object above is still in scope.
    Header on_disk_header = ReadHeader(filesystem_, file_path_);
    on_disk_header.SetMagic(bad_magic);
    WriteHeader(filesystem_, file_path_, on_disk_header);
  }

  // Reinitializing the same proto log must now be refused.
  ASSERT_THAT(
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)),
      StatusIs(libtextclassifier3::StatusCode::INTERNAL,
               HasSubstr("Invalid header kMagic")));
}
558 
TEST_F(PortableFileBackedProtoLogTest,UnableToDetectCorruptContentWithoutDirtyBit)559 TEST_F(PortableFileBackedProtoLogTest,
560        UnableToDetectCorruptContentWithoutDirtyBit) {
561   // This is intentional that we can't detect corruption. We're trading off
562   // earlier corruption detection for lower initialization latency. By not
563   // calculating the checksum on initialization, we can initialize much faster,
564   // but at the cost of detecting corruption. Note that even if we did detect
565   // corruption, there was nothing we could've done except throw an error to
566   // clients. We'll still do that, but at some later point when the log is
567   // attempting to be accessed and we can't actually deserialize a proto from
568   // it. See the description in cl/374278280 for more details.
569 
570   {
571     ICING_ASSERT_OK_AND_ASSIGN(
572         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
573         PortableFileBackedProtoLog<DocumentProto>::Create(
574             &filesystem_, file_path_,
575             PortableFileBackedProtoLog<DocumentProto>::Options(
576                 compress_, max_proto_size_, compression_level_)));
577     auto proto_log = std::move(create_result.proto_log);
578     EXPECT_FALSE(create_result.has_data_loss());
579 
580     DocumentProto document =
581         DocumentBuilder().SetKey("namespace1", "uri1").Build();
582 
583     // Write and persist an document.
584     ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
585                                proto_log->WriteProto(document));
586     ICING_ASSERT_OK(proto_log->PersistToDisk());
587 
588     // "Corrupt" the content written in the log.
589     document.set_uri("invalid");
590     std::string serialized_document = document.SerializeAsString();
591     ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), document_offset,
592                                    serialized_document.data(),
593                                    serialized_document.size()));
594   }
595 
596   {
597     // We can recover, and we don't have data loss.
598     ICING_ASSERT_OK_AND_ASSIGN(
599         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
600         PortableFileBackedProtoLog<DocumentProto>::Create(
601             &filesystem_, file_path_,
602             PortableFileBackedProtoLog<DocumentProto>::Options(
603                 compress_, max_proto_size_, compression_level_)));
604     auto proto_log = std::move(create_result.proto_log);
605     EXPECT_FALSE(create_result.has_data_loss());
606     EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
607     EXPECT_FALSE(create_result.recalculated_checksum);
608 
609     // We still have the corrupted content in our file, we didn't throw
610     // everything out.
611     EXPECT_THAT(
612         filesystem_.GetFileSize(file_path_.c_str()),
613         Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
614   }
615 }
616 
TEST_F(PortableFileBackedProtoLogTest,DetectAndThrowOutCorruptContentWithDirtyBit)617 TEST_F(PortableFileBackedProtoLogTest,
618        DetectAndThrowOutCorruptContentWithDirtyBit) {
619   {
620     ICING_ASSERT_OK_AND_ASSIGN(
621         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
622         PortableFileBackedProtoLog<DocumentProto>::Create(
623             &filesystem_, file_path_,
624             PortableFileBackedProtoLog<DocumentProto>::Options(
625                 compress_, max_proto_size_, compression_level_)));
626     auto proto_log = std::move(create_result.proto_log);
627     ASSERT_FALSE(create_result.has_data_loss());
628 
629     DocumentProto document =
630         DocumentBuilder()
631             .SetKey("namespace1", "uri1")
632             .AddStringProperty("string_property", "foo", "bar")
633             .Build();
634 
635     // Write and persist the protos
636     ICING_ASSERT_OK_AND_ASSIGN(int64_t document_offset,
637                                proto_log->WriteProto(document));
638 
639     // Check that what we read is what we wrote
640     ASSERT_THAT(proto_log->ReadProto(document_offset),
641                 IsOkAndHolds(EqualsProto(document)));
642   }
643 
644   {
645     // "Corrupt" the content written in the log. Make the corrupt document
646     // smaller than our original one so we don't accidentally write past our
647     // file.
648     DocumentProto document =
649         DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
650     std::string serialized_document = document.SerializeAsString();
651     ASSERT_TRUE(filesystem_.PWrite(
652         file_path_.c_str(),
653         PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes,
654         serialized_document.data(), serialized_document.size()));
655 
656     Header header = ReadHeader(filesystem_, file_path_);
657 
658     // Set dirty bit to true to reflect that something changed in the log.
659     header.SetDirtyFlag(true);
660     header.SetHeaderChecksum(header.CalculateHeaderChecksum());
661 
662     WriteHeader(filesystem_, file_path_, header);
663   }
664 
665   {
666     ICING_ASSERT_OK_AND_ASSIGN(
667         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
668         PortableFileBackedProtoLog<DocumentProto>::Create(
669             &filesystem_, file_path_,
670             PortableFileBackedProtoLog<DocumentProto>::Options(
671                 compress_, max_proto_size_, compression_level_)));
672     auto proto_log = std::move(create_result.proto_log);
673     EXPECT_TRUE(create_result.has_data_loss());
674     EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
675 
676     // We had to recalculate the checksum to detect the corruption.
677     EXPECT_TRUE(create_result.recalculated_checksum);
678 
679     // We lost everything, file size is back down to the header.
680     EXPECT_THAT(
681         filesystem_.GetFileSize(file_path_.c_str()),
682         Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
683 
684     // At least the log is no longer dirty.
685     Header header = ReadHeader(filesystem_, file_path_);
686     EXPECT_FALSE(header.GetDirtyFlag());
687   }
688 }
689 
// Marks the header dirty without changing any content and checks that
// reopening the log recalculates the checksum, keeps the written document
// readable, reports no data loss and clears the dirty flag.
TEST_F(PortableFileBackedProtoLogTest, DirtyBitFalseAlarmKeepsData) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();
  int64_t doc_offset;
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto log = std::move(result.proto_log);
    ASSERT_FALSE(result.has_data_loss());

    // Write the first proto and remember where it landed.
    ICING_ASSERT_OK_AND_ASSIGN(doc_offset, log->WriteProto(doc));

    // Reading it back must give the same document.
    ASSERT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));
  }

  {
    Header on_disk_header = ReadHeader(filesystem_, file_path_);

    // Simulate the dirty flag being set while no data has changed yet, e.g. a
    // crash between writing the dirty flag and erasing a proto.
    on_disk_header.SetDirtyFlag(true);
    on_disk_header.SetHeaderChecksum(on_disk_header.CalculateHeaderChecksum());

    WriteHeader(filesystem_, file_path_, on_disk_header);
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult result,
        ProtoLog::Create(&filesystem_, file_path_,
                         ProtoLog::Options(compress_, max_proto_size_,
                                           compression_level_)));
    auto log = std::move(result.proto_log);
    EXPECT_FALSE(result.has_data_loss());

    // Nothing actually changed, but the false-alarm dirty bit should still
    // have caused the checksum to be recalculated.
    EXPECT_TRUE(result.recalculated_checksum);

    // The document is still there even though the dirty bit was set.
    EXPECT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));

    Header reloaded_header = ReadHeader(filesystem_, file_path_);
    EXPECT_FALSE(reloaded_header.GetDirtyFlag());
  }
}
746 
TEST_F(PortableFileBackedProtoLogTest,PersistToDiskKeepsPersistedDataAndTruncatesExtraData)747 TEST_F(PortableFileBackedProtoLogTest,
748        PersistToDiskKeepsPersistedDataAndTruncatesExtraData) {
749   DocumentProto document1 =
750       DocumentBuilder().SetKey("namespace1", "uri1").Build();
751   DocumentProto document2 =
752       DocumentBuilder().SetKey("namespace2", "uri2").Build();
753   int document1_offset, document2_offset;
754   int log_size;
755 
756   {
757     ICING_ASSERT_OK_AND_ASSIGN(
758         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
759         PortableFileBackedProtoLog<DocumentProto>::Create(
760             &filesystem_, file_path_,
761             PortableFileBackedProtoLog<DocumentProto>::Options(
762                 compress_, max_proto_size_, compression_level_)));
763     auto proto_log = std::move(create_result.proto_log);
764     ASSERT_FALSE(create_result.has_data_loss());
765 
766     // Write and persist the first proto
767     ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
768                                proto_log->WriteProto(document1));
769     ICING_ASSERT_OK(proto_log->PersistToDisk());
770 
771     // Write, but don't explicitly persist the second proto
772     ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
773                                proto_log->WriteProto(document2));
774 
775     // Check that what we read is what we wrote
776     ASSERT_THAT(proto_log->ReadProto(document1_offset),
777                 IsOkAndHolds(EqualsProto(document1)));
778     ASSERT_THAT(proto_log->ReadProto(document2_offset),
779                 IsOkAndHolds(EqualsProto(document2)));
780 
781     log_size = filesystem_.GetFileSize(file_path_.c_str());
782     ASSERT_GT(log_size, 0);
783 
784     // PersistToDisk happens implicitly during the destructor.
785   }
786 
787   {
788     // The header rewind position and checksum aren't updated in this "system
789     // crash" scenario.
790 
791     std::string bad_proto =
792         "some incomplete proto that we didn't finish writing before the "
793         "system crashed";
794     filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(),
795                        bad_proto.size());
796 
797     // Double check that we actually wrote something to the underlying file
798     ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
799   }
800 
801   {
802     // We can recover, but we have data loss
803     ICING_ASSERT_OK_AND_ASSIGN(
804         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
805         PortableFileBackedProtoLog<DocumentProto>::Create(
806             &filesystem_, file_path_,
807             PortableFileBackedProtoLog<DocumentProto>::Options(
808                 compress_, max_proto_size_, compression_level_)));
809     auto proto_log = std::move(create_result.proto_log);
810     ASSERT_TRUE(create_result.has_data_loss());
811     ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
812     ASSERT_FALSE(create_result.recalculated_checksum);
813 
814     // Check that everything was persisted across instances
815     ASSERT_THAT(proto_log->ReadProto(document1_offset),
816                 IsOkAndHolds(EqualsProto(document1)));
817     ASSERT_THAT(proto_log->ReadProto(document2_offset),
818                 IsOkAndHolds(EqualsProto(document2)));
819 
820     // We correctly rewound to the last good state.
821     ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
822   }
823 }
824 
// A log that was explicitly persisted after a write should reopen cleanly: no
// data loss, no checksum recalculation, and a cleared dirty flag in the
// on-disk header.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterPutAndPersistToDisk) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial.proto_log);
    ASSERT_FALSE(initial.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Append one proto, then flush explicitly.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->PersistToDisk());

    // Round-trip check on the proto just written.
    ASSERT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // Everything was persisted before the first instance went away, so the
    // reopened log is expected to be in a perfect state.
    EXPECT_FALSE(reopened.has_data_loss());
    EXPECT_FALSE(reopened.recalculated_checksum);

    EXPECT_FALSE(ReadHeader(filesystem_, file_path_).GetDirtyFlag());
  }
}
867 
// A log that was explicitly persisted after a write followed by an erase
// should reopen cleanly: no data loss, no checksum recalculation, and a
// cleared dirty flag in the on-disk header.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterDeleteAndPersistToDisk) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial.proto_log);
    ASSERT_FALSE(initial.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Append one proto, erase it again, then flush explicitly.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->EraseProto(doc_offset));
    ICING_ASSERT_OK(log->PersistToDisk());

    // Reading an erased proto must report NOT_FOUND.
    ASSERT_THAT(log->ReadProto(doc_offset),
                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // Everything was persisted before the first instance went away, so the
    // reopened log is expected to be in a perfect state.
    EXPECT_FALSE(reopened.has_data_loss());
    EXPECT_FALSE(reopened.recalculated_checksum);

    EXPECT_FALSE(ReadHeader(filesystem_, file_path_).GetDirtyFlag());
  }
}
911 
// Same expectation as the explicit-persist variant, but here the test relies
// solely on the log's destructor to flush a write: reopening must show no data
// loss, no checksum recalculation, and a cleared dirty flag.
TEST_F(PortableFileBackedProtoLogTest, DirtyBitIsFalseAfterPutAndDestructor) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial.proto_log);
    ASSERT_FALSE(initial.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Append one proto; no explicit PersistToDisk() call follows.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));

    // Round-trip check on the proto just written.
    ASSERT_THAT(log->ReadProto(doc_offset), IsOkAndHolds(EqualsProto(doc)));

    // Leaving this scope destroys the log. The destructor is expected to call
    // PersistToDisk(), which in turn clears the dirty bit.
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // The implicit persist should have left the log in a perfect state.
    EXPECT_FALSE(reopened.has_data_loss());
    EXPECT_FALSE(reopened.recalculated_checksum);

    EXPECT_FALSE(ReadHeader(filesystem_, file_path_).GetDirtyFlag());
  }
}
955 
// Same expectation as the explicit-persist erase variant, but here the test
// relies solely on the log's destructor to flush a write followed by an erase:
// reopening must show no data loss, no checksum recalculation, and a cleared
// dirty flag.
TEST_F(PortableFileBackedProtoLogTest,
       DirtyBitIsFalseAfterDeleteAndDestructor) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
  const ProtoLog::Options options(compress_, max_proto_size_,
                                  compression_level_);

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult initial,
        ProtoLog::Create(&filesystem_, file_path_, options));
    auto log = std::move(initial.proto_log);
    ASSERT_FALSE(initial.has_data_loss());

    DocumentProto doc = DocumentBuilder().SetKey("namespace1", "uri1").Build();

    // Append one proto and erase it again; no explicit PersistToDisk() call
    // follows.
    ICING_ASSERT_OK_AND_ASSIGN(int64_t doc_offset, log->WriteProto(doc));
    ICING_ASSERT_OK(log->EraseProto(doc_offset));

    // Reading an erased proto must report NOT_FOUND.
    ASSERT_THAT(log->ReadProto(doc_offset),
                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));

    // Leaving this scope destroys the log. The destructor is expected to call
    // PersistToDisk(), which in turn clears the dirty bit.
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        ProtoLog::CreateResult reopened,
        ProtoLog::Create(&filesystem_, file_path_, options));

    // The implicit persist should have left the log in a perfect state.
    EXPECT_FALSE(reopened.has_data_loss());
    EXPECT_FALSE(reopened.recalculated_checksum);

    EXPECT_FALSE(ReadHeader(filesystem_, file_path_).GetDirtyFlag());
  }
}
1001 
// Exercises the log iterator: on an empty log the first Advance() reports
// OUT_OF_RANGE; after two writes the iterator yields both protos in write
// order and then reports OUT_OF_RANGE again.
TEST_F(PortableFileBackedProtoLogTest, Iterator) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto first_doc =
      DocumentBuilder().SetKey("namespace", "uri1").Build();
  DocumentProto second_doc =
      DocumentBuilder().SetKey("namespace", "uri2").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult created,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto log = std::move(created.proto_log);
  ASSERT_FALSE(created.has_data_loss());

  {
    // Nothing has been written yet, so there is nothing to advance to.
    auto it = log->GetIterator();
    ASSERT_THAT(it.Advance(),
                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  }

  {
    // Populate the log, then walk it with a fresh iterator.
    ICING_ASSERT_OK(log->WriteProto(first_doc));
    ICING_ASSERT_OK(log->WriteProto(second_doc));
    auto it = log->GetIterator();

    // First element.
    ICING_ASSERT_OK(it.Advance());
    ASSERT_THAT(log->ReadProto(it.GetOffset()),
                IsOkAndHolds(EqualsProto(first_doc)));

    // Second element.
    ICING_ASSERT_OK(it.Advance());
    ASSERT_THAT(log->ReadProto(it.GetOffset()),
                IsOkAndHolds(EqualsProto(second_doc)));

    // Past the end.
    ASSERT_THAT(it.Advance(),
                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  }
}
1042 
// Verifies the relationship between GetChecksum() and UpdateChecksum():
//  - both agree after a write, and repeated UpdateChecksum() calls with no
//    intervening change return the same value;
//  - a second instance opened on the same file reports the checksum that the
//    first instance computed;
//  - PersistToDisk() does not change the checksum;
//  - writing another proto changes it.
TEST_F(PortableFileBackedProtoLogTest, UpdateChecksum) {
  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
  // Checksum captured from the first instance; the second instance is compared
  // against this value.
  Crc32 checksum;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
        PortableFileBackedProtoLog<DocumentProto>::Create(
            &filesystem_, file_path_,
            PortableFileBackedProtoLog<DocumentProto>::Options(
                compress_, max_proto_size_, compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    ICING_EXPECT_OK(proto_log->WriteProto(document));

    ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->GetChecksum());
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // Calling it twice with no changes should get us the same checksum
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
  }

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
        PortableFileBackedProtoLog<DocumentProto>::Create(
            &filesystem_, file_path_,
            PortableFileBackedProtoLog<DocumentProto>::Options(
                compress_, max_proto_size_, compression_level_)));
    auto proto_log = std::move(create_result.proto_log);
    ASSERT_FALSE(create_result.has_data_loss());

    // Checksum should be consistent across instances. Compare against the
    // value captured from the first instance instead of re-assigning
    // `checksum` here; re-assigning would make this check compare the new
    // instance's checksum with itself.
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Eq(checksum)));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // PersistToDisk shouldn't affect the checksum value
    ICING_EXPECT_OK(proto_log->PersistToDisk());
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Eq(checksum)));

    // Check that modifying the log leads to a different checksum
    ICING_EXPECT_OK(proto_log->WriteProto(document));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Not(Eq(checksum))));
    EXPECT_THAT(proto_log->UpdateChecksum(), IsOkAndHolds(Not(Eq(checksum))));
    EXPECT_THAT(proto_log->GetChecksum(), IsOkAndHolds(Not(Eq(checksum))));
  }
}
1093 
// Verifies that erasing a proto overwrites its serialized bytes in the file
// with zeros. The file is inspected directly through a read-only memory
// mapping instead of going through the log API.
TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
  DocumentProto document1 =
      DocumentBuilder().SetKey("namespace", "uri1").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
      PortableFileBackedProtoLog<DocumentProto>::Create(
          &filesystem_, file_path_,
          PortableFileBackedProtoLog<DocumentProto>::Options(
              compress_, max_proto_size_, compression_level_)));
  auto proto_log = std::move(create_result.proto_log);
  ASSERT_FALSE(create_result.has_data_loss());

  // Writes and erases proto
  ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
                             proto_log->WriteProto(document1));
  ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));

  // Checks if the erased area is set to 0.
  int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
  ICING_ASSERT_OK_AND_ASSIGN(
      MemoryMappedFile mmapped_file,
      MemoryMappedFile::Create(filesystem_, file_path_,
                               MemoryMappedFile::Strategy::READ_ONLY));

  // document1_offset + sizeof(int) is the start byte of the proto where
  // sizeof(int) is the size of the proto metadata. Since this is the only
  // proto in the log, its bytes run from there to the end of the file.
  const int64_t erased_size =
      file_size - document1_offset - static_cast<int64_t>(sizeof(int));
  // Guard against a vacuous pass: the loop below must inspect at least one
  // byte.
  ASSERT_GT(erased_size, 0);

  // Map exactly the proto bytes. A mapping size of `file_size - 1` measured
  // from the proto start would extend past the end of the file.
  ICING_ASSERT_OK(
      mmapped_file.Remap(document1_offset + sizeof(int), erased_size));
  for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
    ASSERT_THAT(mmapped_file.region()[i], Eq(0));
  }
}
1127 
// Verifies that erasing one proto makes reads at its offset report NOT_FOUND
// while a neighbouring proto in the same log stays readable.
TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;

  DocumentProto erased_doc =
      DocumentBuilder().SetKey("namespace", "uri1").Build();
  DocumentProto kept_doc =
      DocumentBuilder().SetKey("namespace", "uri2").Build();

  ICING_ASSERT_OK_AND_ASSIGN(
      ProtoLog::CreateResult created,
      ProtoLog::Create(&filesystem_, file_path_,
                       ProtoLog::Options(compress_, max_proto_size_,
                                         compression_level_)));
  auto log = std::move(created.proto_log);
  ASSERT_FALSE(created.has_data_loss());

  // Two protos go in, back to back.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t erased_offset,
                             log->WriteProto(erased_doc));
  ICING_ASSERT_OK_AND_ASSIGN(int64_t kept_offset, log->WriteProto(kept_doc));

  // Only the first one is erased.
  ICING_ASSERT_OK(log->EraseProto(erased_offset));

  // The erased proto is gone...
  ASSERT_THAT(log->ReadProto(erased_offset),
              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
  // ...and the other one is unaffected.
  ASSERT_THAT(log->ReadProto(kept_offset),
              IsOkAndHolds(EqualsProto(kept_doc)));
}
1159 
TEST_F(PortableFileBackedProtoLogTest,ChecksumShouldBeCorrectWithErasedProto)1160 TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
1161   DocumentProto document1 =
1162       DocumentBuilder().SetKey("namespace", "uri1").Build();
1163   DocumentProto document2 =
1164       DocumentBuilder().SetKey("namespace", "uri2").Build();
1165   DocumentProto document3 =
1166       DocumentBuilder().SetKey("namespace", "uri3").Build();
1167   DocumentProto document4 =
1168       DocumentBuilder().SetKey("namespace", "uri4").Build();
1169 
1170   int64_t document2_offset;
1171   int64_t document3_offset;
1172 
1173   {
1174     // Erase data after the rewind position. This won't update the checksum
1175     // immediately.
1176     ICING_ASSERT_OK_AND_ASSIGN(
1177         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
1178         PortableFileBackedProtoLog<DocumentProto>::Create(
1179             &filesystem_, file_path_,
1180             PortableFileBackedProtoLog<DocumentProto>::Options(
1181                 compress_, max_proto_size_, compression_level_)));
1182     auto proto_log = std::move(create_result.proto_log);
1183     ASSERT_FALSE(create_result.has_data_loss());
1184 
1185     // Writes 3 protos
1186     ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
1187                                proto_log->WriteProto(document1));
1188     ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
1189                                proto_log->WriteProto(document2));
1190     ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
1191                                proto_log->WriteProto(document3));
1192 
1193     // Erases the 1st proto, checksum won't be updated immediately because the
1194     // rewind position is 0.
1195     ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
1196 
1197     EXPECT_THAT(proto_log->UpdateChecksum(),
1198                 IsOkAndHolds(Eq(Crc32(2175574628))));
1199   }  // New checksum is updated in destructor.
1200 
1201   {
1202     // Erase data before the rewind position. This will update the checksum
1203     // immediately.
1204     ICING_ASSERT_OK_AND_ASSIGN(
1205         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
1206         PortableFileBackedProtoLog<DocumentProto>::Create(
1207             &filesystem_, file_path_,
1208             PortableFileBackedProtoLog<DocumentProto>::Options(
1209                 compress_, max_proto_size_, compression_level_)));
1210     auto proto_log = std::move(create_result.proto_log);
1211     ASSERT_FALSE(create_result.has_data_loss());
1212 
1213     // Erases the 2nd proto that is now before the rewind position. Checksum
1214     // is updated.
1215     ICING_ASSERT_OK(proto_log->EraseProto(document2_offset));
1216 
1217     EXPECT_THAT(proto_log->UpdateChecksum(),
1218                 IsOkAndHolds(Eq(Crc32(790877774))));
1219   }
1220 
1221   {
1222     // Append data and erase data before the rewind position. This will update
1223     // the checksum twice: in EraseProto() and destructor.
1224     ICING_ASSERT_OK_AND_ASSIGN(
1225         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
1226         PortableFileBackedProtoLog<DocumentProto>::Create(
1227             &filesystem_, file_path_,
1228             PortableFileBackedProtoLog<DocumentProto>::Options(
1229                 compress_, max_proto_size_, compression_level_)));
1230     auto proto_log = std::move(create_result.proto_log);
1231     ASSERT_FALSE(create_result.has_data_loss());
1232 
1233     // Append a new document which is after the rewind position.
1234     ICING_ASSERT_OK(proto_log->WriteProto(document4));
1235 
1236     // Erases the 3rd proto that is now before the rewind position. Checksum
1237     // is updated.
1238     ICING_ASSERT_OK(proto_log->EraseProto(document3_offset));
1239 
1240     EXPECT_THAT(proto_log->UpdateChecksum(),
1241                 IsOkAndHolds(Eq(Crc32(2344803210))));
1242   }  // Checksum is updated with the newly appended document.
1243 
1244   {
1245     // A successful creation means that the checksum matches.
1246     ICING_ASSERT_OK_AND_ASSIGN(
1247         PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
1248         PortableFileBackedProtoLog<DocumentProto>::Create(
1249             &filesystem_, file_path_,
1250             PortableFileBackedProtoLog<DocumentProto>::Options(
1251                 compress_, max_proto_size_, compression_level_)));
1252     auto proto_log = std::move(create_result.proto_log);
1253     EXPECT_FALSE(create_result.has_data_loss());
1254   }
1255 }
1256 
1257 }  // namespace
1258 }  // namespace lib
1259 }  // namespace icing
1260