1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // File-backed log of protos with append-only writes and position based reads.
16 //
17 // The implementation in this file is deprecated and replaced by
18 // portable-file-backed-proto-log.h.
19 //
20 // This deprecated implementation has been made read-only for the purposes of
21 // migration; writing and erasing this format of log is no longer supported and
22 // the methods to accomplish this have been removed.
23 //
24 // The details of this format follow below:
// Each proto written to the file has a small piece of metadata written just
// before it. Each entry in the log therefore consists of
//   {
//     1 byte of kProtoMagic;
//     3 bytes of the proto size
//     n bytes of the proto itself
//   }
// TODO(b/136514769): Add versioning to the header and an UpgradeToVersion
// migration method.
34 #ifndef ICING_FILE_FILE_BACKED_PROTO_LOG_H_
35 #define ICING_FILE_FILE_BACKED_PROTO_LOG_H_
36
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/constants.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/portable/gzip_stream.h"
#include "icing/portable/platform.h"
#include "icing/portable/zlib.h"
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
57
58 namespace icing {
59 namespace lib {
60
61 template <typename ProtoT>
62 class FileBackedProtoLog {
63 public:
64 struct Options {
65 // Whether to compress each proto before writing to the proto log.
66 bool compress;
67
68 // Byte-size limit for each proto written to the store. This does not
69 // include the bytes needed for the metadata of each proto.
70 //
71 // NOTE: Currently, we only support protos up to 16MiB. We store the proto
72 // size in 3 bytes within the metadata.
73 //
74 // NOTE: This limit is only enforced for future writes. If the store
75 // previously had a higher limit, then reading older entries could return
76 // larger protos.
77 //
78 // NOTE: The max_proto_size is the upper limit for input protos into the
79 // ProtoLog. Even if the proto is larger than max_proto_size, but compresses
80 // to a smaller size, ProtoLog will not accept it. Protos that result in a
81 // compressed size larger than max_proto_size are also not accepted.
82 const int32_t max_proto_size;
83
84 // Must specify values for options.
85 Options() = delete;
86 explicit Options(bool compress_in,
87 const int32_t max_proto_size_in = constants::kMaxProtoSize)
compressOptions88 : compress(compress_in), max_proto_size(max_proto_size_in) {}
89 };
90
91 // Header stored at the beginning of the file before the rest of the log
92 // contents. Stores metadata on the log.
93 struct Header {
94 static constexpr int32_t kMagic = 0xf4c6f67a;
95
96 // Holds the magic as a quick sanity check against file corruption.
97 int32_t magic = kMagic;
98
99 // Whether to compress the protos before writing to the log.
100 bool compress = true;
101
102 // The maximum proto size that can be written to the log.
103 int32_t max_proto_size = 0;
104
105 // Checksum of the log elements, doesn't include the header fields.
106 uint32_t log_checksum = 0;
107
108 // Last known good offset at which the log and its checksum were updated.
109 // If we crash between writing to the log and updating the checksum, we can
110 // try to rewind the log to this offset and verify the checksum is still
111 // valid instead of throwing away the entire log.
112 int64_t rewind_offset = sizeof(Header);
113
114 // Must be at the end. Contains the crc checksum of the preceding fields.
115 uint32_t header_checksum = 0;
116
CalculateHeaderChecksumHeader117 uint32_t CalculateHeaderChecksum() const {
118 Crc32 crc;
119 std::string_view header_str(reinterpret_cast<const char*>(this),
120 offsetof(Header, header_checksum));
121 crc.Append(header_str);
122 return crc.Get();
123 }
124 };
125
126 struct CreateResult {
127 // A successfully initialized log.
128 std::unique_ptr<FileBackedProtoLog<ProtoT>> proto_log;
129
130 // The data status after initializing from a previous state. Data loss can
131 // happen if the file is corrupted or some previously added data was
132 // unpersisted. This may be used to signal that any derived data off of the
133 // proto log may need to be regenerated.
134 DataLoss data_loss;
135
has_data_lossCreateResult136 bool has_data_loss() {
137 return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE;
138 }
139 };
140
141 // Factory method to create, initialize, and return a FileBackedProtoLog. Will
142 // create the file if it doesn't exist.
143 //
144 // If on re-initialization the log detects disk corruption or some previously
145 // added data was unpersisted, the log will rewind to the last-good state. The
146 // log saves these checkpointed "good" states when PersistToDisk() is called
147 // or the log is safely destructed. If the log rewinds successfully to the
148 // last-good state, then the returned CreateResult.data_loss indicates
149 // whether it has a data loss and what kind of data loss it is (partial or
150 // complete) so that any derived data may know that it needs to be updated. If
151 // the log re-initializes successfully without any data loss,
152 // CreateResult.data_loss will be NONE.
153 //
154 // Params:
155 // filesystem: Handles system level calls
156 // file_path: Path of the underlying file. Directory of the file should
157 // already exist
158 // options: Configuration options for the proto log
159 //
160 // Returns:
161 // FileBackedProtoLog::CreateResult on success
162 // INVALID_ARGUMENT on an invalid option
163 // INTERNAL_ERROR on IO error
164 static libtextclassifier3::StatusOr<CreateResult> Create(
165 const Filesystem* filesystem, const std::string& file_path,
166 const Options& options);
167
168 // Not copyable
169 FileBackedProtoLog(const FileBackedProtoLog&) = delete;
170 FileBackedProtoLog& operator=(const FileBackedProtoLog&) = delete;
171
172 // Reads out a proto located at file_offset from the file.
173 //
174 // Returns:
175 // A proto on success
176 // NOT_FOUND if the proto at the given offset has been erased
177 // OUT_OF_RANGE_ERROR if file_offset exceeds file size
178 // INTERNAL_ERROR on IO error
179 libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
180
181 // An iterator helping to find offsets of all the protos in file.
182 // Example usage:
183 //
184 // while (iterator.Advance().ok()) {
185 // int64_t offset = iterator.GetOffset();
186 // // Do something
187 // }
188 class Iterator {
189 public:
190 explicit Iterator(const Filesystem& filesystem,
191 const std::string& file_path, int64_t initial_offset,
192 MemoryMappedFile&& mmapped_file);
193
194 // Advances to the position of next proto whether it has been erased or not.
195 //
196 // Returns:
197 // OK on success
198 // OUT_OF_RANGE_ERROR if it reaches the end
199 // INTERNAL_ERROR on IO error
200 libtextclassifier3::Status Advance();
201
202 // Returns the file offset of current proto.
203 int64_t GetOffset();
204
205 private:
206 static constexpr int64_t kInvalidOffset = -1;
207 // Used to read proto metadata
208 MemoryMappedFile mmapped_file_;
209 // Offset of first proto
210 int64_t initial_offset_;
211 int64_t current_offset_;
212 int64_t file_size_;
213 };
214
215 // Returns an iterator of current proto log. The caller needs to keep the
216 // proto log unchanged while using the iterator, otherwise unexpected
217 // behaviors could happen.
218 libtextclassifier3::StatusOr<Iterator> GetIterator();
219
220 private:
221 // Object can only be instantiated via the ::Create factory.
222 FileBackedProtoLog(const Filesystem* filesystem, const std::string& file_path,
223 std::unique_ptr<Header> header);
224
225 // Initializes a new proto log.
226 //
227 // Returns:
228 // std::unique_ptr<CreateResult> on success
229 // INTERNAL_ERROR on IO error
230 static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
231 const Filesystem* filesystem, const std::string& file_path,
232 const Options& options);
233
234 // Verifies that the existing proto log is in a good state. If not in a good
235 // state, then the proto log may be truncated to the last good state and
236 // content will be lost.
237 //
238 // Returns:
239 // std::unique_ptr<CreateResult> on success
240 // INTERNAL_ERROR on IO error or internal inconsistencies in the file
241 // INVALID_ARGUMENT_ERROR if options aren't consistent with previous
242 // instances
243 static libtextclassifier3::StatusOr<CreateResult> InitializeExistingFile(
244 const Filesystem* filesystem, const std::string& file_path,
245 const Options& options, int64_t file_size);
246
247 // Takes an initial checksum and updates it with the content between `start`
248 // and `end` offsets in the file.
249 //
250 // Returns:
251 // Crc of the content between `start`, inclusive, and `end`, exclusive.
252 // INTERNAL_ERROR on IO error
253 // INVALID_ARGUMENT_ERROR if start and end aren't within the file size
254 static libtextclassifier3::StatusOr<Crc32> ComputeChecksum(
255 const Filesystem* filesystem, const std::string& file_path,
256 Crc32 initial_crc, int64_t start, int64_t end);
257
IsEmptyBuffer(const char * buffer,int size)258 static bool IsEmptyBuffer(const char* buffer, int size) {
259 return std::all_of(buffer, buffer + size,
260 [](const char byte) { return byte == 0; });
261 }
262
263 // Helper function to get stored proto size from the metadata.
264 // Metadata format: 8 bits magic + 24 bits size
GetProtoSize(int metadata)265 static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
266
267 // Helper function to get stored proto magic from the metadata.
268 // Metadata format: 8 bits magic + 24 bits size
GetProtoMagic(int metadata)269 static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
270
271 // Reads out the metadata of a proto located at file_offset from the file.
272 //
273 // Returns:
274 // Proto's metadata on success
275 // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
276 // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
277 static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
278 MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
279
280 // Magic number added in front of every proto. Used when reading out protos
281 // as a first check for corruption in each entry in the file. Even if there is
282 // a corruption, the best we can do is roll back to our last recovery point
283 // and throw away un-flushed data. We can discard/reuse this byte if needed so
284 // that we have 4 bytes to store the size of protos, and increase the size of
285 // protos we support.
286 static constexpr uint8_t kProtoMagic = 0x5C;
287
288 // Chunks of the file to mmap at a time, so we don't mmap the entire file.
289 // Only used on 32-bit devices
290 static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
291
292 ScopedFd fd_;
293 const Filesystem* const filesystem_;
294 const std::string file_path_;
295 std::unique_ptr<Header> header_;
296 };
297
298 template <typename ProtoT>
FileBackedProtoLog(const Filesystem * filesystem,const std::string & file_path,std::unique_ptr<Header> header)299 FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem,
300 const std::string& file_path,
301 std::unique_ptr<Header> header)
302 : filesystem_(filesystem),
303 file_path_(file_path),
304 header_(std::move(header)) {
305 fd_.reset(filesystem_->OpenForAppend(file_path.c_str()));
306 }
307
308 template <typename ProtoT>
309 libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
Create(const Filesystem * filesystem,const std::string & file_path,const Options & options)310 FileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
311 const std::string& file_path,
312 const Options& options) {
313 if (options.max_proto_size <= 0) {
314 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
315 "options.max_proto_size must be greater than 0, was %d",
316 options.max_proto_size));
317 }
318
319 // Since we store the proto_size in 3 bytes, we can only support protos of up
320 // to 16MiB.
321 if (options.max_proto_size > constants::kMaxProtoSize) {
322 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
323 "options.max_proto_size must be under 16MiB, was %d",
324 options.max_proto_size));
325 }
326
327 if (!filesystem->FileExists(file_path.c_str())) {
328 return InitializeNewFile(filesystem, file_path, options);
329 }
330
331 int64_t file_size = filesystem->GetFileSize(file_path.c_str());
332 if (file_size == Filesystem::kBadFileSize) {
333 return absl_ports::InternalError(
334 absl_ports::StrCat("Bad file size '", file_path, "'"));
335 }
336
337 if (file_size == 0) {
338 return InitializeNewFile(filesystem, file_path, options);
339 }
340
341 return InitializeExistingFile(filesystem, file_path, options, file_size);
342 }
343
344 template <typename ProtoT>
345 libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
InitializeNewFile(const Filesystem * filesystem,const std::string & file_path,const Options & options)346 FileBackedProtoLog<ProtoT>::InitializeNewFile(const Filesystem* filesystem,
347 const std::string& file_path,
348 const Options& options) {
349 // Create the header
350 std::unique_ptr<Header> header = std::make_unique<Header>();
351 header->compress = options.compress;
352 header->max_proto_size = options.max_proto_size;
353 header->header_checksum = header->CalculateHeaderChecksum();
354
355 if (!filesystem->Write(file_path.c_str(), header.get(), sizeof(Header))) {
356 return absl_ports::InternalError(
357 absl_ports::StrCat("Failed to write header for file: ", file_path));
358 }
359
360 CreateResult create_result = {
361 std::unique_ptr<FileBackedProtoLog<ProtoT>>(
362 new FileBackedProtoLog<ProtoT>(filesystem, file_path,
363 std::move(header))),
364 /*data_loss=*/DataLoss::NONE};
365
366 return create_result;
367 }
368
369 template <typename ProtoT>
370 libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
InitializeExistingFile(const Filesystem * filesystem,const std::string & file_path,const Options & options,int64_t file_size)371 FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
372 const std::string& file_path,
373 const Options& options,
374 int64_t file_size) {
375 if (file_size < sizeof(Header)) {
376 return absl_ports::InternalError(
377 absl_ports::StrCat("File header too short for: ", file_path));
378 }
379
380 std::unique_ptr<Header> header = std::make_unique<Header>();
381 if (!filesystem->PRead(file_path.c_str(), header.get(), sizeof(Header),
382 /*offset=*/0)) {
383 return absl_ports::InternalError(
384 absl_ports::StrCat("Failed to read header for file: ", file_path));
385 }
386
387 // Make sure the header is still valid before we use any of its values. This
388 // is covered by the header_checksum check below, but this is a quick check
389 // that can save us from an extra crc computation.
390 if (header->magic != Header::kMagic) {
391 return absl_ports::InternalError(
392 absl_ports::StrCat("Invalid header kMagic for file: ", file_path));
393 }
394
395 if (header->header_checksum != header->CalculateHeaderChecksum()) {
396 return absl_ports::InternalError(
397 absl_ports::StrCat("Invalid header checksum for: ", file_path));
398 }
399
400 if (header->compress != options.compress) {
401 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
402 "Inconsistent compress option, expected %d, actual %d",
403 header->compress, options.compress));
404 }
405
406 if (header->max_proto_size > options.max_proto_size) {
407 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
408 "Max proto size cannot be smaller than previous "
409 "instantiations, previous size %d, wanted size %d",
410 header->max_proto_size, options.max_proto_size));
411 }
412 header->max_proto_size = options.max_proto_size;
413
414 DataLoss data_loss = DataLoss::NONE;
415 ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
416 ComputeChecksum(filesystem, file_path, Crc32(),
417 sizeof(Header), file_size));
418
419 // Double check that the log checksum is the same as the one that was
420 // persisted last time. If not, we start recovery logic.
421 if (header->log_checksum != calculated_log_checksum.Get()) {
422 // Need to rewind the proto log since the checksums don't match.
423 // Worst case, we have to rewind the entire log back to just the header
424 int64_t last_known_good = sizeof(Header);
425
426 // Calculate the checksum of the log contents just up to the last rewind
427 // offset point. This will be valid if we just appended contents to the log
428 // without updating the checksum, and we can rewind back to this point
429 // safely.
430 ICING_ASSIGN_OR_RETURN(
431 calculated_log_checksum,
432 ComputeChecksum(filesystem, file_path, Crc32(), sizeof(Header),
433 header->rewind_offset));
434 if (header->log_checksum == calculated_log_checksum.Get()) {
435 // Check if it matches our last rewind state. If so, this becomes our last
436 // good state and we can safely truncate and recover from here.
437 last_known_good = header->rewind_offset;
438 data_loss = DataLoss::PARTIAL;
439 } else {
440 // Otherwise, we're going to truncate the entire log and this resets the
441 // checksum to an empty log state.
442 header->log_checksum = 0;
443 data_loss = DataLoss::COMPLETE;
444 }
445
446 if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
447 return absl_ports::InternalError(
448 absl_ports::StrCat("Error truncating file: ", file_path));
449 }
450
451 ICING_LOG(WARNING) << "Truncated '" << file_path << "' to size "
452 << last_known_good;
453 }
454
455 CreateResult create_result = {
456 std::unique_ptr<FileBackedProtoLog<ProtoT>>(
457 new FileBackedProtoLog<ProtoT>(filesystem, file_path,
458 std::move(header))),
459 data_loss};
460
461 return create_result;
462 }
463
464 template <typename ProtoT>
ComputeChecksum(const Filesystem * filesystem,const std::string & file_path,Crc32 initial_crc,int64_t start,int64_t end)465 libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
466 const Filesystem* filesystem, const std::string& file_path,
467 Crc32 initial_crc, int64_t start, int64_t end) {
468 ICING_ASSIGN_OR_RETURN(
469 MemoryMappedFile mmapped_file,
470 MemoryMappedFile::Create(*filesystem, file_path,
471 MemoryMappedFile::Strategy::READ_ONLY));
472 Crc32 new_crc(initial_crc.Get());
473
474 if (start < 0) {
475 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
476 "Starting checksum offset of file '%s' must be greater than 0, was "
477 "%lld",
478 file_path.c_str(), static_cast<long long>(start)));
479 }
480
481 if (end < start) {
482 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
483 "Ending checksum offset of file '%s' must be greater than start "
484 "'%lld', was '%lld'",
485 file_path.c_str(), static_cast<long long>(start),
486 static_cast<long long>(end)));
487 }
488
489 int64_t file_size = filesystem->GetFileSize(file_path.c_str());
490 if (end > file_size) {
491 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
492 "Ending checksum offset of file '%s' must be within "
493 "file size of %lld, was %lld",
494 file_path.c_str(), static_cast<long long>(file_size),
495 static_cast<long long>(end)));
496 }
497
498 Architecture architecture = GetArchitecture();
499 switch (architecture) {
500 case Architecture::BIT_64: {
501 // Don't mmap in chunks here since mmapping can be harmful on 64-bit
502 // devices where mmap/munmap calls need the mmap write semaphore, which
503 // blocks mmap/munmap/mprotect and all page faults from executing while
504 // they run. On 64-bit devices, this doesn't actually load into memory, it
505 // just makes the file faultable. So the whole file should be ok.
506 // b/185822878.
507 ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
508 auto mmap_str = std::string_view(mmapped_file.region(), end - start);
509 new_crc.Append(mmap_str);
510 break;
511 }
512 case Architecture::BIT_32:
513 [[fallthrough]];
514 case Architecture::UNKNOWN: {
515 // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too
516 // much memory at once. If we're unknown, then also chunk it because we're
517 // not sure what the device can handle.
518 for (int i = start; i < end; i += kMmapChunkSize) {
519 // Don't read past the file size.
520 int next_chunk_size = kMmapChunkSize;
521 if ((i + kMmapChunkSize) >= end) {
522 next_chunk_size = end - i;
523 }
524
525 ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
526
527 auto mmap_str =
528 std::string_view(mmapped_file.region(), next_chunk_size);
529 new_crc.Append(mmap_str);
530 }
531 break;
532 }
533 }
534
535 return new_crc;
536 }
537
538 template <typename ProtoT>
ReadProto(int64_t file_offset)539 libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
540 int64_t file_offset) const {
541 int64_t file_size = filesystem_->GetFileSize(fd_.get());
542 ICING_ASSIGN_OR_RETURN(
543 MemoryMappedFile mmapped_file,
544 MemoryMappedFile::Create(*filesystem_, file_path_,
545 MemoryMappedFile::Strategy::READ_ONLY));
546 if (file_offset >= file_size) {
547 // file_size points to the next byte to write at, so subtract one to get
548 // the inclusive, actual size of file.
549 return absl_ports::OutOfRangeError(
550 IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
551 "out of range of the file size, %lld",
552 static_cast<long long>(file_offset),
553 static_cast<long long>(file_size - 1)));
554 }
555
556 // Read out the metadata
557 ICING_ASSIGN_OR_RETURN(
558 int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size));
559
560 // Copy out however many bytes it says the proto is
561 int stored_size = GetProtoSize(metadata);
562
563 ICING_RETURN_IF_ERROR(
564 mmapped_file.Remap(file_offset + sizeof(metadata), stored_size));
565
566 if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) {
567 return absl_ports::NotFoundError("The proto data has been erased.");
568 }
569
570 google::protobuf::io::ArrayInputStream proto_stream(mmapped_file.mutable_region(),
571 stored_size);
572
573 // Deserialize proto
574 ProtoT proto;
575 if (header_->compress) {
576 protobuf_ports::GzipInputStream decompress_stream(&proto_stream);
577 proto.ParseFromZeroCopyStream(&decompress_stream);
578 } else {
579 proto.ParseFromZeroCopyStream(&proto_stream);
580 }
581
582 return proto;
583 }
584
585 template <typename ProtoT>
Iterator(const Filesystem & filesystem,const std::string & file_path,int64_t initial_offset,MemoryMappedFile && mmapped_file)586 FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem,
587 const std::string& file_path,
588 int64_t initial_offset,
589 MemoryMappedFile&& mmapped_file)
590 : mmapped_file_(std::move(mmapped_file)),
591 initial_offset_(initial_offset),
592 current_offset_(kInvalidOffset),
593 file_size_(filesystem.GetFileSize(file_path.c_str())) {
594 if (file_size_ == Filesystem::kBadFileSize) {
595 // Fails all Advance() calls
596 file_size_ = 0;
597 }
598 }
599
600 template <typename ProtoT>
Advance()601 libtextclassifier3::Status FileBackedProtoLog<ProtoT>::Iterator::Advance() {
602 if (current_offset_ == kInvalidOffset) {
603 // First Advance() call
604 current_offset_ = initial_offset_;
605 } else {
606 // Jumps to the next proto position
607 ICING_ASSIGN_OR_RETURN(
608 int metadata,
609 ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_));
610 current_offset_ += sizeof(metadata) + GetProtoSize(metadata);
611 }
612
613 if (current_offset_ < file_size_) {
614 return libtextclassifier3::Status::OK;
615 } else {
616 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
617 "The next proto offset, %lld, is out of file range [0, %lld)",
618 static_cast<long long>(current_offset_),
619 static_cast<long long>(file_size_)));
620 }
621 }
622
623 template <typename ProtoT>
GetOffset()624 int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
625 return current_offset_;
626 }
627
628 template <typename ProtoT>
629 libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::Iterator>
GetIterator()630 FileBackedProtoLog<ProtoT>::GetIterator() {
631 ICING_ASSIGN_OR_RETURN(
632 MemoryMappedFile mmapped_file,
633 MemoryMappedFile::Create(*filesystem_, file_path_,
634 MemoryMappedFile::Strategy::READ_ONLY));
635 return Iterator(*filesystem_, file_path_,
636 /*initial_offset=*/sizeof(Header), std::move(mmapped_file));
637 }
638
639 template <typename ProtoT>
ReadProtoMetadata(MemoryMappedFile * mmapped_file,int64_t file_offset,int64_t file_size)640 libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
641 MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) {
642 // Checks file_offset
643 if (file_offset >= file_size) {
644 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
645 "offset, %lld, is out of file range [0, %lld)",
646 static_cast<long long>(file_offset),
647 static_cast<long long>(file_size)));
648 }
649 int metadata;
650 int metadata_size = sizeof(metadata);
651 if (file_offset + metadata_size >= file_size) {
652 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
653 "Wrong metadata offset %lld, metadata doesn't fit in "
654 "with file range [0, %lld)",
655 static_cast<long long>(file_offset),
656 static_cast<long long>(file_size)));
657 }
658 // Reads metadata
659 ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size));
660 memcpy(&metadata, mmapped_file->region(), metadata_size);
661 // Checks magic number
662 uint8_t stored_k_proto_magic = GetProtoMagic(metadata);
663 if (stored_k_proto_magic != kProtoMagic) {
664 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
665 "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
666 stored_k_proto_magic));
667 }
668 return metadata;
669 }
670
671 } // namespace lib
672 } // namespace icing
673
674 #endif // ICING_FILE_FILE_BACKED_PROTO_LOG_H_
675