xref: /aosp_15_r20/external/icing/icing/file/file-backed-vector.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // A file-backed vector that can store fixed-width elements. It provides
16 // built-in support for checksums to verify data integrity and an in-memory
17 // cache for fast read/writes.
18 //
19 // If the file is corrupted/in an invalid state, all contents are lost, i.e.
20 // there is no clear recovery path other than recreating/repopulating the
21 // contents.
22 //
23 // Note on Performance:
24 // The class keeps the vector in a mmapped area. This allows users to specify
25 // which MemoryMappedFile::Strategy they wish to use with this class. The vector
26 // will implicitly grow when the user tries to access an element beyond its
27 // current size. Growing happens in 16KiB chunks, up to a maximum size of 1MiB.
28 //
29 // Note on Checksumming:
30 // Checksumming happens lazily. We do tail checksums to avoid recalculating the
31 // checksum of the entire file on each modfification. A full checksum will be
32 // computed/verified at creation time, when persisting to disk, or whenever the
33 // user manually calls ComputeChecksum(). A separate header checksum is kept for
34 // a quick integrity check.
35 //
36 //
37 // Usage:
38 // RETURN_OR_ASSIGN(auto vector, FileBackedVector<char>::Create(...));
39 //
40 // ICING_RETURN_IF_ERROR(vector->Set(0, 'a'));
41 // ICING_RETURN_IF_ERROR(vector->Set(1, 'b'));
42 // ICING_RETURN_IF_ERROR(vector->Set(2, 'c'));
43 //
44 // vector->num_elements();  // Returns 3
45 //
46 // vector->At(2);  // Returns 'c'
47 //
48 // vector->TruncateTo(1);
49 // vector->num_elements();  // Returns 1
50 // vector->At(0);  // Returns 'a'
51 //
52 // vector->ComputeChecksum();  // Force a checksum update and gets the checksum
53 //
54 // vector->PersistToDisk();  // Persist contents to disk.
55 
56 #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
57 #define ICING_FILE_FILE_BACKED_VECTOR_H_
58 
59 #include <sys/mman.h>
60 #include <unistd.h>
61 
62 #include <algorithm>
63 #include <cinttypes>
64 #include <cstdint>
65 #include <cstring>
66 #include <functional>
67 #include <limits>
68 #include <memory>
69 #include <string>
70 #include <utility>
71 #include <vector>
72 
73 #include "icing/text_classifier/lib3/utils/base/status.h"
74 #include "icing/text_classifier/lib3/utils/base/statusor.h"
75 #include "icing/absl_ports/canonical_errors.h"
76 #include "icing/absl_ports/str_cat.h"
77 #include "icing/file/filesystem.h"
78 #include "icing/file/memory-mapped-file.h"
79 #include "icing/legacy/core/icing-string-util.h"
80 #include "icing/portable/platform.h"
81 #include "icing/util/crc32.h"
82 #include "icing/util/logging.h"
83 #include "icing/util/math-util.h"
84 #include "icing/util/status-macros.h"
85 
86 namespace icing {
87 namespace lib {
88 
89 template <typename T>
90 class FileBackedVector {
91  public:
92   class MutableArrayView;
93   class MutableView;
94 
95   // Header stored at the beginning of the file before the rest of the vector
96   // elements. Stores metadata on the vector.
97   struct Header {
98     // Static assert constants.
99     static constexpr int32_t kHeaderSize = 24;
100     static constexpr int32_t kHeaderChecksumOffset = 16;
101 
102     static constexpr int32_t kMagic = 0x8bbbe237;
103 
104     // Holds the magic as quick sanity check against file corruption
105     int32_t magic;
106 
107     // Byte size of each element in the vector
108     int32_t element_size;
109 
110     // Number of elements currently in the vector
111     int32_t num_elements;
112 
113     // Checksum of the vector elements, doesn't include the header fields.
114     //
115     // TODO(cassiewang): Add a checksum state that can track if the checksum is
116     // fresh or stale. This lets us short circuit checksum computations if we
117     // know the checksum is fresh.
118     uint32_t vector_checksum;
119 
120     // Must be below all actual header content fields and above the padding
121     // field. Contains the crc checksum of the preceding fields.
122     uint32_t header_checksum;
123 
124     // This field has no actual meaning here but is just used as padding for the
125     // struct so the size of the struct can be a multiple of 8. Doing this makes
126     // the address right after the header a multiple of 8 and prevents a ubsan
127     // misalign-pointer-use error (go/ubsan).
128     //
129     // NOTE: please remove this when adding new fields and re-assert that the
130     // size is multiple of 8.
131     int32_t padding_for_ptr_alignment;
132 
CalculateHeaderChecksumHeader133     uint32_t CalculateHeaderChecksum() const {
134       // Sanity check that the memory layout matches the disk layout.
135       static_assert(std::is_standard_layout<FileBackedVector::Header>::value,
136                     "");
137       static_assert(sizeof(FileBackedVector::Header) == kHeaderSize, "");
138       static_assert(
139           sizeof(FileBackedVector::Header) % sizeof(void*) == 0,
140           "Header has insufficient padding for void* pointer alignment");
141       static_assert(offsetof(FileBackedVector::Header, header_checksum) ==
142                         kHeaderChecksumOffset,
143                     "");
144 
145       return Crc32(std::string_view(
146                        reinterpret_cast<const char*>(this),
147                        offsetof(FileBackedVector::Header, header_checksum)))
148           .Get();
149     }
150   };
151 
152   // Absolute max file size for FileBackedVector.
153   // - We memory map the whole file, so file size ~= memory size.
154   // - On 32-bit platform, the virtual memory address space is 4GB. To avoid
155   //   exhausting the memory, set smaller file size limit for 32-bit platform.
156 #ifdef ICING_ARCH_BIT_64
157   static constexpr int32_t kMaxFileSize =
158       std::numeric_limits<int32_t>::max();  // 2^31-1 Bytes, ~2.1 GB
159 #else
160   static constexpr int32_t kMaxFileSize =
161       (1 << 28) + Header::kHeaderSize;  // 2^28 + 12 Bytes, ~256 MiB
162 #endif
163 
164   // Size of element type T. The value is same as sizeof(T), while we should
165   // avoid using sizeof(T) in our codebase to prevent unexpected unsigned
166   // integer casting.
167   static constexpr int32_t kElementTypeSize = static_cast<int32_t>(sizeof(T));
168   static_assert(sizeof(T) <= (1 << 10));
169 
170   // Absolute max # of elements allowed. Since we are using int32_t to store
171   // num_elements, max value is 2^31-1. Still the actual max # of elements are
172   // determined by max_file_size, kMaxFileSize, kElementTypeSize, and
173   // Header::kHeaderSize.
174   static constexpr int32_t kMaxNumElements =
175       std::numeric_limits<int32_t>::max();
176 
177   // Creates a new FileBackedVector to read/write content to.
178   //
179   // filesystem: Object to make system level calls
180   // file_path : Specifies the file to persist the vector to; must be a path
181   //             within a directory that already exists.
182   // mmap_strategy : Strategy/optimizations to access the content in the vector,
183   //                 see MemoryMappedFile::Strategy for more details
184   // max_file_size: Maximum file size for FileBackedVector, default
185   //                kMaxFileSize. Note that this value won't be written into the
186   //                header, so maximum file size will always be specified in
187   //                runtime and the caller should make sure the value is correct
188   //                and reasonable. Also it will be cached in MemoryMappedFile
189   //                member, so we can always call mmapped_file_->max_file_size()
190   //                to get it.
191   //                The range should be in
192   //                [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and
193   //                (max_file_size - Header::kHeaderSize) / kElementTypeSize is
194   //                max # of elements that can be stored.
195   // pre_mapping_mmap_size: pre-mapping size of MemoryMappedFile, default 0.
196   //                        Pre-mapping a large memory region to the file and
197   //                        grow the underlying file later, so we can avoid
198   //                        remapping too frequently and reduce the cost of
199   //                        system call and memory paging after remap. The user
200   //                        should specify reasonable size to save remapping
201   //                        cost and avoid exhausting the memory at once in the
202   //                        beginning.
203   //                        Note: if the file exists and pre_mapping_mmap_size
204   //                        is smaller than file_size - Header::kHeaderSize,
205   //                        then it still pre-maps file_size -
206   //                        Header::kHeaderSize to make all existing elements
207   //                        available.
208   // TODO(b/247671531): figure out pre_mapping_mmap_size for each
209   //                    FileBackedVector use case.
210   //
211   // Return:
212   //   FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
213   //                             checksum.
214   //   INTERNAL_ERROR on I/O errors.
215   //   INVALID_ARGUMENT_ERROR if max_file_size is incorrect.
216   //   UNIMPLEMENTED_ERROR if created with strategy READ_WRITE_MANUAL_SYNC.
217   static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
218   Create(const Filesystem& filesystem, const std::string& file_path,
219          MemoryMappedFile::Strategy mmap_strategy,
220          int32_t max_file_size = kMaxFileSize,
221          int32_t pre_mapping_mmap_size = 0);
222 
223   // Deletes the FileBackedVector
224   //
225   // filesystem: Object to make system level calls
226   // file_path : Specifies the file the vector is persisted to.
227   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
228                                            const std::string& file_path);
229 
230   // Not copyable
231   FileBackedVector(const FileBackedVector&) = delete;
232   FileBackedVector& operator=(const FileBackedVector&) = delete;
233 
234   // If the vector was created with
235   // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, then changes will be
236   // synced by the system and the checksum will be updated.
237   ~FileBackedVector();
238 
239   // Gets a copy of the element at idx.
240   //
241   // This is useful if you think the FileBackedVector may grow before you need
242   // to access this return value. When the FileBackedVector grows, the
243   // underlying mmap will be unmapped and remapped, which will invalidate any
244   // pointers to the previously mapped region. Getting a copy will avoid
245   // referencing the now-invalidated region.
246   //
247   // Returns:
248   //   OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
249   libtextclassifier3::StatusOr<T> GetCopy(int32_t idx) const;
250 
251   // Gets an immutable pointer to the element at idx.
252   //
253   // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the pointer
254   // returned by Get.
255   //
256   // This is useful if you do not think the FileBackedVector will grow before
257   // you need to reference this value, and you want to avoid a copy. When the
258   // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
259   // which will invalidate this pointer to the previously mapped region.
260   //
261   // Returns:
262   //   OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
263   libtextclassifier3::StatusOr<const T*> Get(int32_t idx) const;
264 
265   // Gets a MutableView to the element at idx.
266   //
267   // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
268   // reference returned by MutableView::Get().
269   //
270   // This is useful if you do not think the FileBackedVector will grow before
271   // you need to reference this value, and you want to mutate the underlying
272   // data directly. When the FileBackedVector grows, the underlying mmap will be
273   // unmapped and remapped, which will invalidate this MutableView to the
274   // previously mapped region.
275   //
276   // Returns:
277   //   OUT_OF_RANGE_ERROR if idx < 0 or idx >= num_elements()
278   libtextclassifier3::StatusOr<MutableView> GetMutable(int32_t idx);
279 
280   // Gets a MutableArrayView to the elements at range [idx, idx + len).
281   //
282   // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
283   // reference/pointer returned by MutableArrayView::operator[]/data().
284   //
285   // This is useful if you do not think the FileBackedVector will grow before
286   // you need to reference this value, and you want to mutate the underlying
287   // data directly. When the FileBackedVector grows, the underlying mmap will be
288   // unmapped and remapped, which will invalidate this MutableArrayView to the
289   // previously mapped region.
290   //
291   // Returns:
292   //   OUT_OF_RANGE_ERROR if idx < 0 or idx + len > num_elements()
293   libtextclassifier3::StatusOr<MutableArrayView> GetMutable(int32_t idx,
294                                                             int32_t len);
295 
296   // Writes the value at idx.
297   //
298   // May grow the underlying file and mmapped region as needed to fit the new
299   // value. If it does grow, then any pointers/references to previous values
300   // returned from Get/GetMutable/Allocate may be invalidated.
301   //
302   // Returns:
303   //   OUT_OF_RANGE_ERROR if idx < 0 or idx > kMaxIndex or file cannot be grown
304   //                      to fit idx + 1 elements
305   libtextclassifier3::Status Set(int32_t idx, const T& value);
306 
307   // Set [idx, idx + len) to a single value.
308   //
309   // May grow the underlying file and mmapped region as needed to fit the new
310   // value. If it does grow, then any pointers/references to previous values
311   // returned from Get/GetMutable/Allocate may be invalidated.
312   //
313   // Returns:
314   //   OUT_OF_RANGE_ERROR if idx < 0 or idx + len > kMaxNumElements or file
315   //                      cannot be grown to fit idx + len elements
316   libtextclassifier3::Status Set(int32_t idx, int32_t len, const T& value);
317 
318   // Appends the value to the end of the vector.
319   //
320   // May grow the underlying file and mmapped region as needed to fit the new
321   // value. If it does grow, then any pointers/references to previous values
322   // returned from Get/GetMutable/Allocate may be invalidated.
323   //
324   // Returns:
325   //   OUT_OF_RANGE_ERROR if file cannot be grown (i.e. reach
326   //                      mmapped_file_->max_file_size())
Append(const T & value)327   libtextclassifier3::Status Append(const T& value) {
328     return Set(header()->num_elements, value);
329   }
330 
331   // Allocates spaces with given length in the end of the vector and returns a
332   // MutableArrayView to the space.
333   //
334   // May grow the underlying file and mmapped region as needed to fit the new
335   // value. If it does grow, then any pointers/references to previous values
336   // returned from Get/GetMutable/Allocate may be invalidated.
337   //
338   // WARNING: Subsequent calls to Set/Append/Allocate may invalidate the
339   // reference/pointer returned by MutableArrayView::operator[]/data().
340   //
341   // This is useful if you do not think the FileBackedVector will grow before
342   // you need to reference this value, and you want to allocate adjacent spaces
343   // for multiple elements and mutate the underlying data directly. When the
344   // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
345   // which will invalidate this MutableArrayView to the previously mapped
346   // region.
347   //
348   // Returns:
349   //   OUT_OF_RANGE_ERROR if len <= 0 or file cannot be grown (i.e. reach
350   //                      mmapped_file_->max_file_size())
351   libtextclassifier3::StatusOr<MutableArrayView> Allocate(int32_t len);
352 
353   // Resizes to first len elements. The crc is cleared on truncation and will be
354   // updated on destruction, or once the client calls ComputeChecksum() or
355   // PersistToDisk().
356   //
357   // Returns:
358   //   OUT_OF_RANGE_ERROR if len < 0 or len >= num_elements()
359   libtextclassifier3::Status TruncateTo(int32_t new_num_elements);
360 
361   // Sorts the vector within range [begin_idx, end_idx).
362   // It handles SetDirty properly for the file-backed-vector.
363   //
364   // Returns:
365   //   OUT_OF_RANGE_ERROR if (0 <= begin_idx < end_idx <= num_elements()) does
366   //                      not hold
367   libtextclassifier3::Status Sort(int32_t begin_idx, int32_t end_idx);
368 
369   // Mark idx as changed iff idx < changes_end_, so later ComputeChecksum() can
370   // update checksum by the cached changes without going over [0, changes_end_).
371   //
372   // If the buffer size exceeds kPartialCrcLimitDiv, then clear all change
373   // buffers and set changes_end_ as 0, indicating that the checksum should be
374   // recomputed from idx 0 (starting from the beginning). Otherwise cache the
375   // change.
376   void SetDirty(int32_t idx);
377 
378   // Flushes content to underlying file.
379   //
380   // Returns:
381   //   OK on success
382   //   INTERNAL on I/O error
383   libtextclassifier3::Status PersistToDisk();
384 
385   // Calculates and returns the disk usage in bytes. Rounds up to the nearest
386   // block size.
387   //
388   // Returns:
389   //   Disk usage on success
390   //   INTERNAL_ERROR on IO error
391   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
392 
393   // Returns the file size of the all the elements held in the vector. File size
394   // is in bytes. This excludes the size of any internal metadata of the vector,
395   // e.g. the vector's header.
396   //
397   // Returns:
398   //   File size on success
399   //   INTERNAL_ERROR on IO error
400   libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
401 
402   // Calculates the checksum of the vector contents and updates the header to
403   // hold this updated value.
404   //
405   // Returns:
406   //   Checksum of the vector on success
407   //   INTERNAL_ERROR on IO error
408   libtextclassifier3::StatusOr<Crc32> UpdateChecksum();
409 
410   // Calculates the checksum of the vector contents and returns it. Does NOT
411   // update the header.
412   //
413   // Returns:
414   //   Checksum of the vector on success
415   //   INTERNAL_ERROR on IO error
416   Crc32 GetChecksum() const;
417 
418   // Accessors.
array()419   const T* array() const {
420     return reinterpret_cast<const T*>(mmapped_file_->region() + sizeof(Header));
421   }
422 
num_elements()423   int32_t num_elements() const { return header()->num_elements; }
424 
425  public:
426   class MutableArrayView {
427    public:
428     const T& operator[](int32_t idx) const { return data_[idx]; }
429     T& operator[](int32_t idx) {
430       SetDirty(idx);
431       return data_[idx];
432     }
433 
data()434     const T* data() const { return data_; }
435 
size()436     int32_t size() const { return len_; }
437 
438     // Sets the mutable array slice (starting at idx) by the given element
439     // array. It handles SetDirty properly for the file-backed-vector when
440     // modifying elements.
441     //
442     // REQUIRES: arr is valid && arr_len >= 0 && idx >= 0 && idx + arr_len <=
443     //           size(), otherwise the behavior is undefined.
SetArray(int32_t idx,const T * arr,int32_t arr_len)444     void SetArray(int32_t idx, const T* arr, int32_t arr_len) {
445       for (int32_t i = 0; i < arr_len; ++i) {
446         SetDirty(idx + i);
447         data_[idx + i] = arr[i];
448       }
449     }
450 
451     // Fills the mutable array slice, starting at idx with the given length, by
452     // the given value. It handles SetDirty properly for the file-backed-vector
453     // when modifying elements.
454     //
455     // REQUIRES: len >= 0 && idx >= 0 && idx + len <= size(), otherwise the
456     //           behavior is undefined.
Fill(int32_t idx,int32_t len,const T & value)457     void Fill(int32_t idx, int32_t len, const T& value) {
458       for (int32_t i = 0; i < len; ++i) {
459         SetDirty(idx + i);
460         data_[idx + i] = value;
461       }
462     }
463 
464    private:
MutableArrayView(FileBackedVector<T> * vector,T * data,int32_t len)465     MutableArrayView(FileBackedVector<T>* vector, T* data, int32_t len)
466         : vector_(vector),
467           data_(data),
468           original_idx_(data - vector->array()),
469           len_(len) {}
470 
SetDirty(int32_t idx)471     void SetDirty(int32_t idx) { vector_->SetDirty(original_idx_ + idx); }
472 
473     // Does not own. For SetDirty only.
474     FileBackedVector<T>* vector_;
475 
476     // data_ points at vector_->mutable_array()[original_idx_]
477     T* data_;
478     int32_t original_idx_;
479     int32_t len_;
480 
481     friend class FileBackedVector;
482   };
483 
484   class MutableView {
485    public:
Get()486     const T& Get() const { return mutable_array_view_[0]; }
Get()487     T& Get() { return mutable_array_view_[0]; }
488 
489    private:
MutableView(FileBackedVector<T> * vector,T * data)490     MutableView(FileBackedVector<T>* vector, T* data)
491         : mutable_array_view_(vector, data, 1) {}
492 
493     MutableArrayView mutable_array_view_;
494 
495     friend class FileBackedVector;
496   };
497 
498  private:
499   // We track partial updates to the array for crc updating. This
500   // requires extra memory to keep track of original buffers but
501   // allows for much faster crc re-computation. This is the frac limit
502   // of byte len after which we will discard recorded changes and
503   // recompute the entire crc instead.
504   static constexpr int32_t kPartialCrcLimitDiv = 8;  // limit is 1/8th
505 
506   // Grow file by at least this many elements if array is growable.
507   static constexpr int64_t kGrowElements = 1u << 14;  // 16K
508 
509   // Absolute max index allowed.
510   static constexpr int32_t kMaxIndex = kMaxNumElements - 1;
511 
512   // Can only be created through the factory ::Create function
513   explicit FileBackedVector(const Filesystem& filesystem,
514                             const std::string& file_path,
515                             MemoryMappedFile&& mmapped_file);
516 
517   // Initialize a new FileBackedVector, and create the file.
518   static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
519   InitializeNewFile(const Filesystem& filesystem, const std::string& file_path,
520                     MemoryMappedFile::Strategy mmap_strategy,
521                     int32_t max_file_size, int32_t pre_mapping_mmap_size);
522 
523   // Initialize a FileBackedVector from an existing file.
524   static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
525   InitializeExistingFile(const Filesystem& filesystem,
526                          const std::string& file_path,
527                          MemoryMappedFile::Strategy mmap_strategy,
528                          int64_t file_size, int32_t max_file_size,
529                          int32_t pre_mapping_mmap_size);
530 
531   // Grows the underlying file to hold at least num_elements
532   //
533   // Returns:
534   //   OUT_OF_RANGE_ERROR if we can't grow to the specified size
535   libtextclassifier3::Status GrowIfNecessary(int32_t num_elements);
536 
mutable_array()537   T* mutable_array() const {
538     return reinterpret_cast<T*>(mmapped_file_->mutable_region() +
539                                 sizeof(Header));
540   }
541 
header()542   const Header* header() const {
543     return reinterpret_cast<const Header*>(mmapped_file_->region());
544   }
header()545   Header* header() {
546     return reinterpret_cast<Header*>(mmapped_file_->mutable_region());
547   }
548 
549   // Cached constructor params.
550   const Filesystem* const filesystem_;
551   const std::string file_path_;
552   std::unique_ptr<MemoryMappedFile> mmapped_file_;
553 
554   // Offset before which all the elements have been included in the calculation
555   // of crc at the time it was calculated.
556   int32_t changes_end_ = 0;
557 
558   // Offset of changes that have happened since the last crc update between [0,
559   // changes_end_).
560   std::vector<int32_t> changes_;
561 
562   // Buffer of the original elements that have been changed since the last crc
563   // update. Will be cleared if the size grows too big.
564   std::string saved_original_buffer_;
565 };
566 
567 template <typename T>
568 constexpr int32_t FileBackedVector<T>::kMaxFileSize;
569 
570 template <typename T>
571 constexpr int32_t FileBackedVector<T>::kElementTypeSize;
572 
573 template <typename T>
574 constexpr int32_t FileBackedVector<T>::kMaxNumElements;
575 
576 template <typename T>
577 constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
578 
579 template <typename T>
580 constexpr int64_t FileBackedVector<T>::kGrowElements;
581 
582 template <typename T>
583 constexpr int32_t FileBackedVector<T>::kMaxIndex;
584 
585 template <typename T>
586 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
Create(const Filesystem & filesystem,const std::string & file_path,MemoryMappedFile::Strategy mmap_strategy,int32_t max_file_size,int32_t pre_mapping_mmap_size)587 FileBackedVector<T>::Create(const Filesystem& filesystem,
588                             const std::string& file_path,
589                             MemoryMappedFile::Strategy mmap_strategy,
590                             int32_t max_file_size,
591                             int32_t pre_mapping_mmap_size) {
592   if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
593     // FileBackedVector's behavior of growing the file underneath the mmap is
594     // inherently broken with MAP_PRIVATE. Growing the vector requires extending
595     // the file size, then unmapping and then re-mmapping over the new, larger
596     // file. But when we unmap, we lose all the vector's contents if they
597     // weren't manually persisted. Either don't allow READ_WRITE_MANUAL_SYNC
598     // vectors from growing, or make users aware of this somehow
599     return absl_ports::UnimplementedError(
600         "FileBackedVector currently doesn't support READ_WRITE_MANUAL_SYNC "
601         "mmap strategy.");
602   }
603 
604   if (max_file_size < Header::kHeaderSize + kElementTypeSize ||
605       max_file_size > kMaxFileSize) {
606     // FileBackedVector should be able to store at least 1 element, so
607     // max_file_size should be at least Header::kHeaderSize + kElementTypeSize.
608     return absl_ports::InvalidArgumentError(
609         "Invalid max file size for FileBackedVector");
610   }
611 
612   int64_t file_size = 0;
613   {
614     ScopedFd fd(filesystem.OpenForWrite(file_path.c_str()));
615     if (!fd.is_valid()) {
616       return absl_ports::InternalError(
617           absl_ports::StrCat("Failed to open ", file_path));
618     }
619 
620     file_size = filesystem.GetFileSize(fd.get());
621     if (file_size == Filesystem::kBadFileSize) {
622       return absl_ports::InternalError(
623           absl_ports::StrCat("Bad file size for file ", file_path));
624     }
625 
626     if (max_file_size < file_size) {
627       return absl_ports::InvalidArgumentError(
628           "Max file size should not be smaller than the existing file size");
629     }
630   }
631 
632   if (file_size == 0) {
633     return InitializeNewFile(filesystem, file_path, mmap_strategy,
634                              max_file_size, pre_mapping_mmap_size);
635   }
636   return InitializeExistingFile(filesystem, file_path, mmap_strategy, file_size,
637                                 max_file_size, pre_mapping_mmap_size);
638 }
639 
640 template <typename T>
641 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeNewFile(const Filesystem & filesystem,const std::string & file_path,MemoryMappedFile::Strategy mmap_strategy,int32_t max_file_size,int32_t pre_mapping_mmap_size)642 FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem,
643                                        const std::string& file_path,
644                                        MemoryMappedFile::Strategy mmap_strategy,
645                                        int32_t max_file_size,
646                                        int32_t pre_mapping_mmap_size) {
647   // Create header.
648   Header header = {FileBackedVector<T>::Header::kMagic, kElementTypeSize,
649                    /*num_elements=*/0, /*vector_checksum=*/0,
650                    /*header_checksum=*/0};
651   header.header_checksum = header.CalculateHeaderChecksum();
652 
653   // Create the mmapped file and write the new header to it.
654   // Determine the correct pre_mapping size. max_file_size is specified as the
655   // size of the whole file whereas pre_mapping_mmap_size is just the size of
656   // elements, not including the header.
657   int64_t pre_mapping_mmap_size_long =
658       std::min(max_file_size, pre_mapping_mmap_size + Header::kHeaderSize);
659   ICING_ASSIGN_OR_RETURN(
660       MemoryMappedFile mmapped_file,
661       MemoryMappedFile::Create(filesystem, file_path, mmap_strategy,
662                                max_file_size, /*pre_mapping_file_offset=*/0,
663                                pre_mapping_mmap_size_long));
664   ICING_RETURN_IF_ERROR(mmapped_file.GrowAndRemapIfNecessary(
665       /*new_file_offset=*/0, sizeof(Header)));
666   memcpy(mmapped_file.mutable_region(), &header, sizeof(header));
667 
668   return std::unique_ptr<FileBackedVector<T>>(
669       new FileBackedVector<T>(filesystem, file_path, std::move(mmapped_file)));
670 }
671 
672 template <typename T>
673 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeExistingFile(const Filesystem & filesystem,const std::string & file_path,MemoryMappedFile::Strategy mmap_strategy,int64_t file_size,int32_t max_file_size,int32_t pre_mapping_mmap_size)674 FileBackedVector<T>::InitializeExistingFile(
675     const Filesystem& filesystem, const std::string& file_path,
676     MemoryMappedFile::Strategy mmap_strategy, int64_t file_size,
677     int32_t max_file_size, int32_t pre_mapping_mmap_size) {
678   if (file_size < Header::kHeaderSize) {
679     return absl_ports::InternalError(
680         absl_ports::StrCat("File header too short for ", file_path));
681   }
682 
683   // Mmap the content of the file so its easier to access elements from the
684   // mmapped region. Although users can specify their own pre_mapping_mmap_size,
685   // we should make sure that the pre-map size is at least file_size to make all
686   // existing elements available.
687   // As noted above, pre_mapping_mmap_size is just the size of elements, not
688   // including the header. So we need to add the header size to make it
689   // comparable to file size.
690   int64_t pre_mapping_mmap_size_long = std::max(
691       file_size,
692       static_cast<int64_t>(std::min(
693           max_file_size, pre_mapping_mmap_size + Header::kHeaderSize)));
694   ICING_ASSIGN_OR_RETURN(MemoryMappedFile mmapped_file,
695                          MemoryMappedFile::Create(filesystem, file_path,
696                                                   mmap_strategy, max_file_size,
697                                                   /*pre_mapping_file_offset=*/0,
698                                                   pre_mapping_mmap_size_long));
699 
700   const Header* header = reinterpret_cast<const Header*>(mmapped_file.region());
701   // Make sure the header is still valid before we use any of its values. This
702   // should technically be included in the header_checksum check below, but this
703   // is a quick/fast check that can save us from an extra crc computation.
704   if (header->kMagic != FileBackedVector<T>::Header::kMagic) {
705     return absl_ports::InternalError(
706         absl_ports::StrCat("Invalid header kMagic for ", file_path));
707   }
708 
709   // Check header
710   if (header->header_checksum != header->CalculateHeaderChecksum()) {
711     return absl_ports::FailedPreconditionError(
712         absl_ports::StrCat("Invalid header crc for ", file_path));
713   }
714 
715   if (header->element_size != kElementTypeSize) {
716     return absl_ports::InternalError(IcingStringUtil::StringPrintf(
717         "Inconsistent element size, expected %d, actual %d", kElementTypeSize,
718         header->element_size));
719   }
720 
721   int64_t min_file_size =
722       static_cast<int64_t>(header->num_elements) * kElementTypeSize +
723       Header::kHeaderSize;
724   if (min_file_size > file_size) {
725     return absl_ports::InternalError(IcingStringUtil::StringPrintf(
726         "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
727         min_file_size, file_size));
728   }
729 
730   // Check vector contents
731   const char* vector_contents =
732       reinterpret_cast<const char*>(mmapped_file.region() + sizeof(Header));
733   Crc32 vector_checksum(std::string_view(
734       vector_contents, header->num_elements * kElementTypeSize));
735 
736   if (vector_checksum.Get() != header->vector_checksum) {
737     return absl_ports::FailedPreconditionError(
738         absl_ports::StrCat("Invalid vector contents for ", file_path));
739   }
740 
741   return std::unique_ptr<FileBackedVector<T>>(
742       new FileBackedVector<T>(filesystem, file_path, std::move(mmapped_file)));
743 }
744 
745 template <typename T>
Delete(const Filesystem & filesystem,const std::string & file_path)746 libtextclassifier3::Status FileBackedVector<T>::Delete(
747     const Filesystem& filesystem, const std::string& file_path) {
748   if (!filesystem.DeleteFile(file_path.c_str())) {
749     return absl_ports::InternalError(
750         absl_ports::StrCat("Failed to delete file: ", file_path));
751   }
752   return libtextclassifier3::Status::OK;
753 }
754 
755 template <typename T>
FileBackedVector(const Filesystem & filesystem,const std::string & file_path,MemoryMappedFile && mmapped_file)756 FileBackedVector<T>::FileBackedVector(const Filesystem& filesystem,
757                                       const std::string& file_path,
758                                       MemoryMappedFile&& mmapped_file)
759     : filesystem_(&filesystem),
760       file_path_(file_path),
761       mmapped_file_(
762           std::make_unique<MemoryMappedFile>(std::move(mmapped_file))),
763       changes_end_(header()->num_elements) {}
764 
765 template <typename T>
~FileBackedVector()766 FileBackedVector<T>::~FileBackedVector() {
767   if (mmapped_file_->strategy() ==
768       MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
769     if (!PersistToDisk().ok()) {
770       ICING_LOG(WARNING)
771           << "Failed to persist vector to disk while destructing "
772           << file_path_;
773     }
774   }
775 }
776 
777 template <typename T>
GetCopy(int32_t idx)778 libtextclassifier3::StatusOr<T> FileBackedVector<T>::GetCopy(
779     int32_t idx) const {
780   ICING_ASSIGN_OR_RETURN(const T* value, Get(idx));
781   return *value;
782 }
783 
784 template <typename T>
Get(int32_t idx)785 libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
786     int32_t idx) const {
787   if (idx < 0) {
788     return absl_ports::OutOfRangeError(
789         IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
790   }
791 
792   if (idx >= header()->num_elements) {
793     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
794         "Index, %d, was greater than vector size, %d", idx,
795         header()->num_elements));
796   }
797 
798   return &array()[idx];
799 }
800 
801 template <typename T>
802 libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableView>
GetMutable(int32_t idx)803 FileBackedVector<T>::GetMutable(int32_t idx) {
804   if (idx < 0) {
805     return absl_ports::OutOfRangeError(
806         IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
807   }
808 
809   if (idx >= header()->num_elements) {
810     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
811         "Index, %d, was greater than vector size, %d", idx,
812         header()->num_elements));
813   }
814 
815   return MutableView(this, &mutable_array()[idx]);
816 }
817 
818 template <typename T>
819 libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableArrayView>
GetMutable(int32_t idx,int32_t len)820 FileBackedVector<T>::GetMutable(int32_t idx, int32_t len) {
821   if (idx < 0) {
822     return absl_ports::OutOfRangeError(
823         IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
824   }
825 
826   if (idx > header()->num_elements - len) {
827     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
828         "Index with len, %d %d, was greater than vector size, %d", idx, len,
829         header()->num_elements));
830   }
831 
832   return MutableArrayView(this, &mutable_array()[idx], len);
833 }
834 
835 template <typename T>
Set(int32_t idx,const T & value)836 libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx,
837                                                     const T& value) {
838   return Set(idx, 1, value);
839 }
840 
841 template <typename T>
Set(int32_t idx,int32_t len,const T & value)842 libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, int32_t len,
843                                                     const T& value) {
844   if (idx < 0) {
845     return absl_ports::OutOfRangeError(
846         IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
847   }
848 
849   if (len <= 0) {
850     return absl_ports::OutOfRangeError("Invalid set length");
851   }
852 
853   if (idx > kMaxNumElements - len) {
854     return absl_ports::OutOfRangeError(
855         IcingStringUtil::StringPrintf("Length %d (with index %d), was too long "
856                                       "for max num elements allowed, %d",
857                                       len, idx, kMaxNumElements));
858   }
859 
860   ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + len));
861 
862   if (idx + len > header()->num_elements) {
863     header()->num_elements = idx + len;
864   }
865 
866   for (int32_t i = 0; i < len; ++i) {
867     if (array()[idx + i] == value) {
868       // No need to update
869       continue;
870     }
871 
872     SetDirty(idx + i);
873     mutable_array()[idx + i] = value;
874   }
875 
876   return libtextclassifier3::Status::OK;
877 }
878 
879 template <typename T>
880 libtextclassifier3::StatusOr<typename FileBackedVector<T>::MutableArrayView>
Allocate(int32_t len)881 FileBackedVector<T>::Allocate(int32_t len) {
882   if (len <= 0) {
883     return absl_ports::OutOfRangeError("Invalid allocate length");
884   }
885 
886   if (len > kMaxNumElements - header()->num_elements) {
887     return absl_ports::OutOfRangeError(
888         IcingStringUtil::StringPrintf("Cannot allocate %d elements", len));
889   }
890 
891   // Although header()->num_elements + len doesn't exceed kMaxNumElements, the
892   // actual max # of elements are determined by mmapped_file_->max_file_size(),
893   // kElementTypeSize, and kHeaderSize. Thus, it is still possible to fail to
894   // grow the file.
895   ICING_RETURN_IF_ERROR(GrowIfNecessary(header()->num_elements + len));
896 
897   int32_t start_idx = header()->num_elements;
898   header()->num_elements += len;
899 
900   return MutableArrayView(this, &mutable_array()[start_idx], len);
901 }
902 
903 template <typename T>
GrowIfNecessary(int32_t num_elements)904 libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
905     int32_t num_elements) {
906   if (kElementTypeSize == 0) {
907     // Growing is a no-op
908     return libtextclassifier3::Status::OK;
909   }
910 
911   if (num_elements <= header()->num_elements) {
912     return libtextclassifier3::Status::OK;
913   }
914 
915   if (num_elements > (mmapped_file_->max_file_size() - Header::kHeaderSize) /
916                          kElementTypeSize) {
917     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
918         "%d elements total size exceed maximum bytes of elements allowed, "
919         "%" PRId64 " bytes",
920         num_elements, mmapped_file_->max_file_size() - Header::kHeaderSize));
921   }
922 
923   int32_t least_file_size_needed =
924       Header::kHeaderSize + num_elements * kElementTypeSize;  // Won't overflow
925   if (least_file_size_needed <= mmapped_file_->available_size()) {
926     return libtextclassifier3::Status::OK;
927   }
928 
929   int64_t round_up_file_size_needed = math_util::RoundUpTo(
930       static_cast<int64_t>(least_file_size_needed),
931       static_cast<int64_t>(FileBackedVector<T>::kGrowElements) * kElementTypeSize);
932 
933   // Call GrowAndRemapIfNecessary. It handles file growth internally and remaps
934   // intelligently.
935   // We've ensured that least_file_size_needed (for num_elements) doesn't exceed
936   // mmapped_file_->max_file_size(), but it is still possible that
937   // round_up_file_size_needed exceeds it, so use the smaller value of them as
938   // new_mmap_size.
939   ICING_RETURN_IF_ERROR(mmapped_file_->GrowAndRemapIfNecessary(
940       /*new_file_offset=*/0,
941       /*new_mmap_size=*/std::min(round_up_file_size_needed,
942                                  mmapped_file_->max_file_size())));
943 
944   return libtextclassifier3::Status::OK;
945 }
946 
947 template <typename T>
TruncateTo(int32_t new_num_elements)948 libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
949     int32_t new_num_elements) {
950   if (new_num_elements < 0) {
951     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
952         "Truncated length %d must be >= 0", new_num_elements));
953   }
954 
955   if (new_num_elements >= header()->num_elements) {
956     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
957         "Truncated length %d must be less than the current size %d",
958         new_num_elements, header()->num_elements));
959   }
960 
961   ICING_VLOG(2)
962       << "FileBackedVector truncating, need to recalculate entire checksum";
963   changes_.clear();
964   saved_original_buffer_.clear();
965   changes_end_ = 0;
966   header()->vector_checksum = 0;
967 
968   header()->num_elements = new_num_elements;
969   return libtextclassifier3::Status::OK;
970 }
971 
972 template <typename T>
Sort(int32_t begin_idx,int32_t end_idx)973 libtextclassifier3::Status FileBackedVector<T>::Sort(int32_t begin_idx,
974                                                      int32_t end_idx) {
975   if (begin_idx < 0 || begin_idx >= end_idx ||
976       end_idx > header()->num_elements) {
977     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
978         "Invalid sort index, %d, %d", begin_idx, end_idx));
979   }
980   for (int32_t i = begin_idx; i < end_idx; ++i) {
981     SetDirty(i);
982   }
983   std::sort(mutable_array() + begin_idx, mutable_array() + end_idx);
984   return libtextclassifier3::Status::OK;
985 }
986 
987 template <typename T>
SetDirty(int32_t idx)988 void FileBackedVector<T>::SetDirty(int32_t idx) {
989   // Cache original value to update crcs.
990   if (idx >= 0 && idx < changes_end_) {
991     // If we exceed kPartialCrcLimitDiv, clear changes_end_ to
992     // revert to full CRC.
993     if ((saved_original_buffer_.size() + kElementTypeSize) *
994             FileBackedVector<T>::kPartialCrcLimitDiv >
995         changes_end_ * kElementTypeSize) {
996       ICING_VLOG(2) << "FileBackedVector change tracking limit exceeded";
997       changes_.clear();
998       saved_original_buffer_.clear();
999       changes_end_ = 0;
1000       header()->vector_checksum = 0;
1001     } else {
1002       int32_t start_byte = idx * kElementTypeSize;
1003 
1004       changes_.push_back(idx);
1005       saved_original_buffer_.append(
1006           reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
1007           kElementTypeSize);
1008     }
1009   }
1010 }
1011 
1012 template <typename T>
UpdateChecksum()1013 libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::UpdateChecksum() {
1014   // First apply the modified area. Keep a bitmap of already updated
1015   // regions so we don't double-update.
1016   std::vector<bool> updated(changes_end_);
1017   uint32_t cur_offset = 0;
1018   Crc32 cur_crc(header()->vector_checksum);
1019   int num_partial_crcs = 0;
1020   int num_truncated = 0;
1021   int num_overlapped = 0;
1022   int num_duplicate = 0;
1023   for (const int32_t change_offset : changes_) {
1024     if (change_offset > changes_end_) {
1025       return absl_ports::InternalError(IcingStringUtil::StringPrintf(
1026           "Failed to update crc, change offset %d, changes_end_ %d",
1027           change_offset, changes_end_));
1028     }
1029 
1030     // Skip truncated tracked changes.
1031     if (change_offset >= header()->num_elements) {
1032       ++num_truncated;
1033       continue;
1034     }
1035 
1036     // Turn change buffer into change^original.
1037     const char* buffer_end =
1038         &saved_original_buffer_[cur_offset + kElementTypeSize];
1039     const char* cur_array = reinterpret_cast<const char*>(array()) +
1040                             change_offset * kElementTypeSize;
1041     // Now xor in. SSE acceleration please?
1042     for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
1043          cur++, cur_array++) {
1044       *cur ^= *cur_array;
1045     }
1046 
1047     // Skip over already updated bytes by setting update to 0.
1048     bool new_update = false;
1049     bool overlap = false;
1050     uint32_t cur_element = change_offset;
1051     for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
1052          cur_element++, cur += kElementTypeSize) {
1053       if (updated[cur_element]) {
1054         memset(cur, 0, kElementTypeSize);
1055         overlap = true;
1056       } else {
1057         updated[cur_element] = true;
1058         new_update = true;
1059       }
1060     }
1061 
1062     // Apply update to crc.
1063     if (new_update) {
1064       // Explicitly create the string_view with length
1065       std::string_view xored_str(buffer_end - kElementTypeSize,
1066                                  kElementTypeSize);
1067       if (!cur_crc
1068                .UpdateWithXor(xored_str, changes_end_ * kElementTypeSize,
1069                               change_offset * kElementTypeSize)
1070                .ok()) {
1071         return absl_ports::InternalError(IcingStringUtil::StringPrintf(
1072             "Failed to update crc, change offset %d, change "
1073             "length %zd changes_end_ %d",
1074             change_offset, xored_str.length(), changes_end_));
1075       }
1076       num_partial_crcs++;
1077       if (overlap) {
1078         num_overlapped++;
1079       }
1080     } else {
1081       num_duplicate++;
1082     }
1083     cur_offset += kElementTypeSize;
1084   }
1085 
1086   if (!changes_.empty()) {
1087     ICING_VLOG(2) << IcingStringUtil::StringPrintf(
1088         "Array update partial crcs %d truncated %d overlapped %d duplicate %d",
1089         num_partial_crcs, num_truncated, num_overlapped, num_duplicate);
1090   }
1091 
1092   // Now update with grown area.
1093   if (changes_end_ < header()->num_elements) {
1094     // Explicitly create the string_view with length
1095     std::string_view update_str(
1096         reinterpret_cast<const char*>(array()) +
1097             changes_end_ * kElementTypeSize,
1098         (header()->num_elements - changes_end_) * kElementTypeSize);
1099     cur_crc.Append(update_str);
1100     ICING_VLOG(2) << IcingStringUtil::StringPrintf(
1101         "Array update tail crc offset %d -> %d", changes_end_,
1102         header()->num_elements);
1103   }
1104 
1105   // Clear, now that we've applied changes.
1106   changes_.clear();
1107   saved_original_buffer_.clear();
1108   changes_end_ = header()->num_elements;
1109 
1110   // Commit new crc.
1111   header()->vector_checksum = cur_crc.Get();
1112   header()->header_checksum = header()->CalculateHeaderChecksum();
1113   return cur_crc;
1114 }
1115 
1116 template <typename T>
GetChecksum()1117 Crc32 FileBackedVector<T>::GetChecksum() const {
1118   if (changes_.empty() && changes_end_ == header()->num_elements) {
1119     // No changes, just return the checksum cached in the header.
1120     return Crc32(header()->vector_checksum);
1121   }
1122   // TODO(b/352778910): Mirror the same logic in UpdateChecksum() to reduce the
1123   // cost of GetChecksum.
1124   Crc32 cur_crc(std::string_view(reinterpret_cast<const char*>(array()),
1125                                  header()->num_elements * kElementTypeSize));
1126   return cur_crc;
1127 }
1128 
1129 template <typename T>
PersistToDisk()1130 libtextclassifier3::Status FileBackedVector<T>::PersistToDisk() {
1131   // Update and write the header
1132   ICING_RETURN_IF_ERROR(UpdateChecksum());
1133   if (mmapped_file_->strategy() ==
1134       MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
1135     // Changes should have been applied to the underlying file, but call msync()
1136     // as an extra safety step to ensure they are written out.
1137     ICING_RETURN_IF_ERROR(mmapped_file_->PersistToDisk());
1138   }
1139 
1140   return libtextclassifier3::Status::OK;
1141 }
1142 
1143 template <typename T>
GetDiskUsage()1144 libtextclassifier3::StatusOr<int64_t> FileBackedVector<T>::GetDiskUsage()
1145     const {
1146   int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
1147   if (size == Filesystem::kBadFileSize) {
1148     return absl_ports::InternalError(
1149         "Failed to get disk usage of file-backed vector");
1150   }
1151   return size;
1152 }
1153 
1154 template <typename T>
GetElementsFileSize()1155 libtextclassifier3::StatusOr<int64_t> FileBackedVector<T>::GetElementsFileSize()
1156     const {
1157   int64_t total_file_size = filesystem_->GetFileSize(file_path_.c_str());
1158   if (total_file_size == Filesystem::kBadFileSize) {
1159     return absl_ports::InternalError(
1160         "Failed to get file size of elements in the file-backed vector");
1161   }
1162   if (total_file_size < Header::kHeaderSize) {
1163     return absl_ports::InternalError(
1164         "File size should not be smaller than header size");
1165   }
1166   return total_file_size - Header::kHeaderSize;
1167 }
1168 
1169 }  // namespace lib
1170 }  // namespace icing
1171 
1172 #endif  // ICING_FILE_FILE_BACKED_VECTOR_H_
1173