xref: /aosp_15_r20/external/icing/icing/file/memory-mapped-file-backed-proto-log.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
16 #define ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
17 
18 #include <cinttypes>
19 #include <cstdint>
20 #include <cstring>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/file/constants.h"
29 #include "icing/file/file-backed-vector.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/legacy/core/icing-string-util.h"
33 #include "icing/util/crc32.h"
34 #include "icing/util/status-macros.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 // Memory-mapped-file backed proto log for append-only writes and position based
40 // reads.
41 //
42 // This class is built on top of the FileBackedVector class, which handles the
43 // underlying files related operations, such as checksums, flushing to disk.
44 //
45 // This class is NOT thread-safe.
46 template <typename ProtoT>
47 class MemoryMappedFileBackedProtoLog {
48  public:
49   // Creates a new MemoryMappedFileBackedProtoLog to read/write content to.
50   //
51   // filesystem: Object to make system level calls
52   // file_path : Specifies the file to persist the log to; must be a path
53   //             within a directory that already exists.
54   //
55   // Return:
56   //   FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
57   //                             checksum.
58   //   INTERNAL_ERROR on I/O errors.
59   static libtextclassifier3::StatusOr<
60       std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>>
61   Create(const Filesystem& filesystem, const std::string& file_path);
62 
63   // Deletes the underlying file.
64   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
65                                            const std::string& file_path);
66 
67   // Delete copy constructor and assignment operator.
68   MemoryMappedFileBackedProtoLog(const MemoryMappedFileBackedProtoLog&) =
69       delete;
70   MemoryMappedFileBackedProtoLog& operator=(
71       const MemoryMappedFileBackedProtoLog&) = delete;
72 
73   // Calculates the checksum of the log contents and returns it. Does NOT
74   // update the header.
75   //
76   // Returns:
77   //   Checksum of the log contents.
78   Crc32 GetChecksum() const;
79 
80   // Calculates the checksum of the log contents and updates the header to
81   // hold this updated value.
82   //
83   // Returns:
84   //   Checksum on success
85   //   INTERNAL_ERROR on IO error
86   libtextclassifier3::StatusOr<Crc32> UpdateChecksum();
87 
88   // Calculates and returns the disk usage in bytes. Rounds up to the nearest
89   // block size.
90   //
91   // Returns:
92   //   Disk usage on success
93   //   INTERNAL_ERROR on IO error
94   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
95 
96   // Returns the file size of the all the elements held in the log. File size
97   // is in bytes. This excludes the size of the header of the log file.
98   //
99   // Returns:
100   //   File size on success
101   //   INTERNAL_ERROR on IO error
102   libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
103 
104   // Reads the proto at the given index.
105   //
106   // Returns:
107   //   proto on success
108   //   INTERNAL_ERROR if the index points to an invalid position.
109   //   OUT_OF_RANGE_ERROR if:
110   //     - index < 0 or index >= num_elements - sizeof(ProtoMetadata)
111   libtextclassifier3::StatusOr<ProtoT> Read(int32_t index) const;
112 
113   // Appends the proto to the end of the log.
114   //
115   // Returns:
116   //   Index of the newly appended proto, on success.
117   //   INVALID_ARGUMENT if the proto size exceeds the max size limit, 16MiB.
118   libtextclassifier3::StatusOr<int32_t> Write(const ProtoT& proto);
119 
120   // Flushes content to underlying file.
121   //
122   // Returns:
123   //   OK on success
124   //   INTERNAL_ERROR on I/O errors
125   libtextclassifier3::Status PersistToDisk();
126 
127  private:
128   // The metadata of the proto, it contains 4 bytes, with the most significant
129   // byte being the magic number, and remaining three bytes being the proto
130   // size.
131   // It is stored in front of every proto.
132   using ProtoMetadata = int32_t;
133 
134   // Magic number encoded in the most significant byte of the proto metadata.
135   static constexpr uint8_t kProtoMagic = 0x55;
136 
137   // Validates the proto metadata and extracts the proto size from it.
138   //
139   // Returns:
140   //       INTERNAL_ERROR if the magic number stored in the metadata is
141   //       invalid.
142   static libtextclassifier3::StatusOr<int32_t> ValidateAndGetProtoSize(
143       ProtoMetadata proto_metadata);
144 
145   explicit MemoryMappedFileBackedProtoLog(
146       std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv);
147 
148   std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv_;
149 };
150 
151 template <typename ProtoT>
MemoryMappedFileBackedProtoLog(std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv)152 MemoryMappedFileBackedProtoLog<ProtoT>::MemoryMappedFileBackedProtoLog(
153     std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv)
154     : proto_fbv_(std::move(proto_fbv)) {}
155 
156 template <typename ProtoT>
157 libtextclassifier3::StatusOr<int32_t>
ValidateAndGetProtoSize(ProtoMetadata proto_metadata)158 MemoryMappedFileBackedProtoLog<ProtoT>::ValidateAndGetProtoSize(
159     ProtoMetadata proto_metadata) {
160   uint8_t magic_number = proto_metadata >> 24;
161   if (magic_number != kProtoMagic) {
162     return absl_ports::InvalidArgumentError(
163         "Proto metadata has invalid magic number");
164   }
165   return proto_metadata & 0x00FFFFFF;
166 }
167 
168 template <typename ProtoT>
GetChecksum()169 Crc32 MemoryMappedFileBackedProtoLog<ProtoT>::GetChecksum() const {
170   return proto_fbv_->GetChecksum();
171 }
172 
173 template <typename ProtoT>
174 libtextclassifier3::StatusOr<Crc32>
UpdateChecksum()175 MemoryMappedFileBackedProtoLog<ProtoT>::UpdateChecksum() {
176   return proto_fbv_->UpdateChecksum();
177 }
178 
179 template <typename ProtoT>
180 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()181 MemoryMappedFileBackedProtoLog<ProtoT>::GetDiskUsage() const {
182   return proto_fbv_->GetDiskUsage();
183 }
184 
185 template <typename ProtoT>
186 libtextclassifier3::StatusOr<int64_t>
GetElementsFileSize()187 MemoryMappedFileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
188   return proto_fbv_->GetElementsFileSize();
189 }
190 
191 template <typename ProtoT>
192 libtextclassifier3::StatusOr<
193     std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>>
Create(const Filesystem & filesystem,const std::string & file_path)194 MemoryMappedFileBackedProtoLog<ProtoT>::Create(const Filesystem& filesystem,
195                                                const std::string& file_path) {
196   ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv,
197                          FileBackedVector<uint8_t>::Create(
198                              filesystem, file_path,
199                              MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
200 
201   return std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>(
202       new MemoryMappedFileBackedProtoLog<ProtoT>(std::move(proto_fbv)));
203 }
204 
205 template <typename ProtoT>
Delete(const Filesystem & filesystem,const std::string & file_path)206 libtextclassifier3::Status MemoryMappedFileBackedProtoLog<ProtoT>::Delete(
207     const Filesystem& filesystem, const std::string& file_path) {
208   return FileBackedVector<uint8_t>::Delete(filesystem, file_path);
209 }
210 
211 template <typename ProtoT>
212 libtextclassifier3::StatusOr<ProtoT>
Read(int32_t index)213 MemoryMappedFileBackedProtoLog<ProtoT>::Read(int32_t index) const {
214   if (index < 0) {
215     return absl_ports::OutOfRangeError(
216         IcingStringUtil::StringPrintf("Index, %d, is less than 0", index));
217   }
218   if (index + sizeof(ProtoMetadata) >= proto_fbv_->num_elements()) {
219     uint64_t upper_index = proto_fbv_->num_elements() - sizeof(ProtoMetadata);
220     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
221         "Index, %" PRId32 ", is greater/equal than the upper bound, %" PRIu64,
222         index,  upper_index));
223   }
224 
225   ProtoMetadata proto_metadata;
226   std::memcpy(&proto_metadata, proto_fbv_->array() + index,
227               sizeof(ProtoMetadata));
228 
229   ICING_ASSIGN_OR_RETURN(int32_t proto_size,
230                          ValidateAndGetProtoSize(proto_metadata));
231   ProtoT proto_data;
232   if (!proto_data.ParseFromArray(
233           proto_fbv_->array() + index + sizeof(ProtoMetadata), proto_size)) {
234     return absl_ports::InternalError(
235         "Failed to parse proto from MemoryMappedFileBackedProtoLog");
236   }
237   return proto_data;
238 }
239 
240 template <typename ProtoT>
241 libtextclassifier3::StatusOr<int32_t>
Write(const ProtoT & proto)242 MemoryMappedFileBackedProtoLog<ProtoT>::Write(const ProtoT& proto) {
243   int32_t proto_byte_size = proto.ByteSizeLong();
244   if (proto_byte_size > constants::kMaxProtoSize) {
245     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
246         "Proto data size must be under 16MiB, was %d", proto_byte_size));
247   }
248 
249   int32_t index_of_new_proto = proto_fbv_->num_elements();
250   ICING_ASSIGN_OR_RETURN(
251       FileBackedVector<uint8_t>::MutableArrayView mutable_array_view,
252       proto_fbv_->Allocate(sizeof(ProtoMetadata) + proto_byte_size));
253 
254   ProtoMetadata proto_metadata = (kProtoMagic << 24) | proto_byte_size;
255   uint8_t* byte_ptr = reinterpret_cast<uint8_t*>(&proto_metadata);
256   mutable_array_view.SetArray(/*idx=*/0, byte_ptr, sizeof(ProtoMetadata));
257   proto.SerializeWithCachedSizesToArray(
258       &mutable_array_view[sizeof(ProtoMetadata)]);
259 
260   return index_of_new_proto;
261 }
262 
263 template <typename ProtoT>
264 libtextclassifier3::Status
PersistToDisk()265 MemoryMappedFileBackedProtoLog<ProtoT>::PersistToDisk() {
266   return proto_fbv_->PersistToDisk();
267 }
268 
269 }  // namespace lib
270 }  // namespace icing
271 
272 #endif  // ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
273