1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
16 #define ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
17
18 #include <cinttypes>
19 #include <cstdint>
20 #include <cstring>
21 #include <memory>
22 #include <string>
23 #include <utility>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/file/constants.h"
29 #include "icing/file/file-backed-vector.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/legacy/core/icing-string-util.h"
33 #include "icing/util/crc32.h"
34 #include "icing/util/status-macros.h"
35
36 namespace icing {
37 namespace lib {
38
// Memory-mapped-file-backed proto log for append-only writes and
// position-based reads.
//
// This class is built on top of the FileBackedVector class, which handles the
// underlying file-related operations, such as checksumming and flushing to
// disk.
//
// This class is NOT thread-safe.
46 template <typename ProtoT>
47 class MemoryMappedFileBackedProtoLog {
48 public:
49 // Creates a new MemoryMappedFileBackedProtoLog to read/write content to.
50 //
51 // filesystem: Object to make system level calls
52 // file_path : Specifies the file to persist the log to; must be a path
53 // within a directory that already exists.
54 //
55 // Return:
56 // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
57 // checksum.
58 // INTERNAL_ERROR on I/O errors.
59 static libtextclassifier3::StatusOr<
60 std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>>
61 Create(const Filesystem& filesystem, const std::string& file_path);
62
63 // Deletes the underlying file.
64 static libtextclassifier3::Status Delete(const Filesystem& filesystem,
65 const std::string& file_path);
66
67 // Delete copy constructor and assignment operator.
68 MemoryMappedFileBackedProtoLog(const MemoryMappedFileBackedProtoLog&) =
69 delete;
70 MemoryMappedFileBackedProtoLog& operator=(
71 const MemoryMappedFileBackedProtoLog&) = delete;
72
73 // Calculates the checksum of the log contents and returns it. Does NOT
74 // update the header.
75 //
76 // Returns:
77 // Checksum of the log contents.
78 Crc32 GetChecksum() const;
79
80 // Calculates the checksum of the log contents and updates the header to
81 // hold this updated value.
82 //
83 // Returns:
84 // Checksum on success
85 // INTERNAL_ERROR on IO error
86 libtextclassifier3::StatusOr<Crc32> UpdateChecksum();
87
88 // Calculates and returns the disk usage in bytes. Rounds up to the nearest
89 // block size.
90 //
91 // Returns:
92 // Disk usage on success
93 // INTERNAL_ERROR on IO error
94 libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
95
96 // Returns the file size of the all the elements held in the log. File size
97 // is in bytes. This excludes the size of the header of the log file.
98 //
99 // Returns:
100 // File size on success
101 // INTERNAL_ERROR on IO error
102 libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
103
104 // Reads the proto at the given index.
105 //
106 // Returns:
107 // proto on success
108 // INTERNAL_ERROR if the index points to an invalid position.
109 // OUT_OF_RANGE_ERROR if:
110 // - index < 0 or index >= num_elements - sizeof(ProtoMetadata)
111 libtextclassifier3::StatusOr<ProtoT> Read(int32_t index) const;
112
113 // Appends the proto to the end of the log.
114 //
115 // Returns:
116 // Index of the newly appended proto, on success.
117 // INVALID_ARGUMENT if the proto size exceeds the max size limit, 16MiB.
118 libtextclassifier3::StatusOr<int32_t> Write(const ProtoT& proto);
119
120 // Flushes content to underlying file.
121 //
122 // Returns:
123 // OK on success
124 // INTERNAL_ERROR on I/O errors
125 libtextclassifier3::Status PersistToDisk();
126
127 private:
128 // The metadata of the proto, it contains 4 bytes, with the most significant
129 // byte being the magic number, and remaining three bytes being the proto
130 // size.
131 // It is stored in front of every proto.
132 using ProtoMetadata = int32_t;
133
134 // Magic number encoded in the most significant byte of the proto metadata.
135 static constexpr uint8_t kProtoMagic = 0x55;
136
137 // Validates the proto metadata and extracts the proto size from it.
138 //
139 // Returns:
140 // INTERNAL_ERROR if the magic number stored in the metadata is
141 // invalid.
142 static libtextclassifier3::StatusOr<int32_t> ValidateAndGetProtoSize(
143 ProtoMetadata proto_metadata);
144
145 explicit MemoryMappedFileBackedProtoLog(
146 std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv);
147
148 std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv_;
149 };
150
151 template <typename ProtoT>
MemoryMappedFileBackedProtoLog(std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv)152 MemoryMappedFileBackedProtoLog<ProtoT>::MemoryMappedFileBackedProtoLog(
153 std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv)
154 : proto_fbv_(std::move(proto_fbv)) {}
155
156 template <typename ProtoT>
157 libtextclassifier3::StatusOr<int32_t>
ValidateAndGetProtoSize(ProtoMetadata proto_metadata)158 MemoryMappedFileBackedProtoLog<ProtoT>::ValidateAndGetProtoSize(
159 ProtoMetadata proto_metadata) {
160 uint8_t magic_number = proto_metadata >> 24;
161 if (magic_number != kProtoMagic) {
162 return absl_ports::InvalidArgumentError(
163 "Proto metadata has invalid magic number");
164 }
165 return proto_metadata & 0x00FFFFFF;
166 }
167
168 template <typename ProtoT>
GetChecksum()169 Crc32 MemoryMappedFileBackedProtoLog<ProtoT>::GetChecksum() const {
170 return proto_fbv_->GetChecksum();
171 }
172
173 template <typename ProtoT>
174 libtextclassifier3::StatusOr<Crc32>
UpdateChecksum()175 MemoryMappedFileBackedProtoLog<ProtoT>::UpdateChecksum() {
176 return proto_fbv_->UpdateChecksum();
177 }
178
179 template <typename ProtoT>
180 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()181 MemoryMappedFileBackedProtoLog<ProtoT>::GetDiskUsage() const {
182 return proto_fbv_->GetDiskUsage();
183 }
184
185 template <typename ProtoT>
186 libtextclassifier3::StatusOr<int64_t>
GetElementsFileSize()187 MemoryMappedFileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
188 return proto_fbv_->GetElementsFileSize();
189 }
190
191 template <typename ProtoT>
192 libtextclassifier3::StatusOr<
193 std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>>
Create(const Filesystem & filesystem,const std::string & file_path)194 MemoryMappedFileBackedProtoLog<ProtoT>::Create(const Filesystem& filesystem,
195 const std::string& file_path) {
196 ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<uint8_t>> proto_fbv,
197 FileBackedVector<uint8_t>::Create(
198 filesystem, file_path,
199 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
200
201 return std::unique_ptr<MemoryMappedFileBackedProtoLog<ProtoT>>(
202 new MemoryMappedFileBackedProtoLog<ProtoT>(std::move(proto_fbv)));
203 }
204
205 template <typename ProtoT>
Delete(const Filesystem & filesystem,const std::string & file_path)206 libtextclassifier3::Status MemoryMappedFileBackedProtoLog<ProtoT>::Delete(
207 const Filesystem& filesystem, const std::string& file_path) {
208 return FileBackedVector<uint8_t>::Delete(filesystem, file_path);
209 }
210
211 template <typename ProtoT>
212 libtextclassifier3::StatusOr<ProtoT>
Read(int32_t index)213 MemoryMappedFileBackedProtoLog<ProtoT>::Read(int32_t index) const {
214 if (index < 0) {
215 return absl_ports::OutOfRangeError(
216 IcingStringUtil::StringPrintf("Index, %d, is less than 0", index));
217 }
218 if (index + sizeof(ProtoMetadata) >= proto_fbv_->num_elements()) {
219 uint64_t upper_index = proto_fbv_->num_elements() - sizeof(ProtoMetadata);
220 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
221 "Index, %" PRId32 ", is greater/equal than the upper bound, %" PRIu64,
222 index, upper_index));
223 }
224
225 ProtoMetadata proto_metadata;
226 std::memcpy(&proto_metadata, proto_fbv_->array() + index,
227 sizeof(ProtoMetadata));
228
229 ICING_ASSIGN_OR_RETURN(int32_t proto_size,
230 ValidateAndGetProtoSize(proto_metadata));
231 ProtoT proto_data;
232 if (!proto_data.ParseFromArray(
233 proto_fbv_->array() + index + sizeof(ProtoMetadata), proto_size)) {
234 return absl_ports::InternalError(
235 "Failed to parse proto from MemoryMappedFileBackedProtoLog");
236 }
237 return proto_data;
238 }
239
240 template <typename ProtoT>
241 libtextclassifier3::StatusOr<int32_t>
Write(const ProtoT & proto)242 MemoryMappedFileBackedProtoLog<ProtoT>::Write(const ProtoT& proto) {
243 int32_t proto_byte_size = proto.ByteSizeLong();
244 if (proto_byte_size > constants::kMaxProtoSize) {
245 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
246 "Proto data size must be under 16MiB, was %d", proto_byte_size));
247 }
248
249 int32_t index_of_new_proto = proto_fbv_->num_elements();
250 ICING_ASSIGN_OR_RETURN(
251 FileBackedVector<uint8_t>::MutableArrayView mutable_array_view,
252 proto_fbv_->Allocate(sizeof(ProtoMetadata) + proto_byte_size));
253
254 ProtoMetadata proto_metadata = (kProtoMagic << 24) | proto_byte_size;
255 uint8_t* byte_ptr = reinterpret_cast<uint8_t*>(&proto_metadata);
256 mutable_array_view.SetArray(/*idx=*/0, byte_ptr, sizeof(ProtoMetadata));
257 proto.SerializeWithCachedSizesToArray(
258 &mutable_array_view[sizeof(ProtoMetadata)]);
259
260 return index_of_new_proto;
261 }
262
263 template <typename ProtoT>
264 libtextclassifier3::Status
PersistToDisk()265 MemoryMappedFileBackedProtoLog<ProtoT>::PersistToDisk() {
266 return proto_fbv_->PersistToDisk();
267 }
268
269 } // namespace lib
270 } // namespace icing
271
272 #endif // ICING_FILE_MEMORY_MAPPED_FILE_BACKED_PROTO_LOG_H_
273