1 // Copyright (C) 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_FILE_PERSISTENT_STORAGE_H_ 16 #define ICING_FILE_PERSISTENT_STORAGE_H_ 17 18 #include <cstdint> 19 #include <string> 20 #include <string_view> 21 22 #include "icing/text_classifier/lib3/utils/base/status.h" 23 #include "icing/text_classifier/lib3/utils/base/statusor.h" 24 #include "icing/absl_ports/canonical_errors.h" 25 #include "icing/absl_ports/str_cat.h" 26 #include "icing/file/filesystem.h" 27 #include "icing/util/crc32.h" 28 #include "icing/util/status-macros.h" 29 30 namespace icing { 31 namespace lib { 32 33 // PersistentStorage: an abstract class for all persistent data structures. 34 // - It provides some common persistent file methods, e.g. PersistToDisk. 35 // - It encapsulates most of the checksum handling logics (including update and 36 // validation). 37 // 38 // Terminology: 39 // - Crcs: checksum section 40 // - Info: (custom) information for derived class 41 // - Metadata: Crcs + Info 42 // 43 // Usually a persistent data structure will have its own custom Info and 44 // storages (single or composite storages) definition. To create a new 45 // persistent data structure via PersistentStorage: 46 // - Decide what type the working path is (single file or directory). See 47 // working_path_ and WorkingPathType for more details. 48 // - Create a new class that inherits PersistentStorage: 49 // - Declare custom Info and design the metadata section layout. 50 // Usually the layout is <Crcs><Info>, and there are 2 common ways to 51 // manage metadata section: 52 // - Have a separate file for metadata. In this case, the new persistent 53 // data structure contains multiple files, so working path should be used 54 // as directory path and multiple files will be stored under it. Example: 55 // PersistentHashMap. 56 // - Have a single file for both metadata and storage data. In this case, 57 // the file layout should be <Crcs><Info><Storage Data>, and 58 // working path should be used as file path. Example: FileBackedVector. 59 // - Handle working path file/directory creation and deletion. 60 // PersistentStorage only provides static Discard() method to use. The 61 // derived class should implement other logics, e.g. working path (file 62 // /directory) creation, check condition to discard working path and start 63 // over new file(s). 64 // - Implement all pure virtual methods: 65 // - PersistStoragesToDisk: persist all (composite) storages. In general, 66 // the implementation will be calling PersistToDisk for all composite 67 // storages. 68 // - PersistMetadataToDisk: persist metadata, including Crcs and Info. 69 // - If the derived class maintains a concrete Crc and (custom) Info 70 // instance, then it should perform write/pwrite into the metadata 71 // section. 72 // - If the derived class uses memory-mapped region directly for metadata, 73 // then it should call MemoryMappedFile::PersistToDisk. 74 // - See crcs() for more details. 75 // - GetInfoChecksum: compute the checksum for custom Info. 76 // - GetStoragesChecksum: compute the (combined) checksum for all 77 // (composite) storages. In general, the implementation will be calling 78 // GetChecksum for all composite storages and XOR all checksums. 79 // - UpdateStoragesChecksum: update the (combined) checksum for all 80 // (composite) storages. In general, the implementation will be calling 81 // UpdateChecksum for all composite storages and XOR all checksums. 82 // - crcs(): provide the reference for PersistentStorage to write checksums. 83 // The derived class can either maintain a concrete Crcs instance, or 84 // reinterpret_cast the memory-mapped region to Crcs reference. Either 85 // choice is fine as long as PersistMetadataToDisk flushes it to disk 86 // correctly. 87 // - Call either InitializeNewStorage or InitializeExistingStorage when creating 88 // and initializing an instance, depending on initializing new storage or from 89 // existing file(s). 90 class PersistentStorage { 91 public: 92 enum class WorkingPathType { 93 kSingleFile, 94 kDirectory, 95 kDummy, 96 }; 97 98 // Crcs and Info will be written into the metadata section. Info is defined by 99 // the actual implementation of each persistent storage. Usually the Metadata 100 // layout is: <Crcs><Info> 101 struct Crcs { 102 struct ComponentCrcs { 103 uint32_t info_crc; 104 uint32_t storages_crc; 105 106 bool operator==(const ComponentCrcs& other) const { 107 return info_crc == other.info_crc && storages_crc == other.storages_crc; 108 } 109 GetChecksumCrcs::ComponentCrcs110 Crc32 GetChecksum() const { 111 return Crc32(std::string_view(reinterpret_cast<const char*>(this), 112 sizeof(ComponentCrcs))); 113 } 114 } __attribute__((packed)); 115 116 bool operator==(const Crcs& other) const { 117 return all_crc == other.all_crc && component_crcs == other.component_crcs; 118 } 119 120 uint32_t all_crc; 121 ComponentCrcs component_crcs; 122 } __attribute__((packed)); 123 static_assert(sizeof(Crcs) == 12, ""); 124 125 // Deletes working_path according to its type. 126 // 127 // Returns: 128 // - OK on success 129 // - INTERNAL_ERROR on I/O error 130 // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type 131 static libtextclassifier3::Status Discard(const Filesystem& filesystem, 132 const std::string& working_path, 133 WorkingPathType working_path_type); 134 135 virtual ~PersistentStorage() = default; 136 137 // Initializes new persistent storage. It computes the initial checksums and 138 // writes into the metadata file. 139 // 140 // Note: either InitializeNewStorage or InitializeExistingStorage should be 141 // invoked after creating a PersistentStorage instance before using, otherwise 142 // an uninitialized instance will fail to use persistent storage features, 143 // e.g. PersistToDisk, UpdateChecksums. 144 // 145 // Returns: 146 // - OK on success or already initialized 147 // - Any errors from GetInfoChecksum, UpdateStoragesChecksum, depending on 148 // actual implementation InitializeNewStorage()149 libtextclassifier3::Status InitializeNewStorage() { 150 if (is_initialized_) { 151 return libtextclassifier3::Status::OK; 152 } 153 154 ICING_RETURN_IF_ERROR(UpdateChecksumsInternal()); 155 ICING_RETURN_IF_ERROR(PersistStoragesToDisk()); 156 ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); 157 158 is_initialized_ = true; 159 return libtextclassifier3::Status::OK; 160 } 161 162 // Initializes persistent storage from existing file(s). 163 // 164 // It enforces the following check(s): 165 // - Validate checksums. 166 // 167 // Note: either InitializeNewStorage or InitializeExistingStorage should be 168 // invoked after creating a PersistentStorage instance before using. 169 // 170 // Returns: 171 // - OK on success or already initialized 172 // - FAILED_PRECONDITION_ERROR if checksum validation fails. 173 // - Any errors from GetInfoChecksum, GetStoragesChecksum, depending on 174 // actual implementation InitializeExistingStorage()175 libtextclassifier3::Status InitializeExistingStorage() { 176 if (is_initialized_) { 177 return libtextclassifier3::Status::OK; 178 } 179 180 ICING_RETURN_IF_ERROR(ValidateChecksums()); 181 182 is_initialized_ = true; 183 return libtextclassifier3::Status::OK; 184 } 185 186 // Flushes contents to underlying files. 187 // 1) Flushes storages. 188 // 2) Updates all checksums by new data. 189 // 3) Flushes metadata. 190 // 191 // Returns: 192 // - OK on success 193 // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized 194 // - Any errors from PersistStoragesToDisk, UpdateChecksums, 195 // PersistMetadataToDisk, depending on actual implementation PersistToDisk()196 libtextclassifier3::Status PersistToDisk() { 197 if (!is_initialized_) { 198 return absl_ports::FailedPreconditionError(absl_ports::StrCat( 199 "PersistentStorage ", working_path_, " not initialized")); 200 } 201 202 ICING_RETURN_IF_ERROR(UpdateChecksumsInternal()); 203 ICING_RETURN_IF_ERROR(PersistStoragesToDisk()); 204 ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); 205 return libtextclassifier3::Status::OK; 206 } 207 208 // Updates checksums of all components and returns the overall crc (all_crc) 209 // of the persistent storage. 210 // 211 // Returns: 212 // - Overall crc of the persistent storage on success 213 // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized 214 // - Any errors from UpdateInfoChecksum, UpdateStoragesChecksum, depending 215 // on actual implementation UpdateChecksums()216 libtextclassifier3::StatusOr<Crc32> UpdateChecksums() { 217 if (!is_initialized_) { 218 return absl_ports::FailedPreconditionError(absl_ports::StrCat( 219 "PersistentStorage ", working_path_, " not initialized")); 220 } 221 222 ICING_ASSIGN_OR_RETURN(Crc32 crc, UpdateChecksumsInternal()); 223 ICING_RETURN_IF_ERROR(WriteMetadata()); 224 return crc; 225 } 226 227 // Calculates and returns the overall crc (all_crc) of the persistent storage. 228 // 229 // Returns: 230 // - Overall crc of the persistent storage on success 231 // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized 232 // - Any errors from GetInfoChecksum, GetStoragesChecksum, depending on 233 // actual implementation GetChecksum()234 libtextclassifier3::StatusOr<Crc32> GetChecksum() const { 235 if (!is_initialized_) { 236 return absl_ports::FailedPreconditionError(absl_ports::StrCat( 237 "PersistentStorage ", working_path_, " not initialized")); 238 } 239 240 ICING_ASSIGN_OR_RETURN(Crc32 info_crc, GetInfoChecksum()); 241 ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, GetStoragesChecksum()); 242 Crcs::ComponentCrcs crcs = {info_crc.Get(), storages_crc.Get()}; 243 return crcs.GetChecksum(); 244 } 245 246 protected: PersistentStorage(const Filesystem & filesystem,std::string working_path,WorkingPathType working_path_type)247 explicit PersistentStorage(const Filesystem& filesystem, 248 std::string working_path, 249 WorkingPathType working_path_type) 250 : filesystem_(filesystem), 251 working_path_(std::move(working_path)), 252 working_path_type_(working_path_type), 253 is_initialized_(false) {} 254 255 // Flushes contents of metadata. The implementation should flush Crcs and Info 256 // correctly, depending on whether they're using memory-mapped regions or 257 // concrete instances in the derived class. 258 // 259 // It is valid to call this function even when is_initialized_ is false. 260 // 261 // Returns: 262 // - OK on success 263 // - Any other errors, depending on actual implementation 264 virtual libtextclassifier3::Status PersistMetadataToDisk() = 0; 265 266 // Flushes contents of all storages to underlying files. 267 // 268 // It is valid to call this function even when is_initialized_ is false. 269 // 270 // Returns: 271 // - OK on success 272 // - Any other errors, depending on actual implementation 273 virtual libtextclassifier3::Status PersistStoragesToDisk() = 0; 274 275 // Writes the contents of the metadata, if necessary. Unlike 276 // PersistMetadataToDisk this method does not explicitly flush the metadata to 277 // disk. 278 // 279 // Returns: 280 // - OK on success 281 // - Any other errors, depending on actual implementation 282 virtual libtextclassifier3::Status WriteMetadata() = 0; 283 284 // Computes and updates all storages checksums and returns a combined checksum 285 // of all storages. If there are multiple storages, usually we XOR their 286 // checksums together to a single checksum. 287 // 288 // This function will be mainly called by UpdateChecksums. 289 // 290 // It is valid to call this function even when is_initialized_ is false. 291 // 292 // Returns: 293 // - Crc of all storages on success 294 // - Any other errors from depending on actual implementation 295 virtual libtextclassifier3::StatusOr<Crc32> UpdateStoragesChecksum() = 0; 296 297 // Computes and returns Info checksum. 298 // 299 // This function will be mainly called by GetChecksum. 300 // 301 // It is valid to call this function even when is_initialized_ is false. 302 // 303 // Returns: 304 // - Crc of the Info on success 305 // - Any other errors, depending on actual implementation 306 virtual libtextclassifier3::StatusOr<Crc32> GetInfoChecksum() const = 0; 307 308 // Computes and returns all storages checksum. If there are multiple storages, 309 // usually we XOR their checksums together to a single checksum. 310 // 311 // This function will be mainly called by GetChecksum. 312 // 313 // It is valid to call this function even when is_initialized_ is false. 314 // 315 // Returns: 316 // - Crc of all storages on success 317 // - Any other errors, depending on actual implementation 318 virtual libtextclassifier3::StatusOr<Crc32> GetStoragesChecksum() const = 0; 319 320 // Returns the Crcs instance reference. The derived class can either own a 321 // concrete Crcs instance, or reinterpret_cast the memory-mapped region to 322 // Crcs reference. PersistMetadataToDisk should flush it to disk correctly. 323 virtual Crcs& crcs() = 0; 324 virtual const Crcs& crcs() const = 0; 325 326 const Filesystem& filesystem_; // Does not own 327 // Path to the storage. It can be a single file path or a directory path 328 // depending on the implementation of the derived class. 329 // 330 // Note that the derived storage class will take full ownership and of 331 // working_path_, including creation/deletion. It is the caller's 332 // responsibility to specify correct working path and avoid mixing different 333 // persistent storages together under the same path. Also the caller has the 334 // ownership for the parent directory of working_path_, and it is responsible 335 // for parent directory creation/deletion. 336 std::string working_path_; 337 WorkingPathType working_path_type_; 338 339 bool is_initialized_; 340 341 private: 342 // Updates checksums of all components and returns the overall crc (all_crc) 343 // of the persistent storage. Different from UpdateChecksums, it won't check 344 // if PersistentStorage is initialized or not. 345 // 346 // Returns: 347 // - Overall crc of the persistent storage on success 348 // - Any errors from GetInfoChecksum, UpdateStoragesChecksum, depending on 349 // actual implementation UpdateChecksumsInternal()350 libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() { 351 Crcs& crcs_ref = crcs(); 352 // Compute and update storages + info checksums. 353 ICING_ASSIGN_OR_RETURN(Crc32 info_crc, GetInfoChecksum()); 354 ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, UpdateStoragesChecksum()); 355 if (crcs_ref.component_crcs.info_crc == info_crc.Get() && 356 crcs_ref.component_crcs.storages_crc == storages_crc.Get()) { 357 // If info and storages crc haven't changed, then we don't have to update 358 // checksums. 359 return Crc32(crcs_ref.all_crc); 360 } 361 362 crcs_ref.component_crcs.info_crc = info_crc.Get(); 363 crcs_ref.component_crcs.storages_crc = storages_crc.Get(); 364 365 // Finally compute and update overall checksum. 366 Crc32 all_crc = crcs_ref.component_crcs.GetChecksum(); 367 crcs_ref.all_crc = all_crc.Get(); 368 return all_crc; 369 } 370 371 // Validates all checksums of the persistent storage. 372 // 373 // Returns: 374 // - OK on success 375 // - FAILED_PRECONDITION_ERROR if any checksum is incorrect. 376 // - Any errors from GetInfoChecksum, GetStoragesChecksum, depending on 377 // actual implementation ValidateChecksums()378 libtextclassifier3::Status ValidateChecksums() const { 379 const Crcs& crcs_ref = crcs(); 380 if (crcs_ref.all_crc != crcs_ref.component_crcs.GetChecksum().Get()) { 381 return absl_ports::FailedPreconditionError("Invalid all crc"); 382 } 383 384 ICING_ASSIGN_OR_RETURN(Crc32 info_crc, GetInfoChecksum()); 385 if (crcs_ref.component_crcs.info_crc != info_crc.Get()) { 386 return absl_ports::FailedPreconditionError("Invalid info crc"); 387 } 388 389 ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, GetStoragesChecksum()); 390 if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) { 391 return absl_ports::FailedPreconditionError("Invalid storages crc"); 392 } 393 return libtextclassifier3::Status::OK; 394 } 395 }; 396 397 } // namespace lib 398 } // namespace icing 399 400 #endif // ICING_FILE_PERSISTENT_STORAGE_H_ 401