1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_STORE_BLOB_STORE_H_ 16 #define ICING_STORE_BLOB_STORE_H_ 17 18 #include <cstdint> 19 #include <string> 20 #include <unordered_map> 21 #include <unordered_set> 22 #include <utility> 23 24 #include "icing/text_classifier/lib3/utils/base/status.h" 25 #include "icing/text_classifier/lib3/utils/base/statusor.h" 26 #include "icing/file/filesystem.h" 27 #include "icing/file/portable-file-backed-proto-log.h" 28 #include "icing/proto/blob.pb.h" 29 #include "icing/proto/document.pb.h" 30 #include "icing/proto/storage.pb.h" 31 #include "icing/util/clock.h" 32 33 namespace icing { 34 namespace lib { 35 36 // Provides storage interfaces for Blobs. 37 // 38 // The BlobStore is responsible for storing blobs in a directory and for 39 // ensuring that the directory is in a consistent state. 40 // 41 // A blob is a file that is stored in the BlobStore. A blob is identified by 42 // a blob handle, which is a unique identifier for the blob. 43 // 44 // Any blob that is written to the BlobStore must be committed before it can be 45 // read. A blob can be committed only once. After a blob is committed, it is 46 // not allowed to be updated. 47 // 48 // The BlobStore is not thread-safe. 49 class BlobStore { 50 public: 51 // Builds a string representation of a blob handle. 52 // The string is used as the key in the key mapper. 53 static std::string BuildBlobHandleStr( 54 const PropertyProto::BlobHandleProto& blob_handle); 55 56 // Factory function to create a BlobStore instance. The base directory is 57 // used to persist blobs. If a blob store was previously created with 58 // this directory, it will reload the files saved by the last instance. 59 // 60 // The callers must create the base directory before calling this function. 61 // 62 // Returns: 63 // A BlobStore on success 64 // FAILED_PRECONDITION_ERROR on any null pointer input 65 // INTERNAL_ERROR on I/O error 66 static libtextclassifier3::StatusOr<BlobStore> Create( 67 const Filesystem* filesystem, std::string base_dir, const Clock* clock, 68 int64_t orphan_blob_time_to_live_ms, int32_t compression_level); 69 70 // Gets or creates a file for write only purpose for the given blob handle. 71 // To mark the blob is completed written, CommitBlob must be called. Once 72 // CommitBlob is called, the blob is sealed and rewrite is not allowed. 73 // 74 // It is the user's responsibility to close the file descriptor after writing 75 // is done and should operate on the file descriptor after commit or remove 76 // it. 77 // 78 // Returns: 79 // File descriptor (writable) on success 80 // INVALID_ARGUMENT_ERROR on invalid blob handle 81 // FAILED_PRECONDITION_ERROR on blob is already opened for write 82 // ALREADY_EXISTS_ERROR if the blob has already been committed 83 // INTERNAL_ERROR on IO error 84 libtextclassifier3::StatusOr<int> OpenWrite( 85 const PropertyProto::BlobHandleProto& blob_handle); 86 87 // Removes a blob file and blob handle from the blob store. 88 // 89 // This will remove the blob on any state. No matter it's committed or not or 90 // it has reference document links or not. 91 // 92 // Returns: 93 // INVALID_ARGUMENT_ERROR on invalid blob handle 94 // NOT_FOUND_ERROR on blob is not found 95 // INTERNAL_ERROR on IO error 96 libtextclassifier3::Status RemoveBlob( 97 const PropertyProto::BlobHandleProto& blob_handle); 98 99 // Gets a file for read only purpose for the given blob handle. 100 // Will only succeed for blobs that were committed by calling CommitBlob. 101 // 102 // It is the user's responsibility to close the file descriptor after reading. 103 // 104 // Returns: 105 // File descriptor (read only) on success 106 // INVALID_ARGUMENT_ERROR on invalid blob handle 107 // NOT_FOUND_ERROR on blob is not found or is not committed 108 libtextclassifier3::StatusOr<int> OpenRead( 109 const PropertyProto::BlobHandleProto& blob_handle); 110 111 // Commits the given blob, if the blob is finished wrote via OpenWrite. 112 // Before the blob is committed, it is not visible to any reader via OpenRead. 113 // After the blob is committed, it is not allowed to rewrite or update the 114 // content. 115 // 116 // Returns: 117 // OK on the blob is successfully committed. 118 // ALREADY_EXISTS_ERROR on the blob is already committed, this is no op. 119 // INVALID_ARGUMENT_ERROR on invalid blob handle or digest is mismatch with 120 // file content. 121 // NOT_FOUND_ERROR on blob is not found. 122 libtextclassifier3::Status CommitBlob( 123 const PropertyProto::BlobHandleProto& blob_handle); 124 125 // Persists the blobs to disk. 126 libtextclassifier3::Status PersistToDisk(); 127 128 // Gets the potentially optimizable blob handles. 129 // 130 // A blob will be consider as a potentially optimizable blob if it created 131 // before the orphan_blob_time_to_live_ms. And the blob should be removed if 132 // it has no reference document links to it. 133 std::unordered_set<std::string> GetPotentiallyOptimizableBlobHandles(); 134 135 // Optimize the blob store and remove dead blob files. 136 // 137 // A blob will be consider as a dead blob and removed if it meets BOTH of 138 // following conditions 139 // 1: has no reference document links to it 140 // 2: It's mature. 141 // 142 // Returns: 143 // OK on success 144 // INTERNAL_ERROR on IO error 145 libtextclassifier3::Status Optimize( 146 const std::unordered_set<std::string>& dead_blob_handles); 147 148 // Calculates the StorageInfo for the Blob Store. 149 // 150 // Returns: 151 // Vector of NamespaceBlobStorageInfoProto contains size of each namespace. 152 // INTERNAL_ERROR on I/O error 153 libtextclassifier3::StatusOr<std::vector<NamespaceBlobStorageInfoProto>> 154 GetStorageInfo() const; 155 156 private: BlobStore(const Filesystem * filesystem,std::string base_dir,const Clock * clock,int64_t orphan_blob_time_to_live_ms,int32_t compression_level,std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log,std::unordered_map<std::string,int32_t> blob_handle_to_offset,std::unordered_set<std::string> known_file_names)157 explicit BlobStore( 158 const Filesystem* filesystem, std::string base_dir, const Clock* clock, 159 int64_t orphan_blob_time_to_live_ms, int32_t compression_level, 160 std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log, 161 std::unordered_map<std::string, int32_t> blob_handle_to_offset, 162 std::unordered_set<std::string> known_file_names) 163 : filesystem_(*filesystem), 164 base_dir_(std::move(base_dir)), 165 clock_(*clock), 166 orphan_blob_time_to_live_ms_(orphan_blob_time_to_live_ms), 167 compression_level_(compression_level), 168 blob_info_log_(std::move(blob_info_log)), 169 blob_handle_to_offset_(std::move(blob_handle_to_offset)), 170 known_file_names_(std::move(known_file_names)) {} 171 172 libtextclassifier3::StatusOr<BlobInfoProto> GetOrCreateBlobInfo( 173 const std::string& blob_handle_str, 174 const PropertyProto::BlobHandleProto& blob_handle); 175 176 const Filesystem& filesystem_; 177 std::string base_dir_; 178 const Clock& clock_; 179 int64_t orphan_blob_time_to_live_ms_; 180 int32_t compression_level_; 181 182 // The ground truth blob info log file, which is used to read/write/erase 183 // BlobInfoProto. 184 std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log_; 185 186 // The map for BlobHandle string to the offset of BlobInfoProto in the 187 // BlobInfoProto log file. 188 // The keys are the Encoded CString from BlobHandleProto. 189 std::unordered_map<std::string, int32_t> blob_handle_to_offset_; 190 191 // The set of used file names to store blobs in the blob store. 192 std::unordered_set<std::string> known_file_names_; 193 194 bool has_mutated_ = false; 195 }; 196 197 } // namespace lib 198 } // namespace icing 199 200 #endif // ICING_STORE_BLOB_STORE_H_ 201