1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_EMBED_POSTING_LIST_EMBEDDING_HIT_ACCESSOR_H_ 16 #define ICING_INDEX_EMBED_POSTING_LIST_EMBEDDING_HIT_ACCESSOR_H_ 17 18 #include <memory> 19 #include <utility> 20 #include <vector> 21 22 #include "icing/text_classifier/lib3/utils/base/status.h" 23 #include "icing/text_classifier/lib3/utils/base/statusor.h" 24 #include "icing/file/posting_list/flash-index-storage.h" 25 #include "icing/file/posting_list/posting-list-accessor.h" 26 #include "icing/file/posting_list/posting-list-identifier.h" 27 #include "icing/file/posting_list/posting-list-used.h" 28 #include "icing/index/embed/embedding-hit.h" 29 #include "icing/index/embed/posting-list-embedding-hit-serializer.h" 30 31 namespace icing { 32 namespace lib { 33 34 // This class is used to provide a simple abstraction for adding hits to posting 35 // lists. PostingListEmbeddingHitAccessor handles 1) selection of properly-sized 36 // posting lists for the accumulated hits during Finalize() and 2) chaining of 37 // max-sized posting lists. 38 class PostingListEmbeddingHitAccessor : public PostingListAccessor { 39 public: 40 // Creates an empty PostingListEmbeddingHitAccessor. 41 // 42 // RETURNS: 43 // - On success, a valid unique_ptr instance of 44 // PostingListEmbeddingHitAccessor 45 // - INVALID_ARGUMENT error if storage has an invalid block_size. 46 static libtextclassifier3::StatusOr< 47 std::unique_ptr<PostingListEmbeddingHitAccessor>> 48 Create(FlashIndexStorage* storage, 49 PostingListEmbeddingHitSerializer* serializer); 50 51 // Create a PostingListEmbeddingHitAccessor with an existing posting list 52 // identified by existing_posting_list_id. 53 // 54 // The PostingListEmbeddingHitAccessor will add hits to this posting list 55 // until it is necessary either to 1) chain the posting list (if it is 56 // max-sized) or 2) move its hits to a larger posting list. 57 // 58 // RETURNS: 59 // - On success, a valid unique_ptr instance of 60 // PostingListEmbeddingHitAccessor 61 // - INVALID_ARGUMENT if storage has an invalid block_size. 62 static libtextclassifier3::StatusOr< 63 std::unique_ptr<PostingListEmbeddingHitAccessor>> 64 CreateFromExisting(FlashIndexStorage* storage, 65 PostingListEmbeddingHitSerializer* serializer, 66 PostingListIdentifier existing_posting_list_id); 67 GetSerializer()68 PostingListSerializer* GetSerializer() override { return serializer_; } 69 70 // Retrieve the next batch of hits for the posting list chain 71 // 72 // RETURNS: 73 // - On success, a vector of hits in the posting list chain 74 // - INTERNAL if called on an instance of PostingListEmbeddingHitAccessor 75 // that was created via PostingListEmbeddingHitAccessor::Create, if unable 76 // to read the next posting list in the chain or if the posting list has 77 // been corrupted somehow. 78 libtextclassifier3::StatusOr<std::vector<EmbeddingHit>> GetNextHitsBatch(); 79 80 // Prepend one hit. This may result in flushing the posting list to disk (if 81 // the PostingListEmbeddingHitAccessor holds a max-sized posting list that is 82 // full) or freeing a pre-existing posting list if it is too small to fit all 83 // hits necessary. 84 // 85 // RETURNS: 86 // - OK, on success 87 // - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the 88 // previously added hit. 89 // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new 90 // posting list. 91 libtextclassifier3::Status PrependHit(const EmbeddingHit& hit); 92 93 private: PostingListEmbeddingHitAccessor(FlashIndexStorage * storage,PostingListEmbeddingHitSerializer * serializer,PostingListUsed in_memory_posting_list)94 explicit PostingListEmbeddingHitAccessor( 95 FlashIndexStorage* storage, PostingListEmbeddingHitSerializer* serializer, 96 PostingListUsed in_memory_posting_list) 97 : PostingListAccessor(storage, std::move(in_memory_posting_list)), 98 serializer_(serializer) {} 99 100 PostingListEmbeddingHitSerializer* serializer_; // Does not own. 101 }; 102 103 } // namespace lib 104 } // namespace icing 105 106 #endif // ICING_INDEX_EMBED_POSTING_LIST_EMBEDDING_HIT_ACCESSOR_H_ 107