1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_ 16 #define ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_ 17 18 #include <memory> 19 20 #include "icing/text_classifier/lib3/utils/base/status.h" 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 #include "icing/index/data-indexing-handler.h" 23 #include "icing/index/embed/embedding-index.h" 24 #include "icing/store/document-id.h" 25 #include "icing/util/clock.h" 26 #include "icing/util/tokenized-document.h" 27 28 namespace icing { 29 namespace lib { 30 31 class EmbeddingIndexingHandler : public DataIndexingHandler { 32 public: 33 ~EmbeddingIndexingHandler() override = default; 34 35 // Creates an EmbeddingIndexingHandler instance which does not take 36 // ownership of any input components. All pointers must refer to valid objects 37 // that outlive the created EmbeddingIndexingHandler instance. 38 // 39 // Returns: 40 // - An EmbeddingIndexingHandler instance on success 41 // - FAILED_PRECONDITION_ERROR if any of the input pointer is null 42 static libtextclassifier3::StatusOr<std::unique_ptr<EmbeddingIndexingHandler>> 43 Create(const Clock* clock, EmbeddingIndex* embedding_index, 44 bool enable_embedding_index); 45 46 // Handles the embedding indexing process: add hits into the embedding index 47 // for all contents in tokenized_document.vector_sections. 48 // 49 // Parameter old_document_id is unused since there is no need to migrate data 50 // from old_document_id to (new) document_id. 51 // 52 // Returns: 53 // - OK on success. 54 // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less 55 // than or equal to the document_id of a previously indexed document in 56 // non recovery mode. 57 // - INTERNAL_ERROR if any other errors occur. 58 // - Any embedding index errors. 59 libtextclassifier3::Status Handle( 60 const TokenizedDocument& tokenized_document, DocumentId document_id, 61 DocumentId /*old_document_id*/ _, bool recovery_mode, 62 PutDocumentStatsProto* put_document_stats) override; 63 64 private: EmbeddingIndexingHandler(const Clock * clock,EmbeddingIndex * embedding_index,bool enable_embedding_index)65 explicit EmbeddingIndexingHandler(const Clock* clock, 66 EmbeddingIndex* embedding_index, 67 bool enable_embedding_index) 68 : DataIndexingHandler(clock), 69 embedding_index_(*embedding_index), 70 enable_embedding_index_(enable_embedding_index) {} 71 72 EmbeddingIndex& embedding_index_; 73 bool enable_embedding_index_; 74 }; 75 76 } // namespace lib 77 } // namespace icing 78 79 #endif // ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_ 80