xref: /aosp_15_r20/external/icing/icing/index/embedding-indexing-handler.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_
16 #define ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_
17 
18 #include <memory>
19 
20 #include "icing/text_classifier/lib3/utils/base/status.h"
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/index/data-indexing-handler.h"
23 #include "icing/index/embed/embedding-index.h"
24 #include "icing/store/document-id.h"
25 #include "icing/util/clock.h"
26 #include "icing/util/tokenized-document.h"
27 
28 namespace icing {
29 namespace lib {
30 
31 class EmbeddingIndexingHandler : public DataIndexingHandler {
32  public:
33   ~EmbeddingIndexingHandler() override = default;
34 
35   // Creates an EmbeddingIndexingHandler instance which does not take
36   // ownership of any input components. All pointers must refer to valid objects
37   // that outlive the created EmbeddingIndexingHandler instance.
38   //
39   // Returns:
40   //   - An EmbeddingIndexingHandler instance on success
41   //   - FAILED_PRECONDITION_ERROR if any of the input pointer is null
42   static libtextclassifier3::StatusOr<std::unique_ptr<EmbeddingIndexingHandler>>
43   Create(const Clock* clock, EmbeddingIndex* embedding_index,
44          bool enable_embedding_index);
45 
46   // Handles the embedding indexing process: add hits into the embedding index
47   // for all contents in tokenized_document.vector_sections.
48   //
49   // Parameter old_document_id is unused since there is no need to migrate data
50   // from old_document_id to (new) document_id.
51   //
52   // Returns:
53   //   - OK on success.
54   //   - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
55   //     than or equal to the document_id of a previously indexed document in
56   //     non recovery mode.
57   //   - INTERNAL_ERROR if any other errors occur.
58   //   - Any embedding index errors.
59   libtextclassifier3::Status Handle(
60       const TokenizedDocument& tokenized_document, DocumentId document_id,
61       DocumentId /*old_document_id*/ _, bool recovery_mode,
62       PutDocumentStatsProto* put_document_stats) override;
63 
64  private:
EmbeddingIndexingHandler(const Clock * clock,EmbeddingIndex * embedding_index,bool enable_embedding_index)65   explicit EmbeddingIndexingHandler(const Clock* clock,
66                                     EmbeddingIndex* embedding_index,
67                                     bool enable_embedding_index)
68       : DataIndexingHandler(clock),
69         embedding_index_(*embedding_index),
70         enable_embedding_index_(enable_embedding_index) {}
71 
72   EmbeddingIndex& embedding_index_;
73   bool enable_embedding_index_;
74 };
75 
76 }  // namespace lib
77 }  // namespace icing
78 
79 #endif  // ICING_INDEX_EMBEDDING_INDEXING_HANDLER_H_
80