xref: /aosp_15_r20/external/icing/icing/index/data-indexing-handler.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_
16 #define ICING_INDEX_DATA_INDEXING_HANDLER_H_
17 
18 #include "icing/text_classifier/lib3/utils/base/status.h"
19 #include "icing/proto/logging.pb.h"
20 #include "icing/store/document-id.h"
21 #include "icing/util/clock.h"
22 #include "icing/util/tokenized-document.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 // Parent class for indexing different types of data in TokenizedDocument.
28 class DataIndexingHandler {
29  public:
DataIndexingHandler(const Clock * clock)30   explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {}
31 
32   virtual ~DataIndexingHandler() = default;
33 
34   // Handles the indexing process: add data into the specific type index (e.g.
35   // term index, integer index, qualified id type joinable index, embedding
36   // index) for all contents in the corresponding type of data in
37   // tokenized_document. For example, IntegerSectionIndexingHandler::Handle
38   // should add data into integer index for all contents in
39   // tokenized_document.integer_sections.
40   //
41   // old_document_id is provided. If valid, then it means the document with
42   // the same (namespace, uri) exists previously, and it is updated with new
43   // contents at this round. Each indexing handler should decide whether
44   // migrating existing data from old_document_id to (new) document_id according
45   // to each index's data logic.
46   //
47   // Also it should handle last added DocumentId properly (based on
48   // recovery_mode_) to avoid adding previously indexed documents.
49   //
50   // tokenized_document: document object with different types of tokenized data.
51   // document_id:        id of the document.
52   // old_document_id:    id of the document before the update. If it is a new
53   //                     document, then it will be kInvalidDocumentId.
54   // recovery_mode:      decides how to handle document_id <=
55   //                     last_added_document_id. If in recovery_mode, then
56   //                     Handle() will simply return OK immediately. Otherwise,
57   //                     returns INVALID_ARGUMENT_ERROR.
58   // put_document_stats: object for collecting stats during indexing. It can be
59   //                     nullptr.
60   //
61   /// Returns:
62   //   - OK on success.
63   //   - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less
64   //     than or equal to the document_id of a previously indexed document in
65   //     non recovery mode.
66   //   - Any other errors. It depends on each implementation.
67   virtual libtextclassifier3::Status Handle(
68       const TokenizedDocument& tokenized_document, DocumentId document_id,
69       DocumentId old_document_id, bool recovery_mode,
70       PutDocumentStatsProto* put_document_stats) = 0;
71 
72  protected:
73   const Clock& clock_;  // Does not own.
74 };
75 
76 }  // namespace lib
77 }  // namespace icing
78 
79 #endif  // ICING_INDEX_DATA_INDEXING_HANDLER_H_
80