1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_ 16 #define ICING_INDEX_DATA_INDEXING_HANDLER_H_ 17 18 #include "icing/text_classifier/lib3/utils/base/status.h" 19 #include "icing/proto/logging.pb.h" 20 #include "icing/store/document-id.h" 21 #include "icing/util/clock.h" 22 #include "icing/util/tokenized-document.h" 23 24 namespace icing { 25 namespace lib { 26 27 // Parent class for indexing different types of data in TokenizedDocument. 28 class DataIndexingHandler { 29 public: DataIndexingHandler(const Clock * clock)30 explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {} 31 32 virtual ~DataIndexingHandler() = default; 33 34 // Handles the indexing process: add data into the specific type index (e.g. 35 // term index, integer index, qualified id type joinable index, embedding 36 // index) for all contents in the corresponding type of data in 37 // tokenized_document. For example, IntegerSectionIndexingHandler::Handle 38 // should add data into integer index for all contents in 39 // tokenized_document.integer_sections. 40 // 41 // old_document_id is provided. If valid, then it means the document with 42 // the same (namespace, uri) exists previously, and it is updated with new 43 // contents at this round. Each indexing handler should decide whether 44 // migrating existing data from old_document_id to (new) document_id according 45 // to each index's data logic. 46 // 47 // Also it should handle last added DocumentId properly (based on 48 // recovery_mode_) to avoid adding previously indexed documents. 49 // 50 // tokenized_document: document object with different types of tokenized data. 51 // document_id: id of the document. 52 // old_document_id: id of the document before the update. If it is a new 53 // document, then it will be kInvalidDocumentId. 54 // recovery_mode: decides how to handle document_id <= 55 // last_added_document_id. If in recovery_mode, then 56 // Handle() will simply return OK immediately. Otherwise, 57 // returns INVALID_ARGUMENT_ERROR. 58 // put_document_stats: object for collecting stats during indexing. It can be 59 // nullptr. 60 // 61 /// Returns: 62 // - OK on success. 63 // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less 64 // than or equal to the document_id of a previously indexed document in 65 // non recovery mode. 66 // - Any other errors. It depends on each implementation. 67 virtual libtextclassifier3::Status Handle( 68 const TokenizedDocument& tokenized_document, DocumentId document_id, 69 DocumentId old_document_id, bool recovery_mode, 70 PutDocumentStatsProto* put_document_stats) = 0; 71 72 protected: 73 const Clock& clock_; // Does not own. 74 }; 75 76 } // namespace lib 77 } // namespace icing 78 79 #endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_ 80