1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 16 #define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <optional> 21 #include <string> 22 #include <unordered_map> 23 #include <unordered_set> 24 #include <vector> 25 26 #include "icing/text_classifier/lib3/utils/base/status.h" 27 #include "icing/text_classifier/lib3/utils/base/statusor.h" 28 #include "icing/monkey_test/monkey-test-util.h" 29 #include "icing/monkey_test/monkey-tokenized-document.h" 30 #include "icing/proto/document.pb.h" 31 #include "icing/proto/schema.pb.h" 32 #include "icing/proto/search.pb.h" 33 #include "icing/proto/term.pb.h" 34 #include "icing/store/document-id.h" 35 36 namespace icing { 37 namespace lib { 38 39 class InMemoryIcingSearchEngine { 40 public: 41 struct PickDocumentResult { 42 std::string name_space; 43 std::string uri; 44 // document is empty if and only if such (name_space, uri) is not alive 45 // in the in-memory icing. 46 std::optional<DocumentProto> document; 47 }; 48 InMemoryIcingSearchEngine(MonkeyTestRandomEngine * random)49 InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random) : random_(random) {} 50 GetNumAliveDocuments()51 uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); } 52 GetSchema()53 const SchemaProto *GetSchema() const { return schema_.get(); } 54 55 void SetSchema(SchemaProto &&schema); 56 57 // Randomly pick a document from the in-memory Icing for monkey testing. 58 // 59 // p_alive: chance of getting an alive document. 60 // p_all: chance of getting a document that has ever been "Put" before, 61 // including already "Delete"d documents. 62 // p_other: chance of getting a random namespace + uri that has never been 63 // "Put" before. 64 // 65 // p_alive, p_all, and p_other is required to be positive and sum to 1. 66 // Otherwise, they will be normalized to ensure this. 67 // 68 // Returns an instance of PickDocumentResult. 69 PickDocumentResult RandomPickDocument(float p_alive, float p_all, 70 float p_other) const; 71 72 // Puts the document into the in-memory Icing. If the (namespace, uri) pair 73 // already exists, the old document will be overwritten. 74 void Put(const MonkeyTokenizedDocument &document); 75 76 std::unordered_set<std::string> GetAllNamespaces() const; 77 78 // Deletes the Document specified by the given (namespace, uri) pair. 79 // 80 // Returns: 81 // OK on success 82 // NOT_FOUND if no document exists with namespace, uri 83 libtextclassifier3::Status Delete(const std::string &name_space, 84 const std::string &uri); 85 86 // Deletes all Documents belonging to the specified namespace. 87 // 88 // Returns: 89 // The number of deleted documents on success 90 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 91 libtextclassifier3::StatusOr<uint32_t> DeleteByNamespace( 92 const std::string &name_space); 93 94 // Deletes all Documents belonging to the specified type 95 // 96 // Returns: 97 // The number of deleted documents on success 98 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 99 libtextclassifier3::StatusOr<uint32_t> DeleteBySchemaType( 100 const std::string &schema_type); 101 102 // Deletes all Documents that match the query specified in search_spec. 103 // Check the comments of Search() for the supported query types. 104 // 105 // Returns: 106 // The number of deleted documents on success 107 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 108 libtextclassifier3::StatusOr<uint32_t> DeleteByQuery( 109 const SearchSpecProto &search_spec); 110 111 // Retrieves documents according to search_spec. 112 // Currently, only the "query", "term_match_type", "embedding_query_vectors", 113 // and "embedding_query_metric_type" fields are recognized by the in-memory 114 // Icing. 115 // 116 // For term based queries, only single term queries with possible section 117 // restrictions are supported. 118 // 119 // For embedding based queries, only the fixed format of 120 // `semanticSearch(getEmbeddingParameter(0), low, high)` is supported, where 121 // `low` and `high` are floating point numbers that specify the score range. 122 // Section restrictions are also recognized. 123 libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search( 124 const SearchSpecProto &search_spec) const; 125 126 private: 127 // Does not own. 128 MonkeyTestRandomEngine *random_; 129 130 std::vector<MonkeyTokenizedDocument> documents_; 131 std::vector<DocumentId> existing_doc_ids_; 132 // A map from namespaces to uris and then from uris to internal document ids, 133 // which is used for fast lookups. 134 std::unordered_map<std::string, std::unordered_map<std::string, DocumentId>> 135 namespace_uri_docid_map; 136 137 std::unique_ptr<SchemaProto> schema_; 138 // A map that maps from (schema_type, property_name) to the corresponding 139 // PropertyConfigProto. 140 std::unordered_map< 141 std::string, std::unordered_map<std::string, const PropertyConfigProto &>> 142 property_config_map_; 143 144 // Finds and returns the internal document id for the document identified by 145 // the given key (namespace, uri) 146 // 147 // Returns: 148 // The document id found on success 149 // NOT_FOUND if the key doesn't exist or doc has been deleted 150 libtextclassifier3::StatusOr<DocumentId> InternalGet( 151 const std::string &name_space, const std::string &uri) const; 152 153 // A helper method for DeleteByQuery and Search to get matched internal doc 154 // ids. 155 libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch( 156 const SearchSpecProto &search_spec) const; 157 158 libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig( 159 const std::string &schema_type, const std::string &property_name) const; 160 161 struct PropertyIndexInfo { 162 // Whether the property is indexable. 163 bool indexable; 164 // The term match type if the property is of type string. 165 TermMatchType::Code term_match_type = 166 TermMatchType::Code::TermMatchType_Code_UNKNOWN; 167 }; 168 libtextclassifier3::StatusOr<PropertyIndexInfo> GetPropertyIndexInfo( 169 const std::string &schema_type, 170 const MonkeyTokenizedSection §ion) const; 171 172 libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery( 173 const MonkeyTokenizedDocument &document, 174 const SearchSpecProto &search_spec) const; 175 }; 176 177 } // namespace lib 178 } // namespace icing 179 180 #endif // ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 181