// xref: /aosp_15_r20/external/icing/icing/monkey_test/in-memory-icing-search-engine.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
16 #define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <optional>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/monkey_test/monkey-test-util.h"
29 #include "icing/monkey_test/monkey-tokenized-document.h"
30 #include "icing/proto/document.pb.h"
31 #include "icing/proto/schema.pb.h"
32 #include "icing/proto/search.pb.h"
33 #include "icing/proto/term.pb.h"
34 #include "icing/store/document-id.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 class InMemoryIcingSearchEngine {
40  public:
41   struct PickDocumentResult {
42     std::string name_space;
43     std::string uri;
44     // document is empty if and only if such (name_space, uri) is not alive
45     // in the in-memory icing.
46     std::optional<DocumentProto> document;
47   };
48 
InMemoryIcingSearchEngine(MonkeyTestRandomEngine * random)49   InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random) : random_(random) {}
50 
GetNumAliveDocuments()51   uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); }
52 
GetSchema()53   const SchemaProto *GetSchema() const { return schema_.get(); }
54 
55   void SetSchema(SchemaProto &&schema);
56 
57   // Randomly pick a document from the in-memory Icing for monkey testing.
58   //
59   // p_alive: chance of getting an alive document.
60   // p_all:   chance of getting a document that has ever been "Put" before,
61   //          including already "Delete"d documents.
62   // p_other: chance of getting a random namespace + uri that has never been
63   //          "Put" before.
64   //
65   //  p_alive, p_all, and p_other is required to be positive and sum to 1.
66   //  Otherwise, they will be normalized to ensure this.
67   //
68   // Returns an instance of PickDocumentResult.
69   PickDocumentResult RandomPickDocument(float p_alive, float p_all,
70                                         float p_other) const;
71 
72   // Puts the document into the in-memory Icing. If the (namespace, uri) pair
73   // already exists, the old document will be overwritten.
74   void Put(const MonkeyTokenizedDocument &document);
75 
76   std::unordered_set<std::string> GetAllNamespaces() const;
77 
78   // Deletes the Document specified by the given (namespace, uri) pair.
79   //
80   // Returns:
81   //   OK on success
82   //   NOT_FOUND if no document exists with namespace, uri
83   libtextclassifier3::Status Delete(const std::string &name_space,
84                                     const std::string &uri);
85 
86   // Deletes all Documents belonging to the specified namespace.
87   //
88   // Returns:
89   //   The number of deleted documents on success
90   //   INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
91   libtextclassifier3::StatusOr<uint32_t> DeleteByNamespace(
92       const std::string &name_space);
93 
94   // Deletes all Documents belonging to the specified type
95   //
96   // Returns:
97   //   The number of deleted documents on success
98   //   INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
99   libtextclassifier3::StatusOr<uint32_t> DeleteBySchemaType(
100       const std::string &schema_type);
101 
102   // Deletes all Documents that match the query specified in search_spec.
103   // Check the comments of Search() for the supported query types.
104   //
105   // Returns:
106   //   The number of deleted documents on success
107   //   INTERNAL_ERROR if there are inconsistencies in the in-memory Icing
108   libtextclassifier3::StatusOr<uint32_t> DeleteByQuery(
109       const SearchSpecProto &search_spec);
110 
111   // Retrieves documents according to search_spec.
112   // Currently, only the "query", "term_match_type", "embedding_query_vectors",
113   // and "embedding_query_metric_type" fields are recognized by the in-memory
114   // Icing.
115   //
116   // For term based queries, only single term queries with possible section
117   // restrictions are supported.
118   //
119   // For embedding based queries, only the fixed format of
120   // `semanticSearch(getEmbeddingParameter(0), low, high)` is supported, where
121   // `low` and `high` are floating point numbers that specify the score range.
122   // Section restrictions are also recognized.
123   libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search(
124       const SearchSpecProto &search_spec) const;
125 
126  private:
127   // Does not own.
128   MonkeyTestRandomEngine *random_;
129 
130   std::vector<MonkeyTokenizedDocument> documents_;
131   std::vector<DocumentId> existing_doc_ids_;
132   // A map from namespaces to uris and then from uris to internal document ids,
133   // which is used for fast lookups.
134   std::unordered_map<std::string, std::unordered_map<std::string, DocumentId>>
135       namespace_uri_docid_map;
136 
137   std::unique_ptr<SchemaProto> schema_;
138   // A map that maps from (schema_type, property_name) to the corresponding
139   // PropertyConfigProto.
140   std::unordered_map<
141       std::string, std::unordered_map<std::string, const PropertyConfigProto &>>
142       property_config_map_;
143 
144   // Finds and returns the internal document id for the document identified by
145   // the given key (namespace, uri)
146   //
147   // Returns:
148   //   The document id found on success
149   //   NOT_FOUND if the key doesn't exist or doc has been deleted
150   libtextclassifier3::StatusOr<DocumentId> InternalGet(
151       const std::string &name_space, const std::string &uri) const;
152 
153   // A helper method for DeleteByQuery and Search to get matched internal doc
154   // ids.
155   libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch(
156       const SearchSpecProto &search_spec) const;
157 
158   libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig(
159       const std::string &schema_type, const std::string &property_name) const;
160 
161   struct PropertyIndexInfo {
162     // Whether the property is indexable.
163     bool indexable;
164     // The term match type if the property is of type string.
165     TermMatchType::Code term_match_type =
166         TermMatchType::Code::TermMatchType_Code_UNKNOWN;
167   };
168   libtextclassifier3::StatusOr<PropertyIndexInfo> GetPropertyIndexInfo(
169       const std::string &schema_type,
170       const MonkeyTokenizedSection &section) const;
171 
172   libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery(
173       const MonkeyTokenizedDocument &document,
174       const SearchSpecProto &search_spec) const;
175 };
176 
177 }  // namespace lib
178 }  // namespace icing
179 
180 #endif  // ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
181