xref: /aosp_15_r20/external/icing/icing/schema/schema-property-iterator.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
16 #define ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
17 
18 #include <algorithm>
19 #include <numeric>
20 #include <string>
21 #include <string_view>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/proto/schema.pb.h"
28 #include "icing/schema/property-util.h"
29 #include "icing/schema/schema-util.h"
30 
31 namespace icing {
32 namespace lib {
33 
34 // SchemaPropertyIterator: a class for iterating through all properties of a
35 // given SchemaTypeConfigProto in lexicographical order. Only leaf
36 // (non-document-type) properties will be returned, and for document type
37 // properties, the iterator will traverse down to the next nested level of
38 // schema.
39 //
40 // REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have
41 // already passed the validation step during SetSchema.
42 class SchemaPropertyIterator {
43  public:
SchemaPropertyIterator(const SchemaTypeConfigProto & base_schema_type_config,const SchemaUtil::TypeConfigMap & type_config_map)44   explicit SchemaPropertyIterator(
45       const SchemaTypeConfigProto& base_schema_type_config,
46       const SchemaUtil::TypeConfigMap& type_config_map)
47       : type_config_map_(type_config_map) {
48     levels_.push_back(LevelInfo(base_schema_type_config,
49                                 /*base_property_path=*/"",
50                                 /*all_nested_properties_indexable=*/true));
51     parent_type_config_names_.insert(base_schema_type_config.schema_type());
52   }
53 
54   // Gets the current property config.
55   //
56   // REQUIRES: The preceding call for Advance() is OK.
GetCurrentPropertyConfig()57   const PropertyConfigProto& GetCurrentPropertyConfig() const {
58     return levels_.back().GetCurrentPropertyConfig();
59   }
60 
61   // Gets the current property path.
62   //
63   // REQUIRES: The preceding call for Advance() is OK.
GetCurrentPropertyPath()64   std::string GetCurrentPropertyPath() const {
65     return levels_.back().GetCurrentPropertyPath();
66   }
67 
68   // Returns whether the current property is indexable. This would be true if
69   // either the current level is nested indexable, or if the current property is
70   // declared indexable in the indexable_nested_properties_list of the top-level
71   // schema type.
72   //
73   // REQUIRES: The preceding call for Advance() is OK.
GetCurrentPropertyIndexable()74   bool GetCurrentPropertyIndexable() const {
75     return levels_.back().GetCurrentPropertyIndexable();
76   }
77 
78   // Returns whether the current schema level is nested indexable. If this is
79   // true, all properties in the level are indexed.
80   //
81   // REQUIRES: The preceding call for Advance() is OK.
GetLevelNestedIndexable()82   bool GetLevelNestedIndexable() const {
83     return levels_.back().GetLevelNestedIndexable();
84   }
85 
86   // The set of indexable nested properties that are defined in the
87   // indexable_nested_properties_list but are not found in the schema
88   // definition. These properties still consume sectionIds, but will not be
89   // indexed.
unknown_indexable_nested_property_paths()90   const std::vector<std::string>& unknown_indexable_nested_property_paths()
91       const {
92     return unknown_indexable_nested_property_paths_;
93   }
94 
95   // Advances to the next leaf property.
96   //
97   // Returns:
98   //   - OK on success
99   //   - OUT_OF_RANGE_ERROR if there is no more leaf property
100   //   - INVALID_ARGUMENT_ERROR if cycle dependency is detected in the nested
101   //     schema
102   //   - NOT_FOUND_ERROR if any nested schema name is not found in
103   //     type_config_map
104   libtextclassifier3::Status Advance();
105 
106  private:
107   // An inner class for maintaining the iterating state of a (nested) level.
108   // Nested SchemaTypeConfig is a tree structure, so we have to traverse it
109   // recursively to all leaf properties.
110   class LevelInfo {
111    public:
LevelInfo(const SchemaTypeConfigProto & schema_type_config,std::string base_property_path,bool all_nested_properties_indexable)112     explicit LevelInfo(const SchemaTypeConfigProto& schema_type_config,
113                        std::string base_property_path,
114                        bool all_nested_properties_indexable)
115         : schema_type_config_(schema_type_config),
116           base_property_path_(std::move(base_property_path)),
117           sorted_property_indices_(schema_type_config.properties_size()),
118           current_vec_idx_(-1),
119           sorted_property_indexable_(schema_type_config.properties_size()),
120           all_nested_properties_indexable_(all_nested_properties_indexable) {
121       // Index sort property by lexicographical order.
122       std::iota(sorted_property_indices_.begin(),
123                 sorted_property_indices_.end(),
124                 /*value=*/0);
125       std::sort(
126           sorted_property_indices_.begin(), sorted_property_indices_.end(),
127           [&schema_type_config](int lhs_idx, int rhs_idx) -> bool {
128             return schema_type_config.properties(lhs_idx).property_name() <
129                    schema_type_config.properties(rhs_idx).property_name();
130           });
131     }
132 
Advance()133     bool Advance() {
134       return ++current_vec_idx_ < sorted_property_indices_.size();
135     }
136 
GetCurrentPropertyConfig()137     const PropertyConfigProto& GetCurrentPropertyConfig() const {
138       return schema_type_config_.properties(
139           sorted_property_indices_[current_vec_idx_]);
140     }
141 
GetCurrentPropertyPath()142     std::string GetCurrentPropertyPath() const {
143       return property_util::ConcatenatePropertyPathExpr(
144           base_property_path_, GetCurrentPropertyConfig().property_name());
145     }
146 
GetLevelNestedIndexable()147     bool GetLevelNestedIndexable() const {
148       return all_nested_properties_indexable_;
149     }
150 
GetCurrentPropertyIndexable()151     bool GetCurrentPropertyIndexable() const {
152       return sorted_property_indexable_[current_vec_idx_];
153     }
154 
SetCurrentPropertyIndexable(bool indexable)155     void SetCurrentPropertyIndexable(bool indexable) {
156       sorted_property_indexable_[current_vec_idx_] = indexable;
157     }
158 
GetSchemaTypeName()159     std::string_view GetSchemaTypeName() const {
160       return schema_type_config_.schema_type();
161     }
162 
163    private:
164     const SchemaTypeConfigProto& schema_type_config_;  // Does not own
165 
166     // Concatenated property path of all parent levels.
167     std::string base_property_path_;
168 
169     // We perform index sort (comparing property name) in order to iterate all
170     // leaf properties in lexicographical order. This vector is for storing
171     // these sorted indices.
172     std::vector<int> sorted_property_indices_;
173     int current_vec_idx_;
174 
175     // Vector indicating whether each property in the current level is
176     // indexable. We can declare different indexable settings for properties in
177     // the same level using indexable_nested_properties_list.
178     //
179     // Element indices in this vector correspond to property indices in the
180     // sorted order.
181     std::vector<bool> sorted_property_indexable_;
182 
183     // Indicates if all properties in the current level is nested indexable.
184     // This would be true for a level if the document declares
185     // index_nested_properties=true. If any of parent document type
186     // property sets its flag false, then this would be false for all its child
187     // properties.
188     bool all_nested_properties_indexable_;
189   };
190 
191   const SchemaUtil::TypeConfigMap& type_config_map_;  // Does not own
192 
193   // For maintaining the stack of recursive nested schema type traversal. We use
194   // std::vector instead of std::stack to avoid memory allocate and free too
195   // frequently.
196   std::vector<LevelInfo> levels_;
197 
198   // Maintaining all traversed parent schema type config names of the current
199   // stack (levels_). It is used to detect nested schema cycle dependency.
200   std::unordered_multiset<std::string_view> parent_type_config_names_;
201 
202   // Sorted list of indexable nested properties for the top-level schema.
203   std::vector<std::string> sorted_top_level_indexable_nested_properties_;
204 
205   // Current iteration index in the sorted_top_level_indexable_nested_properties
206   // list.
207   int current_top_level_indexable_nested_properties_idx_ = 0;
208 
209   // Vector of indexable nested properties defined in the
210   // indexable_nested_properties_list, but not found in the schema definition.
211   // These properties still consume sectionIds, but will not be indexed.
212   // Properties are inserted into this vector in sorted order.
213   //
214   // TODO(b/289152024): Implement support for indexing these properties if they
215   // are in the child types of polymorphic nested properties.
216   std::vector<std::string> unknown_indexable_nested_property_paths_;
217 };
218 
219 }  // namespace lib
220 }  // namespace icing
221 
222 #endif  // ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_
223