1 // Copyright (C) 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_ 16 #define ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_ 17 18 #include <algorithm> 19 #include <numeric> 20 #include <string> 21 #include <string_view> 22 #include <unordered_set> 23 #include <utility> 24 #include <vector> 25 26 #include "icing/text_classifier/lib3/utils/base/status.h" 27 #include "icing/proto/schema.pb.h" 28 #include "icing/schema/property-util.h" 29 #include "icing/schema/schema-util.h" 30 31 namespace icing { 32 namespace lib { 33 34 // SchemaPropertyIterator: a class for iterating through all properties of a 35 // given SchemaTypeConfigProto in lexicographical order. Only leaf 36 // (non-document-type) properties will be returned, and for document type 37 // properties, the iterator will traverse down to the next nested level of 38 // schema. 39 // 40 // REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have 41 // already passed the validation step during SetSchema. 42 class SchemaPropertyIterator { 43 public: SchemaPropertyIterator(const SchemaTypeConfigProto & base_schema_type_config,const SchemaUtil::TypeConfigMap & type_config_map)44 explicit SchemaPropertyIterator( 45 const SchemaTypeConfigProto& base_schema_type_config, 46 const SchemaUtil::TypeConfigMap& type_config_map) 47 : type_config_map_(type_config_map) { 48 levels_.push_back(LevelInfo(base_schema_type_config, 49 /*base_property_path=*/"", 50 /*all_nested_properties_indexable=*/true)); 51 parent_type_config_names_.insert(base_schema_type_config.schema_type()); 52 } 53 54 // Gets the current property config. 55 // 56 // REQUIRES: The preceding call for Advance() is OK. GetCurrentPropertyConfig()57 const PropertyConfigProto& GetCurrentPropertyConfig() const { 58 return levels_.back().GetCurrentPropertyConfig(); 59 } 60 61 // Gets the current property path. 62 // 63 // REQUIRES: The preceding call for Advance() is OK. GetCurrentPropertyPath()64 std::string GetCurrentPropertyPath() const { 65 return levels_.back().GetCurrentPropertyPath(); 66 } 67 68 // Returns whether the current property is indexable. This would be true if 69 // either the current level is nested indexable, or if the current property is 70 // declared indexable in the indexable_nested_properties_list of the top-level 71 // schema type. 72 // 73 // REQUIRES: The preceding call for Advance() is OK. GetCurrentPropertyIndexable()74 bool GetCurrentPropertyIndexable() const { 75 return levels_.back().GetCurrentPropertyIndexable(); 76 } 77 78 // Returns whether the current schema level is nested indexable. If this is 79 // true, all properties in the level are indexed. 80 // 81 // REQUIRES: The preceding call for Advance() is OK. GetLevelNestedIndexable()82 bool GetLevelNestedIndexable() const { 83 return levels_.back().GetLevelNestedIndexable(); 84 } 85 86 // The set of indexable nested properties that are defined in the 87 // indexable_nested_properties_list but are not found in the schema 88 // definition. These properties still consume sectionIds, but will not be 89 // indexed. unknown_indexable_nested_property_paths()90 const std::vector<std::string>& unknown_indexable_nested_property_paths() 91 const { 92 return unknown_indexable_nested_property_paths_; 93 } 94 95 // Advances to the next leaf property. 96 // 97 // Returns: 98 // - OK on success 99 // - OUT_OF_RANGE_ERROR if there is no more leaf property 100 // - INVALID_ARGUMENT_ERROR if cycle dependency is detected in the nested 101 // schema 102 // - NOT_FOUND_ERROR if any nested schema name is not found in 103 // type_config_map 104 libtextclassifier3::Status Advance(); 105 106 private: 107 // An inner class for maintaining the iterating state of a (nested) level. 108 // Nested SchemaTypeConfig is a tree structure, so we have to traverse it 109 // recursively to all leaf properties. 110 class LevelInfo { 111 public: LevelInfo(const SchemaTypeConfigProto & schema_type_config,std::string base_property_path,bool all_nested_properties_indexable)112 explicit LevelInfo(const SchemaTypeConfigProto& schema_type_config, 113 std::string base_property_path, 114 bool all_nested_properties_indexable) 115 : schema_type_config_(schema_type_config), 116 base_property_path_(std::move(base_property_path)), 117 sorted_property_indices_(schema_type_config.properties_size()), 118 current_vec_idx_(-1), 119 sorted_property_indexable_(schema_type_config.properties_size()), 120 all_nested_properties_indexable_(all_nested_properties_indexable) { 121 // Index sort property by lexicographical order. 122 std::iota(sorted_property_indices_.begin(), 123 sorted_property_indices_.end(), 124 /*value=*/0); 125 std::sort( 126 sorted_property_indices_.begin(), sorted_property_indices_.end(), 127 [&schema_type_config](int lhs_idx, int rhs_idx) -> bool { 128 return schema_type_config.properties(lhs_idx).property_name() < 129 schema_type_config.properties(rhs_idx).property_name(); 130 }); 131 } 132 Advance()133 bool Advance() { 134 return ++current_vec_idx_ < sorted_property_indices_.size(); 135 } 136 GetCurrentPropertyConfig()137 const PropertyConfigProto& GetCurrentPropertyConfig() const { 138 return schema_type_config_.properties( 139 sorted_property_indices_[current_vec_idx_]); 140 } 141 GetCurrentPropertyPath()142 std::string GetCurrentPropertyPath() const { 143 return property_util::ConcatenatePropertyPathExpr( 144 base_property_path_, GetCurrentPropertyConfig().property_name()); 145 } 146 GetLevelNestedIndexable()147 bool GetLevelNestedIndexable() const { 148 return all_nested_properties_indexable_; 149 } 150 GetCurrentPropertyIndexable()151 bool GetCurrentPropertyIndexable() const { 152 return sorted_property_indexable_[current_vec_idx_]; 153 } 154 SetCurrentPropertyIndexable(bool indexable)155 void SetCurrentPropertyIndexable(bool indexable) { 156 sorted_property_indexable_[current_vec_idx_] = indexable; 157 } 158 GetSchemaTypeName()159 std::string_view GetSchemaTypeName() const { 160 return schema_type_config_.schema_type(); 161 } 162 163 private: 164 const SchemaTypeConfigProto& schema_type_config_; // Does not own 165 166 // Concatenated property path of all parent levels. 167 std::string base_property_path_; 168 169 // We perform index sort (comparing property name) in order to iterate all 170 // leaf properties in lexicographical order. This vector is for storing 171 // these sorted indices. 172 std::vector<int> sorted_property_indices_; 173 int current_vec_idx_; 174 175 // Vector indicating whether each property in the current level is 176 // indexable. We can declare different indexable settings for properties in 177 // the same level using indexable_nested_properties_list. 178 // 179 // Element indices in this vector correspond to property indices in the 180 // sorted order. 181 std::vector<bool> sorted_property_indexable_; 182 183 // Indicates if all properties in the current level is nested indexable. 184 // This would be true for a level if the document declares 185 // index_nested_properties=true. If any of parent document type 186 // property sets its flag false, then this would be false for all its child 187 // properties. 188 bool all_nested_properties_indexable_; 189 }; 190 191 const SchemaUtil::TypeConfigMap& type_config_map_; // Does not own 192 193 // For maintaining the stack of recursive nested schema type traversal. We use 194 // std::vector instead of std::stack to avoid memory allocate and free too 195 // frequently. 196 std::vector<LevelInfo> levels_; 197 198 // Maintaining all traversed parent schema type config names of the current 199 // stack (levels_). It is used to detect nested schema cycle dependency. 200 std::unordered_multiset<std::string_view> parent_type_config_names_; 201 202 // Sorted list of indexable nested properties for the top-level schema. 203 std::vector<std::string> sorted_top_level_indexable_nested_properties_; 204 205 // Current iteration index in the sorted_top_level_indexable_nested_properties 206 // list. 207 int current_top_level_indexable_nested_properties_idx_ = 0; 208 209 // Vector of indexable nested properties defined in the 210 // indexable_nested_properties_list, but not found in the schema definition. 211 // These properties still consume sectionIds, but will not be indexed. 212 // Properties are inserted into this vector in sorted order. 213 // 214 // TODO(b/289152024): Implement support for indexing these properties if they 215 // are in the child types of polymorphic nested properties. 216 std::vector<std::string> unknown_indexable_nested_property_paths_; 217 }; 218 219 } // namespace lib 220 } // namespace icing 221 222 #endif // ICING_SCHEMA_SCHEMA_PROPERTY_ITERATOR_H_ 223