1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/schema/schema-property-iterator.h"
16
17 #include <algorithm>
18 #include <string>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/proto/schema.pb.h"
27 #include "icing/schema/property-util.h"
28
29 namespace icing {
30 namespace lib {
31
Advance()32 libtextclassifier3::Status SchemaPropertyIterator::Advance() {
33 while (!levels_.empty()) {
34 if (!levels_.back().Advance()) {
35 // When finishing iterating all properties of the current level, pop it
36 // from the stack (levels_), return to the previous level and resume the
37 // iteration.
38 parent_type_config_names_.erase(
39 parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
40 levels_.pop_back();
41 continue;
42 }
43
44 const PropertyConfigProto& curr_property_config =
45 levels_.back().GetCurrentPropertyConfig();
46 std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
47
48 // Iterate through the sorted_top_level_indexable_nested_properties_ in
49 // order until we find the first element that is >= curr_property_path.
50 while (current_top_level_indexable_nested_properties_idx_ <
51 sorted_top_level_indexable_nested_properties_.size() &&
52 sorted_top_level_indexable_nested_properties_.at(
53 current_top_level_indexable_nested_properties_idx_) <
54 curr_property_path) {
55 // If an element in sorted_top_level_indexable_nested_properties_ < the
56 // current property path, it means that we've already iterated past the
57 // possible position for it without seeing it.
58 // It's not a valid property path in our schema definition. Add it to
59 // unknown_indexable_nested_properties_ and advance
60 // current_top_level_indexable_nested_properties_idx_.
61 unknown_indexable_nested_property_paths_.push_back(
62 sorted_top_level_indexable_nested_properties_.at(
63 current_top_level_indexable_nested_properties_idx_));
64 ++current_top_level_indexable_nested_properties_idx_;
65 }
66
67 if (curr_property_config.data_type() !=
68 PropertyConfigProto::DataType::DOCUMENT) {
69 // We've advanced to a leaf property.
70 // Set whether this property is indexable according to its level's
71 // indexable config. If this property is declared in
72 // indexable_nested_properties_list of the top-level schema, it is also
73 // nested indexable.
74 std::string* current_indexable_nested_prop =
75 current_top_level_indexable_nested_properties_idx_ <
76 sorted_top_level_indexable_nested_properties_.size()
77 ? &sorted_top_level_indexable_nested_properties_.at(
78 current_top_level_indexable_nested_properties_idx_)
79 : nullptr;
80 if (current_indexable_nested_prop == nullptr ||
81 *current_indexable_nested_prop > curr_property_path) {
82 // Current property is not in the indexable list. Set it as indexable if
83 // its schema level is indexable AND it is an indexable property.
84 bool is_property_indexable =
85 levels_.back().GetLevelNestedIndexable() &&
86 SchemaUtil::IsIndexedProperty(curr_property_config);
87 levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
88 } else if (*current_indexable_nested_prop == curr_property_path) {
89 // Current property is in the indexable list. Set its indexable config
90 // to true. This property will consume a sectionId regardless of whether
91 // or not it is actually indexable.
92 levels_.back().SetCurrentPropertyIndexable(true);
93 ++current_top_level_indexable_nested_properties_idx_;
94 }
95 return libtextclassifier3::Status::OK;
96 }
97
98 // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
99 // schema and we need to traverse the next level. Look up SchemaTypeConfig
100 // (by the schema name) by type_config_map_, and push a new level into
101 // levels_.
102 // - Each level has to record the index of property it is currently at, so
103 // we can resume the iteration when returning back to it. Also other
104 // essential info will be maintained in LevelInfo as well.
105 auto nested_type_config_iter =
106 type_config_map_.find(curr_property_config.schema_type());
107 if (nested_type_config_iter == type_config_map_.end()) {
108 // This should never happen because our schema should already be
109 // validated by this point.
110 return absl_ports::NotFoundError(absl_ports::StrCat(
111 "Type config not found: ", curr_property_config.schema_type()));
112 }
113 const SchemaTypeConfigProto& nested_type_config =
114 nested_type_config_iter->second;
115
116 if (levels_.back().GetLevelNestedIndexable()) {
117 // We should set sorted_top_level_indexable_nested_properties_ to the list
118 // defined by the current level.
119 // GetLevelNestedIndexable() is true either because:
120 // 1. We're looking at a document property of the top-level schema --
121 // The first LevelInfo for the iterator is initialized with
122 // all_nested_properties_indexable_ = true.
123 // 2. All previous levels set index_nested_properties = true:
124 // This indicates that upper-level schema types want to follow nested
125 // properties definition of its document subtypes. If this is the first
126 // subtype level that defines a list, we should set it as
127 // top_level_indexable_nested_properties_ for the current top-level
128 // schema.
129 sorted_top_level_indexable_nested_properties_.clear();
130 sorted_top_level_indexable_nested_properties_.reserve(
131 curr_property_config.document_indexing_config()
132 .indexable_nested_properties_list()
133 .size());
134 for (const std::string& property :
135 curr_property_config.document_indexing_config()
136 .indexable_nested_properties_list()) {
137 // Concat the current property name to each property to get the full
138 // property path expression for each indexable nested property.
139 sorted_top_level_indexable_nested_properties_.push_back(
140 property_util::ConcatenatePropertyPathExpr(curr_property_path,
141 property));
142 }
143 current_top_level_indexable_nested_properties_idx_ = 0;
144 // Sort elements and dedupe
145 std::sort(sorted_top_level_indexable_nested_properties_.begin(),
146 sorted_top_level_indexable_nested_properties_.end());
147 auto last =
148 std::unique(sorted_top_level_indexable_nested_properties_.begin(),
149 sorted_top_level_indexable_nested_properties_.end());
150 sorted_top_level_indexable_nested_properties_.erase(
151 last, sorted_top_level_indexable_nested_properties_.end());
152 }
153
154 bool is_cycle =
155 parent_type_config_names_.find(nested_type_config.schema_type()) !=
156 parent_type_config_names_.end();
157 bool is_parent_property_path =
158 current_top_level_indexable_nested_properties_idx_ <
159 sorted_top_level_indexable_nested_properties_.size() &&
160 property_util::IsParentPropertyPath(
161 curr_property_path,
162 sorted_top_level_indexable_nested_properties_.at(
163 current_top_level_indexable_nested_properties_idx_));
164 if (is_cycle && !is_parent_property_path) {
165 // Cycle detected. The schema definition is guaranteed to be valid here
166 // since it must have already been validated during SchemaUtil::Validate,
167 // which would have rejected any schema with bad cycles.
168 //
169 // There are no properties in the indexable_nested_properties_list that
170 // are a part of this circular reference.
171 // We do not need to iterate this type further so we simply move on to
172 // other properties in the parent type.
173 continue;
174 }
175
176 bool all_nested_properties_indexable =
177 levels_.back().GetLevelNestedIndexable() &&
178 curr_property_config.document_indexing_config()
179 .index_nested_properties();
180 levels_.push_back(LevelInfo(nested_type_config,
181 std::move(curr_property_path),
182 all_nested_properties_indexable));
183 parent_type_config_names_.insert(nested_type_config.schema_type());
184 }
185
186 // Before returning, move all remaining uniterated properties from
187 // sorted_top_level_indexable_nested_properties_ into
188 // unknown_indexable_nested_properties_.
189 std::move(sorted_top_level_indexable_nested_properties_.begin() +
190 current_top_level_indexable_nested_properties_idx_,
191 sorted_top_level_indexable_nested_properties_.end(),
192 std::back_inserter(unknown_indexable_nested_property_paths_));
193
194 return absl_ports::OutOfRangeError("End of iterator");
195 }
196
197 } // namespace lib
198 } // namespace icing
199