1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2023 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker // http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/schema-property-iterator.h"
16*8b6cd535SAndroid Build Coastguard Worker
17*8b6cd535SAndroid Build Coastguard Worker #include <algorithm>
18*8b6cd535SAndroid Build Coastguard Worker #include <string>
19*8b6cd535SAndroid Build Coastguard Worker #include <unordered_set>
20*8b6cd535SAndroid Build Coastguard Worker #include <utility>
21*8b6cd535SAndroid Build Coastguard Worker #include <vector>
22*8b6cd535SAndroid Build Coastguard Worker
23*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h"
24*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h"
25*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/str_cat.h"
26*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/schema.pb.h"
27*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/property-util.h"
28*8b6cd535SAndroid Build Coastguard Worker
29*8b6cd535SAndroid Build Coastguard Worker namespace icing {
30*8b6cd535SAndroid Build Coastguard Worker namespace lib {
31*8b6cd535SAndroid Build Coastguard Worker
Advance()32*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status SchemaPropertyIterator::Advance() {
33*8b6cd535SAndroid Build Coastguard Worker while (!levels_.empty()) {
34*8b6cd535SAndroid Build Coastguard Worker if (!levels_.back().Advance()) {
35*8b6cd535SAndroid Build Coastguard Worker // When finishing iterating all properties of the current level, pop it
36*8b6cd535SAndroid Build Coastguard Worker // from the stack (levels_), return to the previous level and resume the
37*8b6cd535SAndroid Build Coastguard Worker // iteration.
38*8b6cd535SAndroid Build Coastguard Worker parent_type_config_names_.erase(
39*8b6cd535SAndroid Build Coastguard Worker parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
40*8b6cd535SAndroid Build Coastguard Worker levels_.pop_back();
41*8b6cd535SAndroid Build Coastguard Worker continue;
42*8b6cd535SAndroid Build Coastguard Worker }
43*8b6cd535SAndroid Build Coastguard Worker
44*8b6cd535SAndroid Build Coastguard Worker const PropertyConfigProto& curr_property_config =
45*8b6cd535SAndroid Build Coastguard Worker levels_.back().GetCurrentPropertyConfig();
46*8b6cd535SAndroid Build Coastguard Worker std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
47*8b6cd535SAndroid Build Coastguard Worker
48*8b6cd535SAndroid Build Coastguard Worker // Iterate through the sorted_top_level_indexable_nested_properties_ in
49*8b6cd535SAndroid Build Coastguard Worker // order until we find the first element that is >= curr_property_path.
50*8b6cd535SAndroid Build Coastguard Worker while (current_top_level_indexable_nested_properties_idx_ <
51*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.size() &&
52*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.at(
53*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_) <
54*8b6cd535SAndroid Build Coastguard Worker curr_property_path) {
55*8b6cd535SAndroid Build Coastguard Worker // If an element in sorted_top_level_indexable_nested_properties_ < the
56*8b6cd535SAndroid Build Coastguard Worker // current property path, it means that we've already iterated past the
57*8b6cd535SAndroid Build Coastguard Worker // possible position for it without seeing it.
58*8b6cd535SAndroid Build Coastguard Worker // It's not a valid property path in our schema definition. Add it to
59*8b6cd535SAndroid Build Coastguard Worker // unknown_indexable_nested_properties_ and advance
60*8b6cd535SAndroid Build Coastguard Worker // current_top_level_indexable_nested_properties_idx_.
61*8b6cd535SAndroid Build Coastguard Worker unknown_indexable_nested_property_paths_.push_back(
62*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.at(
63*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_));
64*8b6cd535SAndroid Build Coastguard Worker ++current_top_level_indexable_nested_properties_idx_;
65*8b6cd535SAndroid Build Coastguard Worker }
66*8b6cd535SAndroid Build Coastguard Worker
67*8b6cd535SAndroid Build Coastguard Worker if (curr_property_config.data_type() !=
68*8b6cd535SAndroid Build Coastguard Worker PropertyConfigProto::DataType::DOCUMENT) {
69*8b6cd535SAndroid Build Coastguard Worker // We've advanced to a leaf property.
70*8b6cd535SAndroid Build Coastguard Worker // Set whether this property is indexable according to its level's
71*8b6cd535SAndroid Build Coastguard Worker // indexable config. If this property is declared in
72*8b6cd535SAndroid Build Coastguard Worker // indexable_nested_properties_list of the top-level schema, it is also
73*8b6cd535SAndroid Build Coastguard Worker // nested indexable.
74*8b6cd535SAndroid Build Coastguard Worker std::string* current_indexable_nested_prop =
75*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_ <
76*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.size()
77*8b6cd535SAndroid Build Coastguard Worker ? &sorted_top_level_indexable_nested_properties_.at(
78*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_)
79*8b6cd535SAndroid Build Coastguard Worker : nullptr;
80*8b6cd535SAndroid Build Coastguard Worker if (current_indexable_nested_prop == nullptr ||
81*8b6cd535SAndroid Build Coastguard Worker *current_indexable_nested_prop > curr_property_path) {
82*8b6cd535SAndroid Build Coastguard Worker // Current property is not in the indexable list. Set it as indexable if
83*8b6cd535SAndroid Build Coastguard Worker // its schema level is indexable AND it is an indexable property.
84*8b6cd535SAndroid Build Coastguard Worker bool is_property_indexable =
85*8b6cd535SAndroid Build Coastguard Worker levels_.back().GetLevelNestedIndexable() &&
86*8b6cd535SAndroid Build Coastguard Worker SchemaUtil::IsIndexedProperty(curr_property_config);
87*8b6cd535SAndroid Build Coastguard Worker levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
88*8b6cd535SAndroid Build Coastguard Worker } else if (*current_indexable_nested_prop == curr_property_path) {
89*8b6cd535SAndroid Build Coastguard Worker // Current property is in the indexable list. Set its indexable config
90*8b6cd535SAndroid Build Coastguard Worker // to true. This property will consume a sectionId regardless of whether
91*8b6cd535SAndroid Build Coastguard Worker // or not it is actually indexable.
92*8b6cd535SAndroid Build Coastguard Worker levels_.back().SetCurrentPropertyIndexable(true);
93*8b6cd535SAndroid Build Coastguard Worker ++current_top_level_indexable_nested_properties_idx_;
94*8b6cd535SAndroid Build Coastguard Worker }
95*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
96*8b6cd535SAndroid Build Coastguard Worker }
97*8b6cd535SAndroid Build Coastguard Worker
98*8b6cd535SAndroid Build Coastguard Worker // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
99*8b6cd535SAndroid Build Coastguard Worker // schema and we need to traverse the next level. Look up SchemaTypeConfig
100*8b6cd535SAndroid Build Coastguard Worker // (by the schema name) by type_config_map_, and push a new level into
101*8b6cd535SAndroid Build Coastguard Worker // levels_.
102*8b6cd535SAndroid Build Coastguard Worker // - Each level has to record the index of property it is currently at, so
103*8b6cd535SAndroid Build Coastguard Worker // we can resume the iteration when returning back to it. Also other
104*8b6cd535SAndroid Build Coastguard Worker // essential info will be maintained in LevelInfo as well.
105*8b6cd535SAndroid Build Coastguard Worker auto nested_type_config_iter =
106*8b6cd535SAndroid Build Coastguard Worker type_config_map_.find(curr_property_config.schema_type());
107*8b6cd535SAndroid Build Coastguard Worker if (nested_type_config_iter == type_config_map_.end()) {
108*8b6cd535SAndroid Build Coastguard Worker // This should never happen because our schema should already be
109*8b6cd535SAndroid Build Coastguard Worker // validated by this point.
110*8b6cd535SAndroid Build Coastguard Worker return absl_ports::NotFoundError(absl_ports::StrCat(
111*8b6cd535SAndroid Build Coastguard Worker "Type config not found: ", curr_property_config.schema_type()));
112*8b6cd535SAndroid Build Coastguard Worker }
113*8b6cd535SAndroid Build Coastguard Worker const SchemaTypeConfigProto& nested_type_config =
114*8b6cd535SAndroid Build Coastguard Worker nested_type_config_iter->second;
115*8b6cd535SAndroid Build Coastguard Worker
116*8b6cd535SAndroid Build Coastguard Worker if (levels_.back().GetLevelNestedIndexable()) {
117*8b6cd535SAndroid Build Coastguard Worker // We should set sorted_top_level_indexable_nested_properties_ to the list
118*8b6cd535SAndroid Build Coastguard Worker // defined by the current level.
119*8b6cd535SAndroid Build Coastguard Worker // GetLevelNestedIndexable() is true either because:
120*8b6cd535SAndroid Build Coastguard Worker // 1. We're looking at a document property of the top-level schema --
121*8b6cd535SAndroid Build Coastguard Worker // The first LevelInfo for the iterator is initialized with
122*8b6cd535SAndroid Build Coastguard Worker // all_nested_properties_indexable_ = true.
123*8b6cd535SAndroid Build Coastguard Worker // 2. All previous levels set index_nested_properties = true:
124*8b6cd535SAndroid Build Coastguard Worker // This indicates that upper-level schema types want to follow nested
125*8b6cd535SAndroid Build Coastguard Worker // properties definition of its document subtypes. If this is the first
126*8b6cd535SAndroid Build Coastguard Worker // subtype level that defines a list, we should set it as
127*8b6cd535SAndroid Build Coastguard Worker // top_level_indexable_nested_properties_ for the current top-level
128*8b6cd535SAndroid Build Coastguard Worker // schema.
129*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.clear();
130*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.reserve(
131*8b6cd535SAndroid Build Coastguard Worker curr_property_config.document_indexing_config()
132*8b6cd535SAndroid Build Coastguard Worker .indexable_nested_properties_list()
133*8b6cd535SAndroid Build Coastguard Worker .size());
134*8b6cd535SAndroid Build Coastguard Worker for (const std::string& property :
135*8b6cd535SAndroid Build Coastguard Worker curr_property_config.document_indexing_config()
136*8b6cd535SAndroid Build Coastguard Worker .indexable_nested_properties_list()) {
137*8b6cd535SAndroid Build Coastguard Worker // Concat the current property name to each property to get the full
138*8b6cd535SAndroid Build Coastguard Worker // property path expression for each indexable nested property.
139*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.push_back(
140*8b6cd535SAndroid Build Coastguard Worker property_util::ConcatenatePropertyPathExpr(curr_property_path,
141*8b6cd535SAndroid Build Coastguard Worker property));
142*8b6cd535SAndroid Build Coastguard Worker }
143*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_ = 0;
144*8b6cd535SAndroid Build Coastguard Worker // Sort elements and dedupe
145*8b6cd535SAndroid Build Coastguard Worker std::sort(sorted_top_level_indexable_nested_properties_.begin(),
146*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.end());
147*8b6cd535SAndroid Build Coastguard Worker auto last =
148*8b6cd535SAndroid Build Coastguard Worker std::unique(sorted_top_level_indexable_nested_properties_.begin(),
149*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.end());
150*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.erase(
151*8b6cd535SAndroid Build Coastguard Worker last, sorted_top_level_indexable_nested_properties_.end());
152*8b6cd535SAndroid Build Coastguard Worker }
153*8b6cd535SAndroid Build Coastguard Worker
154*8b6cd535SAndroid Build Coastguard Worker bool is_cycle =
155*8b6cd535SAndroid Build Coastguard Worker parent_type_config_names_.find(nested_type_config.schema_type()) !=
156*8b6cd535SAndroid Build Coastguard Worker parent_type_config_names_.end();
157*8b6cd535SAndroid Build Coastguard Worker bool is_parent_property_path =
158*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_ <
159*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.size() &&
160*8b6cd535SAndroid Build Coastguard Worker property_util::IsParentPropertyPath(
161*8b6cd535SAndroid Build Coastguard Worker curr_property_path,
162*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.at(
163*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_));
164*8b6cd535SAndroid Build Coastguard Worker if (is_cycle && !is_parent_property_path) {
165*8b6cd535SAndroid Build Coastguard Worker // Cycle detected. The schema definition is guaranteed to be valid here
166*8b6cd535SAndroid Build Coastguard Worker // since it must have already been validated during SchemaUtil::Validate,
167*8b6cd535SAndroid Build Coastguard Worker // which would have rejected any schema with bad cycles.
168*8b6cd535SAndroid Build Coastguard Worker //
169*8b6cd535SAndroid Build Coastguard Worker // There are no properties in the indexable_nested_properties_list that
170*8b6cd535SAndroid Build Coastguard Worker // are a part of this circular reference.
171*8b6cd535SAndroid Build Coastguard Worker // We do not need to iterate this type further so we simply move on to
172*8b6cd535SAndroid Build Coastguard Worker // other properties in the parent type.
173*8b6cd535SAndroid Build Coastguard Worker continue;
174*8b6cd535SAndroid Build Coastguard Worker }
175*8b6cd535SAndroid Build Coastguard Worker
176*8b6cd535SAndroid Build Coastguard Worker bool all_nested_properties_indexable =
177*8b6cd535SAndroid Build Coastguard Worker levels_.back().GetLevelNestedIndexable() &&
178*8b6cd535SAndroid Build Coastguard Worker curr_property_config.document_indexing_config()
179*8b6cd535SAndroid Build Coastguard Worker .index_nested_properties();
180*8b6cd535SAndroid Build Coastguard Worker levels_.push_back(LevelInfo(nested_type_config,
181*8b6cd535SAndroid Build Coastguard Worker std::move(curr_property_path),
182*8b6cd535SAndroid Build Coastguard Worker all_nested_properties_indexable));
183*8b6cd535SAndroid Build Coastguard Worker parent_type_config_names_.insert(nested_type_config.schema_type());
184*8b6cd535SAndroid Build Coastguard Worker }
185*8b6cd535SAndroid Build Coastguard Worker
186*8b6cd535SAndroid Build Coastguard Worker // Before returning, move all remaining uniterated properties from
187*8b6cd535SAndroid Build Coastguard Worker // sorted_top_level_indexable_nested_properties_ into
188*8b6cd535SAndroid Build Coastguard Worker // unknown_indexable_nested_properties_.
189*8b6cd535SAndroid Build Coastguard Worker std::move(sorted_top_level_indexable_nested_properties_.begin() +
190*8b6cd535SAndroid Build Coastguard Worker current_top_level_indexable_nested_properties_idx_,
191*8b6cd535SAndroid Build Coastguard Worker sorted_top_level_indexable_nested_properties_.end(),
192*8b6cd535SAndroid Build Coastguard Worker std::back_inserter(unknown_indexable_nested_property_paths_));
193*8b6cd535SAndroid Build Coastguard Worker
194*8b6cd535SAndroid Build Coastguard Worker return absl_ports::OutOfRangeError("End of iterator");
195*8b6cd535SAndroid Build Coastguard Worker }
196*8b6cd535SAndroid Build Coastguard Worker
197*8b6cd535SAndroid Build Coastguard Worker } // namespace lib
198*8b6cd535SAndroid Build Coastguard Worker } // namespace icing
199