xref: /aosp_15_r20/external/icing/icing/schema/schema-property-iterator.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2023 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker //      http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker 
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/schema-property-iterator.h"
16*8b6cd535SAndroid Build Coastguard Worker 
17*8b6cd535SAndroid Build Coastguard Worker #include <algorithm>
18*8b6cd535SAndroid Build Coastguard Worker #include <string>
19*8b6cd535SAndroid Build Coastguard Worker #include <unordered_set>
20*8b6cd535SAndroid Build Coastguard Worker #include <utility>
21*8b6cd535SAndroid Build Coastguard Worker #include <vector>
22*8b6cd535SAndroid Build Coastguard Worker 
23*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h"
24*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h"
25*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/str_cat.h"
26*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/schema.pb.h"
27*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/property-util.h"
28*8b6cd535SAndroid Build Coastguard Worker 
29*8b6cd535SAndroid Build Coastguard Worker namespace icing {
30*8b6cd535SAndroid Build Coastguard Worker namespace lib {
31*8b6cd535SAndroid Build Coastguard Worker 
Advance()32*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status SchemaPropertyIterator::Advance() {
33*8b6cd535SAndroid Build Coastguard Worker   while (!levels_.empty()) {
34*8b6cd535SAndroid Build Coastguard Worker     if (!levels_.back().Advance()) {
35*8b6cd535SAndroid Build Coastguard Worker       // When finishing iterating all properties of the current level, pop it
36*8b6cd535SAndroid Build Coastguard Worker       // from the stack (levels_), return to the previous level and resume the
37*8b6cd535SAndroid Build Coastguard Worker       // iteration.
38*8b6cd535SAndroid Build Coastguard Worker       parent_type_config_names_.erase(
39*8b6cd535SAndroid Build Coastguard Worker           parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
40*8b6cd535SAndroid Build Coastguard Worker       levels_.pop_back();
41*8b6cd535SAndroid Build Coastguard Worker       continue;
42*8b6cd535SAndroid Build Coastguard Worker     }
43*8b6cd535SAndroid Build Coastguard Worker 
44*8b6cd535SAndroid Build Coastguard Worker     const PropertyConfigProto& curr_property_config =
45*8b6cd535SAndroid Build Coastguard Worker         levels_.back().GetCurrentPropertyConfig();
46*8b6cd535SAndroid Build Coastguard Worker     std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
47*8b6cd535SAndroid Build Coastguard Worker 
48*8b6cd535SAndroid Build Coastguard Worker     // Iterate through the sorted_top_level_indexable_nested_properties_ in
49*8b6cd535SAndroid Build Coastguard Worker     // order until we find the first element that is >= curr_property_path.
50*8b6cd535SAndroid Build Coastguard Worker     while (current_top_level_indexable_nested_properties_idx_ <
51*8b6cd535SAndroid Build Coastguard Worker                sorted_top_level_indexable_nested_properties_.size() &&
52*8b6cd535SAndroid Build Coastguard Worker            sorted_top_level_indexable_nested_properties_.at(
53*8b6cd535SAndroid Build Coastguard Worker                current_top_level_indexable_nested_properties_idx_) <
54*8b6cd535SAndroid Build Coastguard Worker                curr_property_path) {
55*8b6cd535SAndroid Build Coastguard Worker       // If an element in sorted_top_level_indexable_nested_properties_ < the
56*8b6cd535SAndroid Build Coastguard Worker       // current property path, it means that we've already iterated past the
57*8b6cd535SAndroid Build Coastguard Worker       // possible position for it without seeing it.
58*8b6cd535SAndroid Build Coastguard Worker       // It's not a valid property path in our schema definition. Add it to
59*8b6cd535SAndroid Build Coastguard Worker       // unknown_indexable_nested_properties_ and advance
60*8b6cd535SAndroid Build Coastguard Worker       // current_top_level_indexable_nested_properties_idx_.
61*8b6cd535SAndroid Build Coastguard Worker       unknown_indexable_nested_property_paths_.push_back(
62*8b6cd535SAndroid Build Coastguard Worker           sorted_top_level_indexable_nested_properties_.at(
63*8b6cd535SAndroid Build Coastguard Worker               current_top_level_indexable_nested_properties_idx_));
64*8b6cd535SAndroid Build Coastguard Worker       ++current_top_level_indexable_nested_properties_idx_;
65*8b6cd535SAndroid Build Coastguard Worker     }
66*8b6cd535SAndroid Build Coastguard Worker 
67*8b6cd535SAndroid Build Coastguard Worker     if (curr_property_config.data_type() !=
68*8b6cd535SAndroid Build Coastguard Worker         PropertyConfigProto::DataType::DOCUMENT) {
69*8b6cd535SAndroid Build Coastguard Worker       // We've advanced to a leaf property.
70*8b6cd535SAndroid Build Coastguard Worker       // Set whether this property is indexable according to its level's
71*8b6cd535SAndroid Build Coastguard Worker       // indexable config. If this property is declared in
72*8b6cd535SAndroid Build Coastguard Worker       // indexable_nested_properties_list of the top-level schema, it is also
73*8b6cd535SAndroid Build Coastguard Worker       // nested indexable.
74*8b6cd535SAndroid Build Coastguard Worker       std::string* current_indexable_nested_prop =
75*8b6cd535SAndroid Build Coastguard Worker           current_top_level_indexable_nested_properties_idx_ <
76*8b6cd535SAndroid Build Coastguard Worker                   sorted_top_level_indexable_nested_properties_.size()
77*8b6cd535SAndroid Build Coastguard Worker               ? &sorted_top_level_indexable_nested_properties_.at(
78*8b6cd535SAndroid Build Coastguard Worker                     current_top_level_indexable_nested_properties_idx_)
79*8b6cd535SAndroid Build Coastguard Worker               : nullptr;
80*8b6cd535SAndroid Build Coastguard Worker       if (current_indexable_nested_prop == nullptr ||
81*8b6cd535SAndroid Build Coastguard Worker           *current_indexable_nested_prop > curr_property_path) {
82*8b6cd535SAndroid Build Coastguard Worker         // Current property is not in the indexable list. Set it as indexable if
83*8b6cd535SAndroid Build Coastguard Worker         // its schema level is indexable AND it is an indexable property.
84*8b6cd535SAndroid Build Coastguard Worker         bool is_property_indexable =
85*8b6cd535SAndroid Build Coastguard Worker             levels_.back().GetLevelNestedIndexable() &&
86*8b6cd535SAndroid Build Coastguard Worker             SchemaUtil::IsIndexedProperty(curr_property_config);
87*8b6cd535SAndroid Build Coastguard Worker         levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
88*8b6cd535SAndroid Build Coastguard Worker       } else if (*current_indexable_nested_prop == curr_property_path) {
89*8b6cd535SAndroid Build Coastguard Worker         // Current property is in the indexable list. Set its indexable config
90*8b6cd535SAndroid Build Coastguard Worker         // to true. This property will consume a sectionId regardless of whether
91*8b6cd535SAndroid Build Coastguard Worker         // or not it is actually indexable.
92*8b6cd535SAndroid Build Coastguard Worker         levels_.back().SetCurrentPropertyIndexable(true);
93*8b6cd535SAndroid Build Coastguard Worker         ++current_top_level_indexable_nested_properties_idx_;
94*8b6cd535SAndroid Build Coastguard Worker       }
95*8b6cd535SAndroid Build Coastguard Worker       return libtextclassifier3::Status::OK;
96*8b6cd535SAndroid Build Coastguard Worker     }
97*8b6cd535SAndroid Build Coastguard Worker 
98*8b6cd535SAndroid Build Coastguard Worker     // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
99*8b6cd535SAndroid Build Coastguard Worker     //   schema and we need to traverse the next level. Look up SchemaTypeConfig
100*8b6cd535SAndroid Build Coastguard Worker     //   (by the schema name) by type_config_map_, and push a new level into
101*8b6cd535SAndroid Build Coastguard Worker     //   levels_.
102*8b6cd535SAndroid Build Coastguard Worker     // - Each level has to record the index of property it is currently at, so
103*8b6cd535SAndroid Build Coastguard Worker     //   we can resume the iteration when returning back to it. Also other
104*8b6cd535SAndroid Build Coastguard Worker     //   essential info will be maintained in LevelInfo as well.
105*8b6cd535SAndroid Build Coastguard Worker     auto nested_type_config_iter =
106*8b6cd535SAndroid Build Coastguard Worker         type_config_map_.find(curr_property_config.schema_type());
107*8b6cd535SAndroid Build Coastguard Worker     if (nested_type_config_iter == type_config_map_.end()) {
108*8b6cd535SAndroid Build Coastguard Worker       // This should never happen because our schema should already be
109*8b6cd535SAndroid Build Coastguard Worker       // validated by this point.
110*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::NotFoundError(absl_ports::StrCat(
111*8b6cd535SAndroid Build Coastguard Worker           "Type config not found: ", curr_property_config.schema_type()));
112*8b6cd535SAndroid Build Coastguard Worker     }
113*8b6cd535SAndroid Build Coastguard Worker     const SchemaTypeConfigProto& nested_type_config =
114*8b6cd535SAndroid Build Coastguard Worker         nested_type_config_iter->second;
115*8b6cd535SAndroid Build Coastguard Worker 
116*8b6cd535SAndroid Build Coastguard Worker     if (levels_.back().GetLevelNestedIndexable()) {
117*8b6cd535SAndroid Build Coastguard Worker       // We should set sorted_top_level_indexable_nested_properties_ to the list
118*8b6cd535SAndroid Build Coastguard Worker       // defined by the current level.
119*8b6cd535SAndroid Build Coastguard Worker       // GetLevelNestedIndexable() is true either because:
120*8b6cd535SAndroid Build Coastguard Worker       // 1. We're looking at a document property of the top-level schema --
121*8b6cd535SAndroid Build Coastguard Worker       //    The first LevelInfo for the iterator is initialized with
122*8b6cd535SAndroid Build Coastguard Worker       //    all_nested_properties_indexable_ = true.
123*8b6cd535SAndroid Build Coastguard Worker       // 2. All previous levels set index_nested_properties = true:
124*8b6cd535SAndroid Build Coastguard Worker       //    This indicates that upper-level schema types want to follow nested
125*8b6cd535SAndroid Build Coastguard Worker       //    properties definition of its document subtypes. If this is the first
126*8b6cd535SAndroid Build Coastguard Worker       //    subtype level that defines a list, we should set it as
127*8b6cd535SAndroid Build Coastguard Worker       //    top_level_indexable_nested_properties_ for the current top-level
128*8b6cd535SAndroid Build Coastguard Worker       //    schema.
129*8b6cd535SAndroid Build Coastguard Worker       sorted_top_level_indexable_nested_properties_.clear();
130*8b6cd535SAndroid Build Coastguard Worker       sorted_top_level_indexable_nested_properties_.reserve(
131*8b6cd535SAndroid Build Coastguard Worker           curr_property_config.document_indexing_config()
132*8b6cd535SAndroid Build Coastguard Worker               .indexable_nested_properties_list()
133*8b6cd535SAndroid Build Coastguard Worker               .size());
134*8b6cd535SAndroid Build Coastguard Worker       for (const std::string& property :
135*8b6cd535SAndroid Build Coastguard Worker            curr_property_config.document_indexing_config()
136*8b6cd535SAndroid Build Coastguard Worker                .indexable_nested_properties_list()) {
137*8b6cd535SAndroid Build Coastguard Worker         // Concat the current property name to each property to get the full
138*8b6cd535SAndroid Build Coastguard Worker         // property path expression for each indexable nested property.
139*8b6cd535SAndroid Build Coastguard Worker         sorted_top_level_indexable_nested_properties_.push_back(
140*8b6cd535SAndroid Build Coastguard Worker             property_util::ConcatenatePropertyPathExpr(curr_property_path,
141*8b6cd535SAndroid Build Coastguard Worker                                                        property));
142*8b6cd535SAndroid Build Coastguard Worker       }
143*8b6cd535SAndroid Build Coastguard Worker       current_top_level_indexable_nested_properties_idx_ = 0;
144*8b6cd535SAndroid Build Coastguard Worker       // Sort elements and dedupe
145*8b6cd535SAndroid Build Coastguard Worker       std::sort(sorted_top_level_indexable_nested_properties_.begin(),
146*8b6cd535SAndroid Build Coastguard Worker                 sorted_top_level_indexable_nested_properties_.end());
147*8b6cd535SAndroid Build Coastguard Worker       auto last =
148*8b6cd535SAndroid Build Coastguard Worker           std::unique(sorted_top_level_indexable_nested_properties_.begin(),
149*8b6cd535SAndroid Build Coastguard Worker                       sorted_top_level_indexable_nested_properties_.end());
150*8b6cd535SAndroid Build Coastguard Worker       sorted_top_level_indexable_nested_properties_.erase(
151*8b6cd535SAndroid Build Coastguard Worker           last, sorted_top_level_indexable_nested_properties_.end());
152*8b6cd535SAndroid Build Coastguard Worker     }
153*8b6cd535SAndroid Build Coastguard Worker 
154*8b6cd535SAndroid Build Coastguard Worker     bool is_cycle =
155*8b6cd535SAndroid Build Coastguard Worker         parent_type_config_names_.find(nested_type_config.schema_type()) !=
156*8b6cd535SAndroid Build Coastguard Worker         parent_type_config_names_.end();
157*8b6cd535SAndroid Build Coastguard Worker     bool is_parent_property_path =
158*8b6cd535SAndroid Build Coastguard Worker         current_top_level_indexable_nested_properties_idx_ <
159*8b6cd535SAndroid Build Coastguard Worker             sorted_top_level_indexable_nested_properties_.size() &&
160*8b6cd535SAndroid Build Coastguard Worker         property_util::IsParentPropertyPath(
161*8b6cd535SAndroid Build Coastguard Worker             curr_property_path,
162*8b6cd535SAndroid Build Coastguard Worker             sorted_top_level_indexable_nested_properties_.at(
163*8b6cd535SAndroid Build Coastguard Worker                 current_top_level_indexable_nested_properties_idx_));
164*8b6cd535SAndroid Build Coastguard Worker     if (is_cycle && !is_parent_property_path) {
165*8b6cd535SAndroid Build Coastguard Worker       // Cycle detected. The schema definition is guaranteed to be valid here
166*8b6cd535SAndroid Build Coastguard Worker       // since it must have already been validated during SchemaUtil::Validate,
167*8b6cd535SAndroid Build Coastguard Worker       // which would have rejected any schema with bad cycles.
168*8b6cd535SAndroid Build Coastguard Worker       //
169*8b6cd535SAndroid Build Coastguard Worker       // There are no properties in the indexable_nested_properties_list that
170*8b6cd535SAndroid Build Coastguard Worker       // are a part of this circular reference.
171*8b6cd535SAndroid Build Coastguard Worker       // We do not need to iterate this type further so we simply move on to
172*8b6cd535SAndroid Build Coastguard Worker       // other properties in the parent type.
173*8b6cd535SAndroid Build Coastguard Worker       continue;
174*8b6cd535SAndroid Build Coastguard Worker     }
175*8b6cd535SAndroid Build Coastguard Worker 
176*8b6cd535SAndroid Build Coastguard Worker     bool all_nested_properties_indexable =
177*8b6cd535SAndroid Build Coastguard Worker         levels_.back().GetLevelNestedIndexable() &&
178*8b6cd535SAndroid Build Coastguard Worker         curr_property_config.document_indexing_config()
179*8b6cd535SAndroid Build Coastguard Worker             .index_nested_properties();
180*8b6cd535SAndroid Build Coastguard Worker     levels_.push_back(LevelInfo(nested_type_config,
181*8b6cd535SAndroid Build Coastguard Worker                                 std::move(curr_property_path),
182*8b6cd535SAndroid Build Coastguard Worker                                 all_nested_properties_indexable));
183*8b6cd535SAndroid Build Coastguard Worker     parent_type_config_names_.insert(nested_type_config.schema_type());
184*8b6cd535SAndroid Build Coastguard Worker   }
185*8b6cd535SAndroid Build Coastguard Worker 
186*8b6cd535SAndroid Build Coastguard Worker   // Before returning, move all remaining uniterated properties from
187*8b6cd535SAndroid Build Coastguard Worker   // sorted_top_level_indexable_nested_properties_ into
188*8b6cd535SAndroid Build Coastguard Worker   // unknown_indexable_nested_properties_.
189*8b6cd535SAndroid Build Coastguard Worker   std::move(sorted_top_level_indexable_nested_properties_.begin() +
190*8b6cd535SAndroid Build Coastguard Worker                 current_top_level_indexable_nested_properties_idx_,
191*8b6cd535SAndroid Build Coastguard Worker             sorted_top_level_indexable_nested_properties_.end(),
192*8b6cd535SAndroid Build Coastguard Worker             std::back_inserter(unknown_indexable_nested_property_paths_));
193*8b6cd535SAndroid Build Coastguard Worker 
194*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::OutOfRangeError("End of iterator");
195*8b6cd535SAndroid Build Coastguard Worker }
196*8b6cd535SAndroid Build Coastguard Worker 
197*8b6cd535SAndroid Build Coastguard Worker }  // namespace lib
198*8b6cd535SAndroid Build Coastguard Worker }  // namespace icing
199