xref: /aosp_15_r20/external/icing/icing/schema/schema-property-iterator.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/schema/schema-property-iterator.h"
16 
17 #include <algorithm>
18 #include <string>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/proto/schema.pb.h"
27 #include "icing/schema/property-util.h"
28 
29 namespace icing {
30 namespace lib {
31 
Advance()32 libtextclassifier3::Status SchemaPropertyIterator::Advance() {
33   while (!levels_.empty()) {
34     if (!levels_.back().Advance()) {
35       // When finishing iterating all properties of the current level, pop it
36       // from the stack (levels_), return to the previous level and resume the
37       // iteration.
38       parent_type_config_names_.erase(
39           parent_type_config_names_.find(levels_.back().GetSchemaTypeName()));
40       levels_.pop_back();
41       continue;
42     }
43 
44     const PropertyConfigProto& curr_property_config =
45         levels_.back().GetCurrentPropertyConfig();
46     std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
47 
48     // Iterate through the sorted_top_level_indexable_nested_properties_ in
49     // order until we find the first element that is >= curr_property_path.
50     while (current_top_level_indexable_nested_properties_idx_ <
51                sorted_top_level_indexable_nested_properties_.size() &&
52            sorted_top_level_indexable_nested_properties_.at(
53                current_top_level_indexable_nested_properties_idx_) <
54                curr_property_path) {
55       // If an element in sorted_top_level_indexable_nested_properties_ < the
56       // current property path, it means that we've already iterated past the
57       // possible position for it without seeing it.
58       // It's not a valid property path in our schema definition. Add it to
59       // unknown_indexable_nested_properties_ and advance
60       // current_top_level_indexable_nested_properties_idx_.
61       unknown_indexable_nested_property_paths_.push_back(
62           sorted_top_level_indexable_nested_properties_.at(
63               current_top_level_indexable_nested_properties_idx_));
64       ++current_top_level_indexable_nested_properties_idx_;
65     }
66 
67     if (curr_property_config.data_type() !=
68         PropertyConfigProto::DataType::DOCUMENT) {
69       // We've advanced to a leaf property.
70       // Set whether this property is indexable according to its level's
71       // indexable config. If this property is declared in
72       // indexable_nested_properties_list of the top-level schema, it is also
73       // nested indexable.
74       std::string* current_indexable_nested_prop =
75           current_top_level_indexable_nested_properties_idx_ <
76                   sorted_top_level_indexable_nested_properties_.size()
77               ? &sorted_top_level_indexable_nested_properties_.at(
78                     current_top_level_indexable_nested_properties_idx_)
79               : nullptr;
80       if (current_indexable_nested_prop == nullptr ||
81           *current_indexable_nested_prop > curr_property_path) {
82         // Current property is not in the indexable list. Set it as indexable if
83         // its schema level is indexable AND it is an indexable property.
84         bool is_property_indexable =
85             levels_.back().GetLevelNestedIndexable() &&
86             SchemaUtil::IsIndexedProperty(curr_property_config);
87         levels_.back().SetCurrentPropertyIndexable(is_property_indexable);
88       } else if (*current_indexable_nested_prop == curr_property_path) {
89         // Current property is in the indexable list. Set its indexable config
90         // to true. This property will consume a sectionId regardless of whether
91         // or not it is actually indexable.
92         levels_.back().SetCurrentPropertyIndexable(true);
93         ++current_top_level_indexable_nested_properties_idx_;
94       }
95       return libtextclassifier3::Status::OK;
96     }
97 
98     // - When advancing to a TYPE_DOCUMENT property, it means it is a nested
99     //   schema and we need to traverse the next level. Look up SchemaTypeConfig
100     //   (by the schema name) by type_config_map_, and push a new level into
101     //   levels_.
102     // - Each level has to record the index of property it is currently at, so
103     //   we can resume the iteration when returning back to it. Also other
104     //   essential info will be maintained in LevelInfo as well.
105     auto nested_type_config_iter =
106         type_config_map_.find(curr_property_config.schema_type());
107     if (nested_type_config_iter == type_config_map_.end()) {
108       // This should never happen because our schema should already be
109       // validated by this point.
110       return absl_ports::NotFoundError(absl_ports::StrCat(
111           "Type config not found: ", curr_property_config.schema_type()));
112     }
113     const SchemaTypeConfigProto& nested_type_config =
114         nested_type_config_iter->second;
115 
116     if (levels_.back().GetLevelNestedIndexable()) {
117       // We should set sorted_top_level_indexable_nested_properties_ to the list
118       // defined by the current level.
119       // GetLevelNestedIndexable() is true either because:
120       // 1. We're looking at a document property of the top-level schema --
121       //    The first LevelInfo for the iterator is initialized with
122       //    all_nested_properties_indexable_ = true.
123       // 2. All previous levels set index_nested_properties = true:
124       //    This indicates that upper-level schema types want to follow nested
125       //    properties definition of its document subtypes. If this is the first
126       //    subtype level that defines a list, we should set it as
127       //    top_level_indexable_nested_properties_ for the current top-level
128       //    schema.
129       sorted_top_level_indexable_nested_properties_.clear();
130       sorted_top_level_indexable_nested_properties_.reserve(
131           curr_property_config.document_indexing_config()
132               .indexable_nested_properties_list()
133               .size());
134       for (const std::string& property :
135            curr_property_config.document_indexing_config()
136                .indexable_nested_properties_list()) {
137         // Concat the current property name to each property to get the full
138         // property path expression for each indexable nested property.
139         sorted_top_level_indexable_nested_properties_.push_back(
140             property_util::ConcatenatePropertyPathExpr(curr_property_path,
141                                                        property));
142       }
143       current_top_level_indexable_nested_properties_idx_ = 0;
144       // Sort elements and dedupe
145       std::sort(sorted_top_level_indexable_nested_properties_.begin(),
146                 sorted_top_level_indexable_nested_properties_.end());
147       auto last =
148           std::unique(sorted_top_level_indexable_nested_properties_.begin(),
149                       sorted_top_level_indexable_nested_properties_.end());
150       sorted_top_level_indexable_nested_properties_.erase(
151           last, sorted_top_level_indexable_nested_properties_.end());
152     }
153 
154     bool is_cycle =
155         parent_type_config_names_.find(nested_type_config.schema_type()) !=
156         parent_type_config_names_.end();
157     bool is_parent_property_path =
158         current_top_level_indexable_nested_properties_idx_ <
159             sorted_top_level_indexable_nested_properties_.size() &&
160         property_util::IsParentPropertyPath(
161             curr_property_path,
162             sorted_top_level_indexable_nested_properties_.at(
163                 current_top_level_indexable_nested_properties_idx_));
164     if (is_cycle && !is_parent_property_path) {
165       // Cycle detected. The schema definition is guaranteed to be valid here
166       // since it must have already been validated during SchemaUtil::Validate,
167       // which would have rejected any schema with bad cycles.
168       //
169       // There are no properties in the indexable_nested_properties_list that
170       // are a part of this circular reference.
171       // We do not need to iterate this type further so we simply move on to
172       // other properties in the parent type.
173       continue;
174     }
175 
176     bool all_nested_properties_indexable =
177         levels_.back().GetLevelNestedIndexable() &&
178         curr_property_config.document_indexing_config()
179             .index_nested_properties();
180     levels_.push_back(LevelInfo(nested_type_config,
181                                 std::move(curr_property_path),
182                                 all_nested_properties_indexable));
183     parent_type_config_names_.insert(nested_type_config.schema_type());
184   }
185 
186   // Before returning, move all remaining uniterated properties from
187   // sorted_top_level_indexable_nested_properties_ into
188   // unknown_indexable_nested_properties_.
189   std::move(sorted_top_level_indexable_nested_properties_.begin() +
190                 current_top_level_indexable_nested_properties_idx_,
191             sorted_top_level_indexable_nested_properties_.end(),
192             std::back_inserter(unknown_indexable_nested_property_paths_));
193 
194   return absl_ports::OutOfRangeError("End of iterator");
195 }
196 
197 }  // namespace lib
198 }  // namespace icing
199