1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/schema/schema-util.h"
16
17 #include <algorithm>
18 #include <cctype>
19 #include <queue>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26
27 #include "icing/text_classifier/lib3/utils/base/status.h"
28 #include "icing/text_classifier/lib3/utils/base/statusor.h"
29 #include "icing/absl_ports/annotate.h"
30 #include "icing/absl_ports/canonical_errors.h"
31 #include "icing/absl_ports/str_cat.h"
32 #include "icing/absl_ports/str_join.h"
33 #include "icing/feature-flags.h"
34 #include "icing/proto/schema.pb.h"
35 #include "icing/proto/term.pb.h"
36 #include "icing/util/logging.h"
37 #include "icing/util/status-macros.h"
38
39 namespace icing {
40 namespace lib {
41
42 namespace {
43
AreStringIndexingConfigsEqual(const StringIndexingConfig & old_config,const StringIndexingConfig & new_config)44 bool AreStringIndexingConfigsEqual(const StringIndexingConfig& old_config,
45 const StringIndexingConfig& new_config) {
46 return old_config.term_match_type() == new_config.term_match_type() &&
47 old_config.tokenizer_type() == new_config.tokenizer_type();
48 }
49
AreDocumentIndexingConfigsEqual(const DocumentIndexingConfig & old_config,const DocumentIndexingConfig & new_config)50 bool AreDocumentIndexingConfigsEqual(const DocumentIndexingConfig& old_config,
51 const DocumentIndexingConfig& new_config) {
52 return old_config.index_nested_properties() ==
53 new_config.index_nested_properties();
54 }
55
AreIntegerIndexingConfigsEqual(const IntegerIndexingConfig & old_config,const IntegerIndexingConfig & new_config)56 bool AreIntegerIndexingConfigsEqual(const IntegerIndexingConfig& old_config,
57 const IntegerIndexingConfig& new_config) {
58 return old_config.numeric_match_type() == new_config.numeric_match_type();
59 }
60
AreJoinableConfigsEqual(const JoinableConfig & old_config,const JoinableConfig & new_config)61 bool AreJoinableConfigsEqual(const JoinableConfig& old_config,
62 const JoinableConfig& new_config) {
63 return old_config.value_type() == new_config.value_type() &&
64 old_config.delete_propagation_type() ==
65 new_config.delete_propagation_type();
66 }
67
AreEmbeddingIndexingConfigsEqual(const EmbeddingIndexingConfig & old_config,const EmbeddingIndexingConfig & new_config)68 bool AreEmbeddingIndexingConfigsEqual(
69 const EmbeddingIndexingConfig& old_config,
70 const EmbeddingIndexingConfig& new_config) {
71 return old_config.embedding_indexing_type() ==
72 new_config.embedding_indexing_type() &&
73 old_config.quantization_type() == new_config.quantization_type();
74 }
75
ArePropertiesEqual(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)76 bool ArePropertiesEqual(const PropertyConfigProto& old_property,
77 const PropertyConfigProto& new_property) {
78 return old_property.property_name() == new_property.property_name() &&
79 old_property.data_type() == new_property.data_type() &&
80 old_property.schema_type() == new_property.schema_type() &&
81 old_property.cardinality() == new_property.cardinality() &&
82 old_property.scorable_type() == new_property.scorable_type() &&
83 AreStringIndexingConfigsEqual(old_property.string_indexing_config(),
84 new_property.string_indexing_config()) &&
85 AreDocumentIndexingConfigsEqual(
86 old_property.document_indexing_config(),
87 new_property.document_indexing_config()) &&
88 AreIntegerIndexingConfigsEqual(
89 old_property.integer_indexing_config(),
90 new_property.integer_indexing_config()) &&
91 AreJoinableConfigsEqual(old_property.joinable_config(),
92 new_property.joinable_config()) &&
93 AreEmbeddingIndexingConfigsEqual(
94 old_property.embedding_indexing_config(),
95 new_property.embedding_indexing_config());
96 }
97
IsCardinalityCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)98 bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
99 const PropertyConfigProto& new_property) {
100 if (old_property.cardinality() < new_property.cardinality()) {
101 // We allow a new, less restrictive cardinality (i.e. a REQUIRED field
102 // can become REPEATED or OPTIONAL, but not the other way around).
103 ICING_VLOG(1) << absl_ports::StrCat(
104 "Cardinality is more restrictive than before ",
105 PropertyConfigProto::Cardinality::Code_Name(old_property.cardinality()),
106 "->",
107 PropertyConfigProto::Cardinality::Code_Name(
108 new_property.cardinality()));
109 return false;
110 }
111 return true;
112 }
113
IsDataTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)114 bool IsDataTypeCompatible(const PropertyConfigProto& old_property,
115 const PropertyConfigProto& new_property) {
116 if (old_property.data_type() != new_property.data_type()) {
117 // TODO(cassiewang): Maybe we can be a bit looser with this, e.g. we just
118 // string cast an int64_t to a string. But for now, we'll stick with
119 // simplistics.
120 ICING_VLOG(1) << absl_ports::StrCat(
121 "Data type ",
122 PropertyConfigProto::DataType::Code_Name(old_property.data_type()),
123 "->",
124 PropertyConfigProto::DataType::Code_Name(new_property.data_type()));
125 return false;
126 }
127 return true;
128 }
129
IsSchemaTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)130 bool IsSchemaTypeCompatible(const PropertyConfigProto& old_property,
131 const PropertyConfigProto& new_property) {
132 if (old_property.schema_type() != new_property.schema_type()) {
133 ICING_VLOG(1) << absl_ports::StrCat("Schema type ",
134 old_property.schema_type(), "->",
135 new_property.schema_type());
136 return false;
137 }
138 return true;
139 }
140
IsPropertyCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)141 bool IsPropertyCompatible(const PropertyConfigProto& old_property,
142 const PropertyConfigProto& new_property) {
143 return IsDataTypeCompatible(old_property, new_property) &&
144 IsSchemaTypeCompatible(old_property, new_property) &&
145 IsCardinalityCompatible(old_property, new_property);
146 }
147
IsTermMatchTypeCompatible(const StringIndexingConfig & old_indexed,const StringIndexingConfig & new_indexed)148 bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
149 const StringIndexingConfig& new_indexed) {
150 return old_indexed.term_match_type() == new_indexed.term_match_type() &&
151 old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
152 }
153
IsIntegerNumericMatchTypeCompatible(const IntegerIndexingConfig & old_indexed,const IntegerIndexingConfig & new_indexed)154 bool IsIntegerNumericMatchTypeCompatible(
155 const IntegerIndexingConfig& old_indexed,
156 const IntegerIndexingConfig& new_indexed) {
157 return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
158 }
159
IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig & old_indexed,const EmbeddingIndexingConfig & new_indexed)160 bool IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig& old_indexed,
161 const EmbeddingIndexingConfig& new_indexed) {
162 return old_indexed.embedding_indexing_type() ==
163 new_indexed.embedding_indexing_type() &&
164 old_indexed.quantization_type() == new_indexed.quantization_type();
165 }
166
IsDocumentIndexingCompatible(const DocumentIndexingConfig & old_indexed,const DocumentIndexingConfig & new_indexed)167 bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
168 const DocumentIndexingConfig& new_indexed) {
169 // TODO(b/265304217): This could mark the new schema as incompatible and
170 // generate some unnecessary index rebuilds if the two schemas have an
171 // equivalent set of indexed properties, but changed the way that it is
172 // declared.
173 if (old_indexed.index_nested_properties() !=
174 new_indexed.index_nested_properties()) {
175 return false;
176 }
177
178 if (old_indexed.indexable_nested_properties_list().size() !=
179 new_indexed.indexable_nested_properties_list().size()) {
180 return false;
181 }
182
183 std::unordered_set<std::string_view> old_indexable_nested_properies_set(
184 old_indexed.indexable_nested_properties_list().begin(),
185 old_indexed.indexable_nested_properties_list().end());
186 for (const auto& property : new_indexed.indexable_nested_properties_list()) {
187 if (old_indexable_nested_properies_set.find(property) ==
188 old_indexable_nested_properies_set.end()) {
189 return false;
190 }
191 }
192 return true;
193 }
194
AddIncompatibleChangeToDelta(std::unordered_set<std::string> & incompatible_delta,const SchemaTypeConfigProto & old_type_config,const SchemaUtil::DependentMap & new_schema_dependent_map,const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map)195 void AddIncompatibleChangeToDelta(
196 std::unordered_set<std::string>& incompatible_delta,
197 const SchemaTypeConfigProto& old_type_config,
198 const SchemaUtil::DependentMap& new_schema_dependent_map,
199 const SchemaUtil::TypeConfigMap& old_type_config_map,
200 const SchemaUtil::TypeConfigMap& new_type_config_map) {
201 // If this type is incompatible, then every type that depends on it might
202 // also be incompatible. Use the dependent map to mark those ones as
203 // incompatible too.
204 incompatible_delta.insert(old_type_config.schema_type());
205 auto dependent_types_itr =
206 new_schema_dependent_map.find(old_type_config.schema_type());
207 if (dependent_types_itr != new_schema_dependent_map.end()) {
208 for (const auto& [dependent_type, _] : dependent_types_itr->second) {
209 // The types from new_schema that depend on the current
210 // old_type_config may not present in old_schema.
211 // Those types will be listed at schema_delta.schema_types_new
212 // instead.
213 std::string dependent_type_str(dependent_type);
214 if (old_type_config_map.find(dependent_type_str) !=
215 old_type_config_map.end()) {
216 incompatible_delta.insert(std::move(dependent_type_str));
217 }
218 }
219 }
220 }
221
222 // Returns if C1 <= C2 based on the following rule, where C1 and C2 are
223 // cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
224 //
225 // Rule: REQUIRED < OPTIONAL < REPEATED
CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,PropertyConfigProto::Cardinality::Code C2)226 bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
227 PropertyConfigProto::Cardinality::Code C2) {
228 if (C1 == C2) {
229 return true;
230 }
231 if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
232 return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
233 C2 == PropertyConfigProto::Cardinality::REPEATED;
234 }
235 if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
236 return C2 == PropertyConfigProto::Cardinality::REPEATED;
237 }
238 return false;
239 }
240
241 // Check if set1 is a subset of set2.
242 template <typename T>
IsSubset(const std::unordered_set<T> & set1,const std::unordered_set<T> & set2)243 bool IsSubset(const std::unordered_set<T>& set1,
244 const std::unordered_set<T>& set2) {
245 for (const auto& item : set1) {
246 if (set2.find(item) == set2.end()) {
247 return false;
248 }
249 }
250 return true;
251 }
252
253 // Builds a map of {schema_type -> set of scorable property names}
254 std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
BuildTypeToScorablePropertyNamesMap(const SchemaUtil::TypeConfigMap & type_config_map)255 BuildTypeToScorablePropertyNamesMap(
256 const SchemaUtil::TypeConfigMap& type_config_map) {
257 std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
258 type_to_scorable_property_names_map;
259 for (const auto& [schema_type, schema_type_config] : type_config_map) {
260 for (const PropertyConfigProto& property_config :
261 schema_type_config.properties()) {
262 if (property_config.scorable_type() ==
263 PropertyConfigProto::ScorableType::ENABLED) {
264 type_to_scorable_property_names_map[schema_type].insert(
265 property_config.property_name());
266 }
267 }
268 }
269 return type_to_scorable_property_names_map;
270 }
271
272 // Finds the schema types that have inconsistent scorable properties, which will
273 // be added in place in the `schema_delta`.
FindScorablePropertyInconsistentTypes(const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map,const SchemaUtil::DependentMap & new_schema_dependent_map,SchemaUtil::SchemaDelta * schema_delta)274 void FindScorablePropertyInconsistentTypes(
275 const SchemaUtil::TypeConfigMap& old_type_config_map,
276 const SchemaUtil::TypeConfigMap& new_type_config_map,
277 const SchemaUtil::DependentMap& new_schema_dependent_map,
278 SchemaUtil::SchemaDelta* schema_delta) {
279 std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
280 new_type_to_scorable_property_names_map =
281 BuildTypeToScorablePropertyNamesMap(new_type_config_map);
282 std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
283 old_type_to_scorable_property_names_map =
284 BuildTypeToScorablePropertyNamesMap(old_type_config_map);
285 for (const auto& [schema_type, _] : old_type_config_map) {
286 if (new_type_config_map.find(schema_type) == new_type_config_map.end()) {
287 // The type has been deleted in the new schema.
288 continue;
289 }
290 auto old_schema_type_property_names_iter =
291 old_type_to_scorable_property_names_map.find(schema_type);
292 auto new_schema_type_property_names_iter =
293 new_type_to_scorable_property_names_map.find(schema_type);
294 bool has_scorable_properties_in_old_schema =
295 old_schema_type_property_names_iter !=
296 old_type_to_scorable_property_names_map.end();
297 bool has_scorable_properties_in_new_schema =
298 new_schema_type_property_names_iter !=
299 new_type_to_scorable_property_names_map.end();
300 if (has_scorable_properties_in_old_schema &&
301 !has_scorable_properties_in_new_schema) {
302 schema_delta->schema_types_scorable_property_inconsistent.insert(
303 schema_type);
304 } else if (!has_scorable_properties_in_old_schema &&
305 has_scorable_properties_in_new_schema) {
306 schema_delta->schema_types_scorable_property_inconsistent.insert(
307 schema_type);
308 } else if (has_scorable_properties_in_old_schema &&
309 has_scorable_properties_in_new_schema) {
310 // The sets of scorable properties from the old and new schema are
311 // different.
312 if (old_schema_type_property_names_iter->second !=
313 new_schema_type_property_names_iter->second) {
314 schema_delta->schema_types_scorable_property_inconsistent.insert(
315 schema_type);
316 }
317 }
318 }
319
320 // Now, look up the DependentMap of the new schema config and find the parent
321 // types that depend on the currently discovered inconsistent types.
322 std::vector<std::string_view> parent_types;
323 for (const std::string& schema_type :
324 schema_delta->schema_types_scorable_property_inconsistent) {
325 auto parent_type_maps_iter = new_schema_dependent_map.find(schema_type);
326 if (parent_type_maps_iter == new_schema_dependent_map.end()) {
327 continue;
328 }
329 for (const auto& [parent_type, _] : parent_type_maps_iter->second) {
330 parent_types.push_back(parent_type);
331 }
332 }
333 schema_delta->schema_types_scorable_property_inconsistent.insert(
334 parent_types.begin(), parent_types.end());
335 }
336
337 } // namespace
338
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_types_map,const std::unordered_set<std::string_view> & joinable_types,std::string_view type,bool path_contains_joinable_property,SchemaUtil::DependentMap * expanded_nested_types_map,std::unordered_map<std::string_view,bool> && pending_expansion_paths_indexable,std::unordered_set<std::string_view> * sink_types)339 libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
340 const SchemaUtil::DependentMap& direct_nested_types_map,
341 const std::unordered_set<std::string_view>& joinable_types,
342 std::string_view type, bool path_contains_joinable_property,
343 SchemaUtil::DependentMap* expanded_nested_types_map,
344 std::unordered_map<std::string_view, bool>&&
345 pending_expansion_paths_indexable,
346 std::unordered_set<std::string_view>* sink_types) {
347 // TODO(b/280698121): Implement optimizations to this code to avoid reentering
348 // a node after it's already been expanded.
349
350 auto itr = direct_nested_types_map.find(type);
351 if (itr == direct_nested_types_map.end()) {
352 // It's a sink node. Just return.
353 sink_types->insert(type);
354 return libtextclassifier3::Status::OK;
355 }
356 std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
357 expanded_relations;
358
359 // Add all of the adjacent outgoing relations.
360 expanded_relations.reserve(itr->second.size());
361 expanded_relations.insert(itr->second.begin(), itr->second.end());
362
363 // Iterate through each adjacent outgoing relation and add their indirect
364 // outgoing relations.
365 for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
366 // Make a copy of pending_expansion_paths_indexable for every iteration.
367 std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
368 pending_expansion_paths_indexable);
369
370 // 1. Check the nested indexable config of the edge (type -> adjacent_type),
371 // and the joinable config of the current path up to adjacent_type.
372 //
373 // The nested indexable config is true if any of the PropertyConfigProtos
374 // representing the connecting edge has index_nested_properties=true.
375 bool is_edge_nested_indexable = std::any_of(
376 adjacent_property_protos.begin(), adjacent_property_protos.end(),
377 [](const PropertyConfigProto* property_config) {
378 return property_config->document_indexing_config()
379 .index_nested_properties();
380 });
381 // TODO(b/265304217): change this once we add joinable_properties_list.
382 // Check if addition of the new edge (type->adjacent_type) makes the path
383 // joinable.
384 bool new_path_contains_joinable_property =
385 joinable_types.count(type) > 0 || path_contains_joinable_property;
386 // Set is_nested_indexable field for the current edge
387 pending_expansion_paths_copy[type] = is_edge_nested_indexable;
388
389 // If is_edge_nested_indexable=false, then all paths to adjacent_type
390 // currently in the pending_expansions map are also not nested indexable.
391 if (!is_edge_nested_indexable) {
392 for (auto& pending_expansion : pending_expansion_paths_copy) {
393 pending_expansion.second = false;
394 }
395 }
396
397 // 2. Check if we're in the middle of expanding this type - IOW
398 // there's a cycle!
399 //
400 // This cycle is not allowed if either:
401 // 1. The cycle starting at adjacent_type is nested indexable, OR
402 // 2. The current path contains a joinable property.
403 auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
404 if (adjacent_itr != pending_expansion_paths_copy.end()) {
405 if (adjacent_itr->second || new_path_contains_joinable_property) {
406 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
407 "Invalid cycle detected in type configs. '", type,
408 "' references itself and is nested-indexable or nested-joinable."));
409 }
410 // The cycle is allowed and there's no need to keep iterating the loop.
411 // Move on to the next adjacent value.
412 continue;
413 }
414
415 // 3. Expand this type as needed.
416 ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
417 direct_nested_types_map, joinable_types, adjacent_type,
418 new_path_contains_joinable_property, expanded_nested_types_map,
419 std::move(pending_expansion_paths_copy), sink_types));
420 if (sink_types->count(adjacent_type) > 0) {
421 // "adjacent" is a sink node. Just skip to the next.
422 continue;
423 }
424
425 // 4. "adjacent" has been fully expanded. Add all of its transitive
426 // outgoing relations to this type's transitive outgoing relations.
427 auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
428 for (const auto& [transitive_reachable, _] :
429 adjacent_expanded_itr->second) {
430 // Insert a transitive reachable node `transitive_reachable` for `type` if
431 // it wasn't previously reachable.
432 // Since there is no direct edge between `type` and `transitive_reachable`
433 // we insert an empty vector into the dependent map.
434 expanded_relations.insert({transitive_reachable, {}});
435 }
436 }
437 for (const auto& kvp : expanded_relations) {
438 expanded_nested_types_map->operator[](type).insert(kvp);
439 }
440 return libtextclassifier3::Status::OK;
441 }
442
443 template <typename T>
CalculateAcyclicTransitiveRelations(const SchemaUtil::TypeRelationMap<T> & direct_relation_map,std::string_view type,SchemaUtil::TypeRelationMap<T> * expanded_relation_map,std::unordered_set<std::string_view> * pending_expansions,std::unordered_set<std::string_view> * sink_types)444 libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
445 const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
446 std::string_view type,
447 SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
448 std::unordered_set<std::string_view>* pending_expansions,
449 std::unordered_set<std::string_view>* sink_types) {
450 auto expanded_itr = expanded_relation_map->find(type);
451 if (expanded_itr != expanded_relation_map->end()) {
452 // We've already expanded this type. Just return.
453 return libtextclassifier3::Status::OK;
454 }
455 auto itr = direct_relation_map.find(type);
456 if (itr == direct_relation_map.end()) {
457 // It's a sink node. Just return.
458 sink_types->insert(type);
459 return libtextclassifier3::Status::OK;
460 }
461 pending_expansions->insert(type);
462 std::unordered_map<std::string_view, T> expanded_relations;
463
464 // Add all of the adjacent outgoing relations.
465 expanded_relations.reserve(itr->second.size());
466 expanded_relations.insert(itr->second.begin(), itr->second.end());
467
468 // Iterate through each adjacent outgoing relation and add their indirect
469 // outgoing relations.
470 for (const auto& [adjacent, _] : itr->second) {
471 // 1. Check if we're in the middle of expanding this type - IOW there's a
472 // cycle!
473 if (pending_expansions->count(adjacent) > 0) {
474 return absl_ports::InvalidArgumentError(
475 absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
476 "' references or inherits from itself."));
477 }
478
479 // 2. Expand this type as needed.
480 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
481 direct_relation_map, adjacent, expanded_relation_map,
482 pending_expansions, sink_types));
483 if (sink_types->count(adjacent) > 0) {
484 // "adjacent" is a sink node. Just skip to the next.
485 continue;
486 }
487
488 // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
489 // relations to this type's transitive outgoing relations.
490 auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
491 for (const auto& [transitive_reachable, _] :
492 adjacent_expanded_itr->second) {
493 // Insert a transitive reachable node `transitive_reachable` for `type`.
494 // Also since there is no direct edge between `type` and
495 // `transitive_reachable`, the direct edge is initialized by default.
496 expanded_relations.insert({transitive_reachable, T()});
497 }
498 }
499 expanded_relation_map->insert({type, std::move(expanded_relations)});
500 pending_expansions->erase(type);
501 return libtextclassifier3::Status::OK;
502 }
503
504 // Calculate and return the expanded nested-type map from
505 // direct_nested_type_map. This expands the direct_nested_type_map to also
506 // include indirect nested-type relations.
507 //
508 // Ex. Suppose we have the following relations in direct_nested_type_map.
509 //
510 // C -> B (Schema type B has a document property of type C)
511 // B -> A (Schema type A has a document property of type B)
512 //
513 // Then, this function would expand the map by adding C -> A to the map.
514 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_type_map,const std::unordered_set<std::string_view> & joinable_types,bool allow_circular_schema_definitions)515 CalculateTransitiveNestedTypeRelations(
516 const SchemaUtil::DependentMap& direct_nested_type_map,
517 const std::unordered_set<std::string_view>& joinable_types,
518 bool allow_circular_schema_definitions) {
519 SchemaUtil::DependentMap expanded_nested_type_map;
520 // Types that have no outgoing relations.
521 std::unordered_set<std::string_view> sink_types;
522
523 if (allow_circular_schema_definitions) {
524 // Map of nodes that are pending expansion -> whether the path from each key
525 // node to the 'current' node is nested_indexable.
526 // A copy of this map is made for each new node that we expand.
527 std::unordered_map<std::string_view, bool>
528 pending_expansion_paths_indexable;
529 for (const auto& kvp : direct_nested_type_map) {
530 ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
531 direct_nested_type_map, joinable_types, kvp.first,
532 /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
533 std::unordered_map<std::string_view, bool>(
534 pending_expansion_paths_indexable),
535 &sink_types));
536 }
537 } else {
538 // If allow_circular_schema_definitions is false, then fallback to the old
539 // way of detecting cycles.
540 // Types that we are expanding.
541 std::unordered_set<std::string_view> pending_expansions;
542 for (const auto& kvp : direct_nested_type_map) {
543 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
544 direct_nested_type_map, kvp.first, &expanded_nested_type_map,
545 &pending_expansions, &sink_types));
546 }
547 }
548 return expanded_nested_type_map;
549 }
550
551 // Calculate and return the expanded inheritance map from
552 // direct_nested_type_map. This expands the direct_inheritance_map to also
553 // include indirect inheritance relations.
554 //
555 // Ex. Suppose we have the following relations in direct_inheritance_map.
556 //
557 // C -> B (Schema type C is B's parent_type )
558 // B -> A (Schema type B is A's parent_type)
559 //
560 // Then, this function would expand the map by adding C -> A to the map.
561 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
CalculateTransitiveInheritanceRelations(const SchemaUtil::InheritanceMap & direct_inheritance_map)562 CalculateTransitiveInheritanceRelations(
563 const SchemaUtil::InheritanceMap& direct_inheritance_map) {
564 SchemaUtil::InheritanceMap expanded_inheritance_map;
565
566 // Types that we are expanding.
567 std::unordered_set<std::string_view> pending_expansions;
568
569 // Types that have no outgoing relation.
570 std::unordered_set<std::string_view> sink_types;
571 for (const auto& kvp : direct_inheritance_map) {
572 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
573 direct_inheritance_map, kvp.first, &expanded_inheritance_map,
574 &pending_expansions, &sink_types));
575 }
576 return expanded_inheritance_map;
577 }
578
579 // Builds a transitive dependent map. Types with no dependents will not be
580 // present in the map as keys.
581 //
582 // Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
583 // type B and B has a property of type C. C and D only have non-document
584 // properties.
585 //
586 // The transitive dependent map for this schema would be:
587 // C -> A, B (both A and B depend on C)
588 // B -> A (A depends on B)
589 //
590 // A and D will not be present in the map as keys because no type depends on
591 // them.
592 //
593 // RETURNS:
594 // On success, a transitive dependent map of all types in the schema.
595 // INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
596 // ALREADY_EXISTS if a schema type is specified more than once in the schema
597 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
BuildTransitiveDependentGraph(const SchemaProto & schema,bool allow_circular_schema_definitions)598 BuildTransitiveDependentGraph(const SchemaProto& schema,
599 bool allow_circular_schema_definitions) {
600 // We expand the nested-type dependent map and inheritance map differently
601 // when calculating transitive relations. These two types of relations also
602 // should not be transitive so we keep these as separate maps.
603 //
604 // e.g. For schema type A, B and C, B depends on A through inheritance, and
605 // C depends on B by having a property with type B, we will have the two
606 // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
607 // in the map.
608 SchemaUtil::DependentMap direct_nested_type_map;
609 SchemaUtil::InheritanceMap direct_inheritance_map;
610
611 // Set of schema types that have at least one joinable property.
612 std::unordered_set<std::string_view> joinable_types;
613
614 // Add all first-order dependents.
615 std::unordered_set<std::string_view> known_types;
616 std::unordered_set<std::string_view> unknown_types;
617 for (const auto& type_config : schema.types()) {
618 std::string_view schema_type(type_config.schema_type());
619 if (known_types.count(schema_type) > 0) {
620 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
621 "Field 'schema_type' '", schema_type, "' is already defined"));
622 }
623 known_types.insert(schema_type);
624 unknown_types.erase(schema_type);
625 // Insert inheritance relations into the inheritance map.
626 for (std::string_view parent_schema_type : type_config.parent_types()) {
627 if (known_types.count(parent_schema_type) == 0) {
628 unknown_types.insert(parent_schema_type);
629 }
630 direct_inheritance_map[parent_schema_type][schema_type] = true;
631 }
632 for (const auto& property_config : type_config.properties()) {
633 if (property_config.joinable_config().value_type() !=
634 JoinableConfig::ValueType::NONE) {
635 joinable_types.insert(schema_type);
636 }
637 // Insert nested-type relations into the nested-type map.
638 if (property_config.data_type() ==
639 PropertyConfigProto::DataType::DOCUMENT) {
640 // Need to know what schema_type these Document properties should be
641 // validated against
642 std::string_view property_schema_type(property_config.schema_type());
643 if (known_types.count(property_schema_type) == 0) {
644 unknown_types.insert(property_schema_type);
645 }
646 direct_nested_type_map[property_schema_type][schema_type].push_back(
647 &property_config);
648 }
649 }
650 }
651 if (!unknown_types.empty()) {
652 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
653 "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
654 }
655
656 // Merge two expanded maps into a single dependent_map, without making
657 // inheritance and nested-type relations transitive.
658 ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
659 CalculateTransitiveNestedTypeRelations(
660 direct_nested_type_map, joinable_types,
661 allow_circular_schema_definitions));
662 ICING_ASSIGN_OR_RETURN(
663 SchemaUtil::InheritanceMap expanded_inheritance_map,
664 CalculateTransitiveInheritanceRelations(direct_inheritance_map));
665 for (const auto& [parent_type, inheritance_relation] :
666 expanded_inheritance_map) {
667 // Insert the parent_type into the dependent map if it is not present
668 // already.
669 merged_dependent_map.insert({parent_type, {}});
670 for (const auto& [child_type, _] : inheritance_relation) {
671 // Insert the child_type into parent_type's dependent map if it's not
672 // present already, in which case the value will be an empty vector.
673 merged_dependent_map[parent_type].insert({child_type, {}});
674 }
675 }
676 return merged_dependent_map;
677 }
678
679 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
BuildTransitiveInheritanceGraph(const SchemaProto & schema)680 SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
681 SchemaUtil::InheritanceMap direct_inheritance_map;
682 for (const auto& type_config : schema.types()) {
683 for (std::string_view parent_schema_type : type_config.parent_types()) {
684 direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
685 true;
686 }
687 }
688 return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
689 }
690
Validate(const SchemaProto & schema,const FeatureFlags & feature_flags,bool allow_circular_schema_definitions)691 libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
692 const SchemaProto& schema, const FeatureFlags& feature_flags,
693 bool allow_circular_schema_definitions) {
694 // 1. Build the dependent map. This will detect any cycles, non-existent or
695 // duplicate types in the schema.
696 ICING_ASSIGN_OR_RETURN(
697 SchemaUtil::DependentMap dependent_map,
698 BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
699
700 // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
701 // already.
702 std::unordered_set<std::string_view> known_property_names;
703
704 // Tracks PropertyConfigs containing joinable properties.
705 std::unordered_set<std::string_view> schema_types_with_joinable_property;
706
707 // 2. Validate the properties of each type.
708 for (const auto& type_config : schema.types()) {
709 std::string_view schema_type(type_config.schema_type());
710 ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
711
712 // We only care about properties being unique within one type_config
713 known_property_names.clear();
714
715 for (const auto& property_config : type_config.properties()) {
716 std::string_view property_name(property_config.property_name());
717 ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
718
719 // Property names must be unique
720 if (!known_property_names.insert(property_name).second) {
721 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
722 "Field 'property_name' '", property_name,
723 "' is already defined for schema '", schema_type, "'"));
724 }
725
726 auto data_type = property_config.data_type();
727 ICING_RETURN_IF_ERROR(
728 ValidateDataType(data_type, schema_type, property_name));
729
730 if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
731 // Need to know what schema_type these Document properties should be
732 // validated against
733 std::string_view property_schema_type(property_config.schema_type());
734 libtextclassifier3::Status validated_status =
735 ValidateSchemaType(property_schema_type);
736 if (!validated_status.ok()) {
737 return absl_ports::Annotate(
738 validated_status,
739 absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
740 "data_types in schema property '",
741 schema_type, ".", property_name, "'"));
742 }
743
744 ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
745 property_config.document_indexing_config(), schema_type,
746 property_name));
747 }
748
749 ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
750 schema_type, property_name));
751 if (feature_flags.enable_scorable_properties()) {
752 ICING_RETURN_IF_ERROR(
753 ValidateScorableType(schema_type, property_config));
754 }
755
756 if (data_type == PropertyConfigProto::DataType::STRING) {
757 ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
758 property_config.string_indexing_config(), data_type, schema_type,
759 property_name));
760 }
761
762 ICING_RETURN_IF_ERROR(
763 ValidateJoinableConfig(property_config.joinable_config(), data_type,
764 property_config.cardinality(), schema_type,
765 property_name, feature_flags));
766 if (property_config.joinable_config().value_type() !=
767 JoinableConfig::ValueType::NONE) {
768 schema_types_with_joinable_property.insert(schema_type);
769 }
770 }
771 }
772
773 // BFS traverse the dependent graph to make sure that no nested levels
774 // (properties with DOCUMENT data type) have REPEATED cardinality while
775 // depending on schema types with joinable property.
776 std::queue<std::string_view> frontier;
777 for (const auto& schema_type : schema_types_with_joinable_property) {
778 frontier.push(schema_type);
779 }
780 std::unordered_set<std::string_view> traversed =
781 std::move(schema_types_with_joinable_property);
782 while (!frontier.empty()) {
783 std::string_view schema_type = frontier.front();
784 frontier.pop();
785
786 const auto it = dependent_map.find(schema_type);
787 if (it == dependent_map.end()) {
788 continue;
789 }
790
791 // Check every type that has a property of type schema_type.
792 for (const auto& [next_schema_type, property_configs] : it->second) {
793 // Check all properties in "next_schema_type" that are of type
794 // "schema_type".
795 for (const PropertyConfigProto* property_config : property_configs) {
796 if (property_config != nullptr &&
797 property_config->cardinality() ==
798 PropertyConfigProto::Cardinality::REPEATED) {
799 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
800 "Schema type '", next_schema_type,
801 "' cannot have REPEATED nested document property '",
802 property_config->property_name(),
803 "' while connecting to some joinable properties"));
804 }
805 }
806
807 if (traversed.count(next_schema_type) == 0) {
808 traversed.insert(next_schema_type);
809 frontier.push(next_schema_type);
810 }
811 }
812 }
813
814 // Verify that every child type's property set has included all compatible
815 // properties from parent types.
816 ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
817 return dependent_map;
818 }
819
ValidateSchemaType(std::string_view schema_type)820 libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
821 std::string_view schema_type) {
822 // Require a schema_type
823 if (schema_type.empty()) {
824 return absl_ports::InvalidArgumentError(
825 "Field 'schema_type' cannot be empty.");
826 }
827
828 return libtextclassifier3::Status::OK;
829 }
830
ValidatePropertyName(std::string_view property_name,std::string_view schema_type)831 libtextclassifier3::Status SchemaUtil::ValidatePropertyName(
832 std::string_view property_name, std::string_view schema_type) {
833 // Require a property_name
834 if (property_name.empty()) {
835 return absl_ports::InvalidArgumentError(
836 absl_ports::StrCat("Field 'property_name' for schema '", schema_type,
837 "' cannot be empty."));
838 }
839
840 // Only support alphanumeric values.
841 for (char c : property_name) {
842 if (!std::isalnum(c)) {
843 return absl_ports::InvalidArgumentError(
844 absl_ports::StrCat("Field 'property_name' '", property_name,
845 "' can only contain alphanumeric characters."));
846 }
847 }
848
849 return libtextclassifier3::Status::OK;
850 }
851
ValidateDataType(PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)852 libtextclassifier3::Status SchemaUtil::ValidateDataType(
853 PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
854 std::string_view property_name) {
855 // UNKNOWN is the default enum value and should only be used for backwards
856 // compatibility
857 if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
858 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
859 "Field 'data_type' cannot be UNKNOWN for schema property '",
860 schema_type, ".", property_name, "'"));
861 }
862
863 return libtextclassifier3::Status::OK;
864 }
865
ValidateCardinality(PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)866 libtextclassifier3::Status SchemaUtil::ValidateCardinality(
867 PropertyConfigProto::Cardinality::Code cardinality,
868 std::string_view schema_type, std::string_view property_name) {
869 // UNKNOWN is the default enum value and should only be used for backwards
870 // compatibility
871 if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
872 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
873 "Field 'cardinality' cannot be UNKNOWN for schema property '",
874 schema_type, ".", property_name, "'"));
875 }
876
877 return libtextclassifier3::Status::OK;
878 }
879
ValidateScorableType(std::string_view schema_type,const PropertyConfigProto & property_config_proto)880 libtextclassifier3::Status SchemaUtil::ValidateScorableType(
881 std::string_view schema_type,
882 const PropertyConfigProto& property_config_proto) {
883 if (property_config_proto.data_type() ==
884 PropertyConfigProto::DataType::DOCUMENT) {
885 if (property_config_proto.scorable_type() !=
886 PropertyConfigProto::ScorableType::UNKNOWN) {
887 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
888 "Field 'scorable_type' shouldn't be explicitly set for data type "
889 "DOCUMENT. It is considered scorable if any of its or its "
890 "dependency's property is scorable."));
891 }
892 }
893
894 if (property_config_proto.scorable_type() ==
895 PropertyConfigProto::ScorableType::DISABLED ||
896 property_config_proto.scorable_type() ==
897 PropertyConfigProto::ScorableType::UNKNOWN) {
898 return libtextclassifier3::Status::OK;
899 }
900
901 switch (property_config_proto.data_type()) {
902 case PropertyConfigProto::DataType::INT64:
903 case PropertyConfigProto::DataType::DOUBLE:
904 case PropertyConfigProto::DataType::BOOLEAN:
905 return libtextclassifier3::Status::OK;
906 default:
907 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
908 "Field 'scorable_type' cannot be enabled for data type '",
909 PropertyConfigProto::DataType::Code_Name(
910 property_config_proto.data_type()),
911 "' for schema property '", schema_type, ".",
912 property_config_proto.property_name(), "'"));
913 }
914 }
915
ValidateStringIndexingConfig(const StringIndexingConfig & config,PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)916 libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
917 const StringIndexingConfig& config,
918 PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
919 std::string_view property_name) {
920 if (config.term_match_type() == TermMatchType::UNKNOWN &&
921 config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
922 // They set a tokenizer type, but no term match type.
923 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
924 "Indexed string property '", schema_type, ".", property_name,
925 "' cannot have a term match type UNKNOWN"));
926 }
927
928 if (config.term_match_type() != TermMatchType::UNKNOWN &&
929 config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
930 // They set a term match type, but no tokenizer type
931 return absl_ports::InvalidArgumentError(
932 absl_ports::StrCat("Indexed string property '", property_name,
933 "' cannot have a tokenizer type of NONE"));
934 }
935
936 return libtextclassifier3::Status::OK;
937 }
938
ValidateJoinableConfig(const JoinableConfig & config,PropertyConfigProto::DataType::Code data_type,PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name,const FeatureFlags & feature_flags)939 libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
940 const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
941 PropertyConfigProto::Cardinality::Code cardinality,
942 std::string_view schema_type, std::string_view property_name,
943 const FeatureFlags& feature_flags) {
944 if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
945 if (data_type != PropertyConfigProto::DataType::STRING) {
946 return absl_ports::InvalidArgumentError(
947 absl_ports::StrCat("Qualified id joinable property '", property_name,
948 "' is required to have STRING data type"));
949 }
950
951 if (!feature_flags.enable_repeated_field_joins() &&
952 cardinality == PropertyConfigProto::Cardinality::REPEATED) {
953 return absl_ports::InvalidArgumentError(
954 absl_ports::StrCat("Qualified id joinable property '", property_name,
955 "' cannot have REPEATED cardinality"));
956 }
957 }
958
959 if (config.delete_propagation_type() !=
960 JoinableConfig::DeletePropagationType::NONE &&
961 config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
962 return absl_ports::InvalidArgumentError(
963 absl_ports::StrCat("Field 'property_name' '", property_name,
964 "' is required to have QUALIFIED_ID joinable "
965 "value type with delete propagation enabled"));
966 }
967
968 return libtextclassifier3::Status::OK;
969 }
970
ValidateDocumentIndexingConfig(const DocumentIndexingConfig & config,std::string_view schema_type,std::string_view property_name)971 libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
972 const DocumentIndexingConfig& config, std::string_view schema_type,
973 std::string_view property_name) {
974 if (!config.indexable_nested_properties_list().empty() &&
975 config.index_nested_properties()) {
976 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
977 "DocumentIndexingConfig.index_nested_properties is required to be "
978 "false when providing a non-empty indexable_nested_properties_list "
979 "for property '",
980 schema_type, ".", property_name, "'"));
981 }
982 return libtextclassifier3::Status::OK;
983 }
984
IsIndexedProperty(const PropertyConfigProto & property_config)985 /* static */ bool SchemaUtil::IsIndexedProperty(
986 const PropertyConfigProto& property_config) {
987 switch (property_config.data_type()) {
988 case PropertyConfigProto::DataType::STRING:
989 return property_config.string_indexing_config().term_match_type() !=
990 TermMatchType::UNKNOWN &&
991 property_config.string_indexing_config().tokenizer_type() !=
992 StringIndexingConfig::TokenizerType::NONE;
993 case PropertyConfigProto::DataType::INT64:
994 return property_config.integer_indexing_config().numeric_match_type() !=
995 IntegerIndexingConfig::NumericMatchType::UNKNOWN;
996 case PropertyConfigProto::DataType::DOCUMENT:
997 // A document property is considered indexed if it has
998 // index_nested_properties=true, or a non-empty
999 // indexable_nested_properties_list.
1000 return property_config.document_indexing_config()
1001 .index_nested_properties() ||
1002 !property_config.document_indexing_config()
1003 .indexable_nested_properties_list()
1004 .empty();
1005 case PropertyConfigProto::DataType::VECTOR:
1006 return property_config.embedding_indexing_config()
1007 .embedding_indexing_type() !=
1008 EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN;
1009 case PropertyConfigProto::DataType::UNKNOWN:
1010 case PropertyConfigProto::DataType::DOUBLE:
1011 case PropertyConfigProto::DataType::BOOLEAN:
1012 case PropertyConfigProto::DataType::BYTES:
1013 case PropertyConfigProto::DataType::BLOB_HANDLE:
1014 return false;
1015 }
1016 }
1017
IsParent(const SchemaUtil::InheritanceMap & inheritance_map,std::string_view parent_type,std::string_view child_type)1018 bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
1019 std::string_view parent_type,
1020 std::string_view child_type) {
1021 auto iter = inheritance_map.find(parent_type);
1022 if (iter == inheritance_map.end()) {
1023 return false;
1024 }
1025 return iter->second.count(child_type) > 0;
1026 }
1027
IsInheritedPropertyCompatible(const SchemaUtil::InheritanceMap & inheritance_map,const PropertyConfigProto & child_property_config,const PropertyConfigProto & parent_property_config)1028 bool SchemaUtil::IsInheritedPropertyCompatible(
1029 const SchemaUtil::InheritanceMap& inheritance_map,
1030 const PropertyConfigProto& child_property_config,
1031 const PropertyConfigProto& parent_property_config) {
1032 // Check if child_property_config->cardinality() <=
1033 // parent_property_config->cardinality().
1034 // Subtype may require a stricter cardinality, but cannot loosen cardinality
1035 // requirements.
1036 if (!CardinalityLessThanEq(child_property_config.cardinality(),
1037 parent_property_config.cardinality())) {
1038 return false;
1039 }
1040
1041 // Now we can assume T1 and T2 are not nullptr, and cardinality check passes.
1042 if (child_property_config.data_type() !=
1043 PropertyConfigProto::DataType::DOCUMENT ||
1044 parent_property_config.data_type() !=
1045 PropertyConfigProto::DataType::DOCUMENT) {
1046 return child_property_config.data_type() ==
1047 parent_property_config.data_type();
1048 }
1049
1050 // Now we can assume T1 and T2 are both document type.
1051 return child_property_config.schema_type() ==
1052 parent_property_config.schema_type() ||
1053 IsParent(inheritance_map, parent_property_config.schema_type(),
1054 child_property_config.schema_type());
1055 }
1056
ValidateInheritedProperties(const SchemaProto & schema)1057 libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
1058 const SchemaProto& schema) {
1059 // Create a inheritance map
1060 ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
1061 BuildTransitiveInheritanceGraph(schema));
1062
1063 // Create a map that maps from type name to property names, and then from
1064 // property names to PropertyConfigProto.
1065 std::unordered_map<
1066 std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
1067 property_map;
1068 for (const SchemaTypeConfigProto& type_config : schema.types()) {
1069 // Skipping building entries for types without any child or parent, since
1070 // such entry will never be used.
1071 if (type_config.parent_types().empty() &&
1072 inheritance_map.count(type_config.schema_type()) == 0) {
1073 continue;
1074 }
1075 auto& curr_property_map = property_map[type_config.schema_type()];
1076 for (const PropertyConfigProto& property_config :
1077 type_config.properties()) {
1078 curr_property_map[property_config.property_name()] = &property_config;
1079 }
1080 }
1081
1082 // Validate child properties.
1083 for (const SchemaTypeConfigProto& type_config : schema.types()) {
1084 const std::string& child_type_name = type_config.schema_type();
1085 auto& child_property_map = property_map[child_type_name];
1086
1087 for (const std::string& parent_type_name : type_config.parent_types()) {
1088 auto& parent_property_map = property_map[parent_type_name];
1089
1090 for (const auto& [property_name, parent_property_config] :
1091 parent_property_map) {
1092 auto child_property_iter = child_property_map.find(property_name);
1093 if (child_property_iter == child_property_map.end()) {
1094 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
1095 "Property ", property_name, " is not present in child type ",
1096 child_type_name, ", but it is defined in the parent type ",
1097 parent_type_name, "."));
1098 }
1099 if (!IsInheritedPropertyCompatible(inheritance_map,
1100 *child_property_iter->second,
1101 *parent_property_config)) {
1102 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
1103 "Property ", property_name, " from child type ", child_type_name,
1104 " is not compatible to the parent type ", parent_type_name, "."));
1105 }
1106 }
1107 }
1108 }
1109 return libtextclassifier3::Status::OK;
1110 }
1111
BuildTypeConfigMap(const SchemaProto & schema,SchemaUtil::TypeConfigMap * type_config_map)1112 void SchemaUtil::BuildTypeConfigMap(
1113 const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
1114 type_config_map->clear();
1115 for (const SchemaTypeConfigProto& type_config : schema.types()) {
1116 type_config_map->emplace(type_config.schema_type(), type_config);
1117 }
1118 }
1119
ParsePropertyConfigs(const SchemaTypeConfigProto & type_config)1120 SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
1121 const SchemaTypeConfigProto& type_config) {
1122 ParsedPropertyConfigs parsed_property_configs;
1123
1124 // TODO(cassiewang): consider caching property_config_map for some properties,
1125 // e.g. using LRU cache. Or changing schema.proto to use go/protomap.
1126 for (const PropertyConfigProto& property_config : type_config.properties()) {
1127 std::string_view property_name = property_config.property_name();
1128 parsed_property_configs.property_config_map.emplace(property_name,
1129 &property_config);
1130 if (property_config.cardinality() ==
1131 PropertyConfigProto::Cardinality::REQUIRED) {
1132 parsed_property_configs.required_properties.insert(property_name);
1133 }
1134
1135 // A non-default term_match_type indicates that this property is meant to be
1136 // indexed.
1137 if (IsIndexedProperty(property_config)) {
1138 parsed_property_configs.indexed_properties.insert(property_name);
1139 }
1140
1141 // A non-default value_type indicates that this property is meant to be
1142 // joinable.
1143 if (property_config.joinable_config().value_type() !=
1144 JoinableConfig::ValueType::NONE) {
1145 parsed_property_configs.joinable_properties.insert(property_name);
1146 }
1147
1148 // Also keep track of how many nested document properties there are. Adding
1149 // new nested document properties will result in join-index rebuild.
1150 if (property_config.data_type() ==
1151 PropertyConfigProto::DataType::DOCUMENT) {
1152 parsed_property_configs.nested_document_properties.insert(property_name);
1153 }
1154 }
1155
1156 return parsed_property_configs;
1157 }
1158
ComputeCompatibilityDelta(const SchemaProto & old_schema,const SchemaProto & new_schema,const DependentMap & new_schema_dependent_map,const FeatureFlags & feature_flags)1159 const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
1160 const SchemaProto& old_schema, const SchemaProto& new_schema,
1161 const DependentMap& new_schema_dependent_map,
1162 const FeatureFlags& feature_flags) {
1163 SchemaDelta schema_delta;
1164
1165 TypeConfigMap old_type_config_map, new_type_config_map;
1166 BuildTypeConfigMap(old_schema, &old_type_config_map);
1167 BuildTypeConfigMap(new_schema, &new_type_config_map);
1168
1169 if (feature_flags.enable_scorable_properties()) {
1170 FindScorablePropertyInconsistentTypes(
1171 old_type_config_map, new_type_config_map, new_schema_dependent_map,
1172 &schema_delta);
1173 }
1174
1175 // Iterate through and check each field of the old schema
1176 for (const auto& old_type_config : old_schema.types()) {
1177 auto new_schema_type_and_config =
1178 new_type_config_map.find(old_type_config.schema_type());
1179
1180 if (new_schema_type_and_config == new_type_config_map.end()) {
1181 // Didn't find the old schema type in the new schema, all the old
1182 // documents of this schema type are invalid without the schema
1183 ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
1184 old_type_config.schema_type(),
1185 "' was not defined in new schema");
1186 schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
1187 continue;
1188 }
1189
1190 ParsedPropertyConfigs new_parsed_property_configs =
1191 ParsePropertyConfigs(new_schema_type_and_config->second);
1192
1193 // We only need to check the old, existing properties to see if they're
1194 // compatible since we'll have old data that may be invalidated or need to
1195 // be reindexed.
1196 std::unordered_set<std::string_view> old_required_properties;
1197 std::unordered_set<std::string_view> old_indexed_properties;
1198 std::unordered_set<std::string_view> old_joinable_properties;
1199 std::unordered_set<std::string_view> old_nested_document_properties;
1200
1201 // If there is a different number of properties, then there must have been a
1202 // change.
1203 bool has_property_changed =
1204 old_type_config.properties_size() !=
1205 new_schema_type_and_config->second.properties_size();
1206 bool is_incompatible = false;
1207 bool is_index_incompatible = false;
1208 bool is_join_incompatible = false;
1209 for (const auto& old_property_config : old_type_config.properties()) {
1210 std::string_view property_name = old_property_config.property_name();
1211 if (old_property_config.cardinality() ==
1212 PropertyConfigProto::Cardinality::REQUIRED) {
1213 old_required_properties.insert(property_name);
1214 }
1215
1216 // A non-default term_match_type indicates that this property is meant to
1217 // be indexed.
1218 bool is_indexed_property = IsIndexedProperty(old_property_config);
1219 if (is_indexed_property) {
1220 old_indexed_properties.insert(property_name);
1221 }
1222
1223 bool is_joinable_property =
1224 old_property_config.joinable_config().value_type() !=
1225 JoinableConfig::ValueType::NONE;
1226 if (is_joinable_property) {
1227 old_joinable_properties.insert(property_name);
1228 }
1229
1230 // A nested-document property is a property of DataType::DOCUMENT.
1231 bool is_nested_document_property =
1232 old_property_config.data_type() ==
1233 PropertyConfigProto::DataType::DOCUMENT;
1234 if (is_nested_document_property) {
1235 old_nested_document_properties.insert(property_name);
1236 }
1237
1238 auto new_property_name_and_config =
1239 new_parsed_property_configs.property_config_map.find(
1240 old_property_config.property_name());
1241
1242 if (new_property_name_and_config ==
1243 new_parsed_property_configs.property_config_map.end()) {
1244 // Didn't find the old property
1245 ICING_VLOG(1) << absl_ports::StrCat(
1246 "Previously defined property type '", old_type_config.schema_type(),
1247 ".", old_property_config.property_name(),
1248 "' was not defined in new schema");
1249 is_incompatible = true;
1250 is_index_incompatible |= is_indexed_property;
1251 is_join_incompatible |=
1252 is_joinable_property || is_nested_document_property;
1253 continue;
1254 }
1255
1256 const PropertyConfigProto* new_property_config =
1257 new_property_name_and_config->second;
1258 if (!has_property_changed &&
1259 !ArePropertiesEqual(old_property_config, *new_property_config)) {
1260 // Finally found a property that changed.
1261 has_property_changed = true;
1262 }
1263
1264 if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
1265 ICING_VLOG(1) << absl_ports::StrCat(
1266 "Property '", old_type_config.schema_type(), ".",
1267 old_property_config.property_name(), "' is incompatible.");
1268 is_incompatible = true;
1269 }
1270
1271 // Any change in the indexed property requires a reindexing
1272 if (!IsTermMatchTypeCompatible(
1273 old_property_config.string_indexing_config(),
1274 new_property_config->string_indexing_config()) ||
1275 !IsIntegerNumericMatchTypeCompatible(
1276 old_property_config.integer_indexing_config(),
1277 new_property_config->integer_indexing_config()) ||
1278 !IsDocumentIndexingCompatible(
1279 old_property_config.document_indexing_config(),
1280 new_property_config->document_indexing_config()) ||
1281 !IsEmbeddingIndexingCompatible(
1282 old_property_config.embedding_indexing_config(),
1283 new_property_config->embedding_indexing_config())) {
1284 is_index_incompatible = true;
1285 }
1286
1287 if (old_property_config.joinable_config().value_type() !=
1288 new_property_config->joinable_config().value_type()) {
1289 is_join_incompatible = true;
1290 }
1291 }
1292
1293 // We can't have new properties that are REQUIRED since we won't know how
1294 // to backfill the data, and the existing data will be invalid. We're
1295 // guaranteed from our previous checks that all the old properties are also
1296 // present in the new property config, so we can do a simple int comparison
1297 // here to detect new required properties.
1298 if (!IsSubset(new_parsed_property_configs.required_properties,
1299 old_required_properties)) {
1300 ICING_VLOG(1) << absl_ports::StrCat(
1301 "New schema '", old_type_config.schema_type(),
1302 "' has REQUIRED properties that are not "
1303 "present in the previously defined schema");
1304 is_incompatible = true;
1305 }
1306
1307 // If we've gained any new indexed properties (this includes gaining new
1308 // indexed nested document properties), then the section ids may change.
1309 // Since the section ids are stored in the index, we'll need to
1310 // reindex everything.
1311 if (!IsSubset(new_parsed_property_configs.indexed_properties,
1312 old_indexed_properties)) {
1313 ICING_VLOG(1) << "Set of indexed properties in schema type '"
1314 << old_type_config.schema_type()
1315 << "' has changed, required reindexing.";
1316 is_index_incompatible = true;
1317 }
1318
1319 // If we've gained any new joinable properties, then the joinable property
1320 // ids may change. Since the joinable property ids are stored in the cache,
1321 // we'll need to reconstruct join index.
1322 // If we've gained any new nested document properties, we also rebuild the
1323 // join index. This is because we index all nested joinable properties, so
1324 // adding a nested document property will most probably result in having
1325 // more joinable properties.
1326 if (!IsSubset(new_parsed_property_configs.joinable_properties,
1327 old_joinable_properties) ||
1328 !IsSubset(new_parsed_property_configs.nested_document_properties,
1329 old_nested_document_properties)) {
1330 ICING_VLOG(1) << "Set of joinable properties in schema type '"
1331 << old_type_config.schema_type()
1332 << "' has changed, required reconstructing joinable cache.";
1333 is_join_incompatible = true;
1334 }
1335
1336 if (is_incompatible) {
1337 AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
1338 old_type_config, new_schema_dependent_map,
1339 old_type_config_map, new_type_config_map);
1340 }
1341
1342 if (is_index_incompatible) {
1343 AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
1344 old_type_config, new_schema_dependent_map,
1345 old_type_config_map, new_type_config_map);
1346 }
1347
1348 if (is_join_incompatible) {
1349 AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
1350 old_type_config, new_schema_dependent_map,
1351 old_type_config_map, new_type_config_map);
1352 }
1353
1354 if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
1355 has_property_changed) {
1356 schema_delta.schema_types_changed_fully_compatible.insert(
1357 old_type_config.schema_type());
1358 }
1359
1360 // Lastly, remove this type from the map. We know that this type can't
1361 // come up in future iterations through the old schema types because the old
1362 // type config has unique types.
1363 new_type_config_map.erase(old_type_config.schema_type());
1364 }
1365
1366 // Any types that are still present in the new_type_config_map are newly added
1367 // types.
1368 schema_delta.schema_types_new.reserve(new_type_config_map.size());
1369 for (auto& kvp : new_type_config_map) {
1370 schema_delta.schema_types_new.insert(std::move(kvp.first));
1371 }
1372
1373 return schema_delta;
1374 }
1375
1376 } // namespace lib
1377 } // namespace icing
1378