xref: /aosp_15_r20/external/icing/icing/schema/schema-util.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/schema/schema-util.h"
16 
17 #include <algorithm>
18 #include <cctype>
19 #include <queue>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26 
27 #include "icing/text_classifier/lib3/utils/base/status.h"
28 #include "icing/text_classifier/lib3/utils/base/statusor.h"
29 #include "icing/absl_ports/annotate.h"
30 #include "icing/absl_ports/canonical_errors.h"
31 #include "icing/absl_ports/str_cat.h"
32 #include "icing/absl_ports/str_join.h"
33 #include "icing/feature-flags.h"
34 #include "icing/proto/schema.pb.h"
35 #include "icing/proto/term.pb.h"
36 #include "icing/util/logging.h"
37 #include "icing/util/status-macros.h"
38 
39 namespace icing {
40 namespace lib {
41 
42 namespace {
43 
AreStringIndexingConfigsEqual(const StringIndexingConfig & old_config,const StringIndexingConfig & new_config)44 bool AreStringIndexingConfigsEqual(const StringIndexingConfig& old_config,
45                                    const StringIndexingConfig& new_config) {
46   return old_config.term_match_type() == new_config.term_match_type() &&
47          old_config.tokenizer_type() == new_config.tokenizer_type();
48 }
49 
AreDocumentIndexingConfigsEqual(const DocumentIndexingConfig & old_config,const DocumentIndexingConfig & new_config)50 bool AreDocumentIndexingConfigsEqual(const DocumentIndexingConfig& old_config,
51                                      const DocumentIndexingConfig& new_config) {
52   return old_config.index_nested_properties() ==
53          new_config.index_nested_properties();
54 }
55 
AreIntegerIndexingConfigsEqual(const IntegerIndexingConfig & old_config,const IntegerIndexingConfig & new_config)56 bool AreIntegerIndexingConfigsEqual(const IntegerIndexingConfig& old_config,
57                                     const IntegerIndexingConfig& new_config) {
58   return old_config.numeric_match_type() == new_config.numeric_match_type();
59 }
60 
AreJoinableConfigsEqual(const JoinableConfig & old_config,const JoinableConfig & new_config)61 bool AreJoinableConfigsEqual(const JoinableConfig& old_config,
62                              const JoinableConfig& new_config) {
63   return old_config.value_type() == new_config.value_type() &&
64          old_config.delete_propagation_type() ==
65              new_config.delete_propagation_type();
66 }
67 
AreEmbeddingIndexingConfigsEqual(const EmbeddingIndexingConfig & old_config,const EmbeddingIndexingConfig & new_config)68 bool AreEmbeddingIndexingConfigsEqual(
69     const EmbeddingIndexingConfig& old_config,
70     const EmbeddingIndexingConfig& new_config) {
71   return old_config.embedding_indexing_type() ==
72              new_config.embedding_indexing_type() &&
73          old_config.quantization_type() == new_config.quantization_type();
74 }
75 
ArePropertiesEqual(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)76 bool ArePropertiesEqual(const PropertyConfigProto& old_property,
77                         const PropertyConfigProto& new_property) {
78   return old_property.property_name() == new_property.property_name() &&
79          old_property.data_type() == new_property.data_type() &&
80          old_property.schema_type() == new_property.schema_type() &&
81          old_property.cardinality() == new_property.cardinality() &&
82          old_property.scorable_type() == new_property.scorable_type() &&
83          AreStringIndexingConfigsEqual(old_property.string_indexing_config(),
84                                        new_property.string_indexing_config()) &&
85          AreDocumentIndexingConfigsEqual(
86              old_property.document_indexing_config(),
87              new_property.document_indexing_config()) &&
88          AreIntegerIndexingConfigsEqual(
89              old_property.integer_indexing_config(),
90              new_property.integer_indexing_config()) &&
91          AreJoinableConfigsEqual(old_property.joinable_config(),
92                                  new_property.joinable_config()) &&
93          AreEmbeddingIndexingConfigsEqual(
94              old_property.embedding_indexing_config(),
95              new_property.embedding_indexing_config());
96 }
97 
IsCardinalityCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)98 bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
99                              const PropertyConfigProto& new_property) {
100   if (old_property.cardinality() < new_property.cardinality()) {
101     // We allow a new, less restrictive cardinality (i.e. a REQUIRED field
102     // can become REPEATED or OPTIONAL, but not the other way around).
103     ICING_VLOG(1) << absl_ports::StrCat(
104         "Cardinality is more restrictive than before ",
105         PropertyConfigProto::Cardinality::Code_Name(old_property.cardinality()),
106         "->",
107         PropertyConfigProto::Cardinality::Code_Name(
108             new_property.cardinality()));
109     return false;
110   }
111   return true;
112 }
113 
IsDataTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)114 bool IsDataTypeCompatible(const PropertyConfigProto& old_property,
115                           const PropertyConfigProto& new_property) {
116   if (old_property.data_type() != new_property.data_type()) {
117     // TODO(cassiewang): Maybe we can be a bit looser with this, e.g. we just
118     // string cast an int64_t to a string. But for now, we'll stick with
119     // simplistics.
120     ICING_VLOG(1) << absl_ports::StrCat(
121         "Data type ",
122         PropertyConfigProto::DataType::Code_Name(old_property.data_type()),
123         "->",
124         PropertyConfigProto::DataType::Code_Name(new_property.data_type()));
125     return false;
126   }
127   return true;
128 }
129 
IsSchemaTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)130 bool IsSchemaTypeCompatible(const PropertyConfigProto& old_property,
131                             const PropertyConfigProto& new_property) {
132   if (old_property.schema_type() != new_property.schema_type()) {
133     ICING_VLOG(1) << absl_ports::StrCat("Schema type ",
134                                         old_property.schema_type(), "->",
135                                         new_property.schema_type());
136     return false;
137   }
138   return true;
139 }
140 
IsPropertyCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)141 bool IsPropertyCompatible(const PropertyConfigProto& old_property,
142                           const PropertyConfigProto& new_property) {
143   return IsDataTypeCompatible(old_property, new_property) &&
144          IsSchemaTypeCompatible(old_property, new_property) &&
145          IsCardinalityCompatible(old_property, new_property);
146 }
147 
IsTermMatchTypeCompatible(const StringIndexingConfig & old_indexed,const StringIndexingConfig & new_indexed)148 bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
149                                const StringIndexingConfig& new_indexed) {
150   return old_indexed.term_match_type() == new_indexed.term_match_type() &&
151          old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
152 }
153 
IsIntegerNumericMatchTypeCompatible(const IntegerIndexingConfig & old_indexed,const IntegerIndexingConfig & new_indexed)154 bool IsIntegerNumericMatchTypeCompatible(
155     const IntegerIndexingConfig& old_indexed,
156     const IntegerIndexingConfig& new_indexed) {
157   return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
158 }
159 
IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig & old_indexed,const EmbeddingIndexingConfig & new_indexed)160 bool IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig& old_indexed,
161                                    const EmbeddingIndexingConfig& new_indexed) {
162   return old_indexed.embedding_indexing_type() ==
163              new_indexed.embedding_indexing_type() &&
164          old_indexed.quantization_type() == new_indexed.quantization_type();
165 }
166 
IsDocumentIndexingCompatible(const DocumentIndexingConfig & old_indexed,const DocumentIndexingConfig & new_indexed)167 bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
168                                   const DocumentIndexingConfig& new_indexed) {
169   // TODO(b/265304217): This could mark the new schema as incompatible and
170   // generate some unnecessary index rebuilds if the two schemas have an
171   // equivalent set of indexed properties, but changed the way that it is
172   // declared.
173   if (old_indexed.index_nested_properties() !=
174       new_indexed.index_nested_properties()) {
175     return false;
176   }
177 
178   if (old_indexed.indexable_nested_properties_list().size() !=
179       new_indexed.indexable_nested_properties_list().size()) {
180     return false;
181   }
182 
183   std::unordered_set<std::string_view> old_indexable_nested_properies_set(
184       old_indexed.indexable_nested_properties_list().begin(),
185       old_indexed.indexable_nested_properties_list().end());
186   for (const auto& property : new_indexed.indexable_nested_properties_list()) {
187     if (old_indexable_nested_properies_set.find(property) ==
188         old_indexable_nested_properies_set.end()) {
189       return false;
190     }
191   }
192   return true;
193 }
194 
AddIncompatibleChangeToDelta(std::unordered_set<std::string> & incompatible_delta,const SchemaTypeConfigProto & old_type_config,const SchemaUtil::DependentMap & new_schema_dependent_map,const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map)195 void AddIncompatibleChangeToDelta(
196     std::unordered_set<std::string>& incompatible_delta,
197     const SchemaTypeConfigProto& old_type_config,
198     const SchemaUtil::DependentMap& new_schema_dependent_map,
199     const SchemaUtil::TypeConfigMap& old_type_config_map,
200     const SchemaUtil::TypeConfigMap& new_type_config_map) {
201   // If this type is incompatible, then every type that depends on it might
202   // also be incompatible. Use the dependent map to mark those ones as
203   // incompatible too.
204   incompatible_delta.insert(old_type_config.schema_type());
205   auto dependent_types_itr =
206       new_schema_dependent_map.find(old_type_config.schema_type());
207   if (dependent_types_itr != new_schema_dependent_map.end()) {
208     for (const auto& [dependent_type, _] : dependent_types_itr->second) {
209       // The types from new_schema that depend on the current
210       // old_type_config may not present in old_schema.
211       // Those types will be listed at schema_delta.schema_types_new
212       // instead.
213       std::string dependent_type_str(dependent_type);
214       if (old_type_config_map.find(dependent_type_str) !=
215           old_type_config_map.end()) {
216         incompatible_delta.insert(std::move(dependent_type_str));
217       }
218     }
219   }
220 }
221 
222 // Returns if C1 <= C2 based on the following rule, where C1 and C2 are
223 // cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
224 //
225 // Rule: REQUIRED < OPTIONAL < REPEATED
CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,PropertyConfigProto::Cardinality::Code C2)226 bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
227                            PropertyConfigProto::Cardinality::Code C2) {
228   if (C1 == C2) {
229     return true;
230   }
231   if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
232     return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
233            C2 == PropertyConfigProto::Cardinality::REPEATED;
234   }
235   if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
236     return C2 == PropertyConfigProto::Cardinality::REPEATED;
237   }
238   return false;
239 }
240 
241 // Check if set1 is a subset of set2.
242 template <typename T>
IsSubset(const std::unordered_set<T> & set1,const std::unordered_set<T> & set2)243 bool IsSubset(const std::unordered_set<T>& set1,
244               const std::unordered_set<T>& set2) {
245   for (const auto& item : set1) {
246     if (set2.find(item) == set2.end()) {
247       return false;
248     }
249   }
250   return true;
251 }
252 
253 // Builds a map of {schema_type -> set of scorable property names}
254 std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
BuildTypeToScorablePropertyNamesMap(const SchemaUtil::TypeConfigMap & type_config_map)255 BuildTypeToScorablePropertyNamesMap(
256     const SchemaUtil::TypeConfigMap& type_config_map) {
257   std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
258       type_to_scorable_property_names_map;
259   for (const auto& [schema_type, schema_type_config] : type_config_map) {
260     for (const PropertyConfigProto& property_config :
261          schema_type_config.properties()) {
262       if (property_config.scorable_type() ==
263           PropertyConfigProto::ScorableType::ENABLED) {
264         type_to_scorable_property_names_map[schema_type].insert(
265             property_config.property_name());
266       }
267     }
268   }
269   return type_to_scorable_property_names_map;
270 }
271 
272 // Finds the schema types that have inconsistent scorable properties, which will
273 // be added in place in the `schema_delta`.
FindScorablePropertyInconsistentTypes(const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map,const SchemaUtil::DependentMap & new_schema_dependent_map,SchemaUtil::SchemaDelta * schema_delta)274 void FindScorablePropertyInconsistentTypes(
275     const SchemaUtil::TypeConfigMap& old_type_config_map,
276     const SchemaUtil::TypeConfigMap& new_type_config_map,
277     const SchemaUtil::DependentMap& new_schema_dependent_map,
278     SchemaUtil::SchemaDelta* schema_delta) {
279   std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
280       new_type_to_scorable_property_names_map =
281           BuildTypeToScorablePropertyNamesMap(new_type_config_map);
282   std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
283       old_type_to_scorable_property_names_map =
284           BuildTypeToScorablePropertyNamesMap(old_type_config_map);
285   for (const auto& [schema_type, _] : old_type_config_map) {
286     if (new_type_config_map.find(schema_type) == new_type_config_map.end()) {
287       // The type has been deleted in the new schema.
288       continue;
289     }
290     auto old_schema_type_property_names_iter =
291         old_type_to_scorable_property_names_map.find(schema_type);
292     auto new_schema_type_property_names_iter =
293         new_type_to_scorable_property_names_map.find(schema_type);
294     bool has_scorable_properties_in_old_schema =
295         old_schema_type_property_names_iter !=
296         old_type_to_scorable_property_names_map.end();
297     bool has_scorable_properties_in_new_schema =
298         new_schema_type_property_names_iter !=
299         new_type_to_scorable_property_names_map.end();
300     if (has_scorable_properties_in_old_schema &&
301         !has_scorable_properties_in_new_schema) {
302       schema_delta->schema_types_scorable_property_inconsistent.insert(
303           schema_type);
304     } else if (!has_scorable_properties_in_old_schema &&
305                has_scorable_properties_in_new_schema) {
306       schema_delta->schema_types_scorable_property_inconsistent.insert(
307           schema_type);
308     } else if (has_scorable_properties_in_old_schema &&
309                has_scorable_properties_in_new_schema) {
310       // The sets of scorable properties from the old and new schema are
311       // different.
312       if (old_schema_type_property_names_iter->second !=
313           new_schema_type_property_names_iter->second) {
314         schema_delta->schema_types_scorable_property_inconsistent.insert(
315             schema_type);
316       }
317     }
318   }
319 
320   // Now, look up the DependentMap of the new schema config and find the parent
321   // types that depend on the currently discovered inconsistent types.
322   std::vector<std::string_view> parent_types;
323   for (const std::string& schema_type :
324        schema_delta->schema_types_scorable_property_inconsistent) {
325     auto parent_type_maps_iter = new_schema_dependent_map.find(schema_type);
326     if (parent_type_maps_iter == new_schema_dependent_map.end()) {
327       continue;
328     }
329     for (const auto& [parent_type, _] : parent_type_maps_iter->second) {
330       parent_types.push_back(parent_type);
331     }
332   }
333   schema_delta->schema_types_scorable_property_inconsistent.insert(
334       parent_types.begin(), parent_types.end());
335 }
336 
337 }  // namespace
338 
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_types_map,const std::unordered_set<std::string_view> & joinable_types,std::string_view type,bool path_contains_joinable_property,SchemaUtil::DependentMap * expanded_nested_types_map,std::unordered_map<std::string_view,bool> && pending_expansion_paths_indexable,std::unordered_set<std::string_view> * sink_types)339 libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
340     const SchemaUtil::DependentMap& direct_nested_types_map,
341     const std::unordered_set<std::string_view>& joinable_types,
342     std::string_view type, bool path_contains_joinable_property,
343     SchemaUtil::DependentMap* expanded_nested_types_map,
344     std::unordered_map<std::string_view, bool>&&
345         pending_expansion_paths_indexable,
346     std::unordered_set<std::string_view>* sink_types) {
347   // TODO(b/280698121): Implement optimizations to this code to avoid reentering
348   // a node after it's already been expanded.
349 
350   auto itr = direct_nested_types_map.find(type);
351   if (itr == direct_nested_types_map.end()) {
352     // It's a sink node. Just return.
353     sink_types->insert(type);
354     return libtextclassifier3::Status::OK;
355   }
356   std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
357       expanded_relations;
358 
359   // Add all of the adjacent outgoing relations.
360   expanded_relations.reserve(itr->second.size());
361   expanded_relations.insert(itr->second.begin(), itr->second.end());
362 
363   // Iterate through each adjacent outgoing relation and add their indirect
364   // outgoing relations.
365   for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
366     // Make a copy of pending_expansion_paths_indexable for every iteration.
367     std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
368         pending_expansion_paths_indexable);
369 
370     // 1. Check the nested indexable config of the edge (type -> adjacent_type),
371     //    and the joinable config of the current path up to adjacent_type.
372     //
373     // The nested indexable config is true if any of the PropertyConfigProtos
374     // representing the connecting edge has index_nested_properties=true.
375     bool is_edge_nested_indexable = std::any_of(
376         adjacent_property_protos.begin(), adjacent_property_protos.end(),
377         [](const PropertyConfigProto* property_config) {
378           return property_config->document_indexing_config()
379               .index_nested_properties();
380         });
381     // TODO(b/265304217): change this once we add joinable_properties_list.
382     // Check if addition of the new edge (type->adjacent_type) makes the path
383     // joinable.
384     bool new_path_contains_joinable_property =
385         joinable_types.count(type) > 0 || path_contains_joinable_property;
386     // Set is_nested_indexable field for the current edge
387     pending_expansion_paths_copy[type] = is_edge_nested_indexable;
388 
389     // If is_edge_nested_indexable=false, then all paths to adjacent_type
390     // currently in the pending_expansions map are also not nested indexable.
391     if (!is_edge_nested_indexable) {
392       for (auto& pending_expansion : pending_expansion_paths_copy) {
393         pending_expansion.second = false;
394       }
395     }
396 
397     // 2. Check if we're in the middle of expanding this type - IOW
398     // there's a cycle!
399     //
400     // This cycle is not allowed if either:
401     //  1. The cycle starting at adjacent_type is nested indexable, OR
402     //  2. The current path contains a joinable property.
403     auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
404     if (adjacent_itr != pending_expansion_paths_copy.end()) {
405       if (adjacent_itr->second || new_path_contains_joinable_property) {
406         return absl_ports::InvalidArgumentError(absl_ports::StrCat(
407             "Invalid cycle detected in type configs. '", type,
408             "' references itself and is nested-indexable or nested-joinable."));
409       }
410       // The cycle is allowed and there's no need to keep iterating the loop.
411       // Move on to the next adjacent value.
412       continue;
413     }
414 
415     // 3. Expand this type as needed.
416     ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
417         direct_nested_types_map, joinable_types, adjacent_type,
418         new_path_contains_joinable_property, expanded_nested_types_map,
419         std::move(pending_expansion_paths_copy), sink_types));
420     if (sink_types->count(adjacent_type) > 0) {
421       // "adjacent" is a sink node. Just skip to the next.
422       continue;
423     }
424 
425     // 4. "adjacent" has been fully expanded. Add all of its transitive
426     // outgoing relations to this type's transitive outgoing relations.
427     auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
428     for (const auto& [transitive_reachable, _] :
429          adjacent_expanded_itr->second) {
430       // Insert a transitive reachable node `transitive_reachable` for `type` if
431       // it wasn't previously reachable.
432       // Since there is no direct edge between `type` and `transitive_reachable`
433       // we insert an empty vector into the dependent map.
434       expanded_relations.insert({transitive_reachable, {}});
435     }
436   }
437   for (const auto& kvp : expanded_relations) {
438     expanded_nested_types_map->operator[](type).insert(kvp);
439   }
440   return libtextclassifier3::Status::OK;
441 }
442 
443 template <typename T>
CalculateAcyclicTransitiveRelations(const SchemaUtil::TypeRelationMap<T> & direct_relation_map,std::string_view type,SchemaUtil::TypeRelationMap<T> * expanded_relation_map,std::unordered_set<std::string_view> * pending_expansions,std::unordered_set<std::string_view> * sink_types)444 libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
445     const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
446     std::string_view type,
447     SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
448     std::unordered_set<std::string_view>* pending_expansions,
449     std::unordered_set<std::string_view>* sink_types) {
450   auto expanded_itr = expanded_relation_map->find(type);
451   if (expanded_itr != expanded_relation_map->end()) {
452     // We've already expanded this type. Just return.
453     return libtextclassifier3::Status::OK;
454   }
455   auto itr = direct_relation_map.find(type);
456   if (itr == direct_relation_map.end()) {
457     // It's a sink node. Just return.
458     sink_types->insert(type);
459     return libtextclassifier3::Status::OK;
460   }
461   pending_expansions->insert(type);
462   std::unordered_map<std::string_view, T> expanded_relations;
463 
464   // Add all of the adjacent outgoing relations.
465   expanded_relations.reserve(itr->second.size());
466   expanded_relations.insert(itr->second.begin(), itr->second.end());
467 
468   // Iterate through each adjacent outgoing relation and add their indirect
469   // outgoing relations.
470   for (const auto& [adjacent, _] : itr->second) {
471     // 1. Check if we're in the middle of expanding this type - IOW there's a
472     // cycle!
473     if (pending_expansions->count(adjacent) > 0) {
474       return absl_ports::InvalidArgumentError(
475           absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
476                              "' references or inherits from itself."));
477     }
478 
479     // 2. Expand this type as needed.
480     ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
481         direct_relation_map, adjacent, expanded_relation_map,
482         pending_expansions, sink_types));
483     if (sink_types->count(adjacent) > 0) {
484       // "adjacent" is a sink node. Just skip to the next.
485       continue;
486     }
487 
488     // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
489     // relations to this type's transitive outgoing relations.
490     auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
491     for (const auto& [transitive_reachable, _] :
492          adjacent_expanded_itr->second) {
493       // Insert a transitive reachable node `transitive_reachable` for `type`.
494       // Also since there is no direct edge between `type` and
495       // `transitive_reachable`, the direct edge is initialized by default.
496       expanded_relations.insert({transitive_reachable, T()});
497     }
498   }
499   expanded_relation_map->insert({type, std::move(expanded_relations)});
500   pending_expansions->erase(type);
501   return libtextclassifier3::Status::OK;
502 }
503 
504 // Calculate and return the expanded nested-type map from
505 // direct_nested_type_map. This expands the direct_nested_type_map to also
506 // include indirect nested-type relations.
507 //
508 // Ex. Suppose we have the following relations in direct_nested_type_map.
509 //
510 // C -> B (Schema type B has a document property of type C)
511 // B -> A (Schema type A has a document property of type B)
512 //
513 // Then, this function would expand the map by adding C -> A to the map.
514 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_type_map,const std::unordered_set<std::string_view> & joinable_types,bool allow_circular_schema_definitions)515 CalculateTransitiveNestedTypeRelations(
516     const SchemaUtil::DependentMap& direct_nested_type_map,
517     const std::unordered_set<std::string_view>& joinable_types,
518     bool allow_circular_schema_definitions) {
519   SchemaUtil::DependentMap expanded_nested_type_map;
520   // Types that have no outgoing relations.
521   std::unordered_set<std::string_view> sink_types;
522 
523   if (allow_circular_schema_definitions) {
524     // Map of nodes that are pending expansion -> whether the path from each key
525     // node to the 'current' node is nested_indexable.
526     // A copy of this map is made for each new node that we expand.
527     std::unordered_map<std::string_view, bool>
528         pending_expansion_paths_indexable;
529     for (const auto& kvp : direct_nested_type_map) {
530       ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
531           direct_nested_type_map, joinable_types, kvp.first,
532           /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
533           std::unordered_map<std::string_view, bool>(
534               pending_expansion_paths_indexable),
535           &sink_types));
536     }
537   } else {
538     // If allow_circular_schema_definitions is false, then fallback to the old
539     // way of detecting cycles.
540     // Types that we are expanding.
541     std::unordered_set<std::string_view> pending_expansions;
542     for (const auto& kvp : direct_nested_type_map) {
543       ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
544           direct_nested_type_map, kvp.first, &expanded_nested_type_map,
545           &pending_expansions, &sink_types));
546     }
547   }
548   return expanded_nested_type_map;
549 }
550 
551 // Calculate and return the expanded inheritance map from
552 // direct_nested_type_map. This expands the direct_inheritance_map to also
553 // include indirect inheritance relations.
554 //
555 // Ex. Suppose we have the following relations in direct_inheritance_map.
556 //
557 // C -> B (Schema type C is B's parent_type )
558 // B -> A (Schema type B is A's parent_type)
559 //
560 // Then, this function would expand the map by adding C -> A to the map.
561 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
CalculateTransitiveInheritanceRelations(const SchemaUtil::InheritanceMap & direct_inheritance_map)562 CalculateTransitiveInheritanceRelations(
563     const SchemaUtil::InheritanceMap& direct_inheritance_map) {
564   SchemaUtil::InheritanceMap expanded_inheritance_map;
565 
566   // Types that we are expanding.
567   std::unordered_set<std::string_view> pending_expansions;
568 
569   // Types that have no outgoing relation.
570   std::unordered_set<std::string_view> sink_types;
571   for (const auto& kvp : direct_inheritance_map) {
572     ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
573         direct_inheritance_map, kvp.first, &expanded_inheritance_map,
574         &pending_expansions, &sink_types));
575   }
576   return expanded_inheritance_map;
577 }
578 
579 // Builds a transitive dependent map. Types with no dependents will not be
580 // present in the map as keys.
581 //
582 // Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
583 // type B and B has a property of type C. C and D only have non-document
584 // properties.
585 //
586 // The transitive dependent map for this schema would be:
587 // C -> A, B (both A and B depend on C)
588 // B -> A (A depends on B)
589 //
590 // A and D will not be present in the map as keys because no type depends on
591 // them.
592 //
593 // RETURNS:
594 //   On success, a transitive dependent map of all types in the schema.
595 //   INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
596 //   ALREADY_EXISTS if a schema type is specified more than once in the schema
597 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
BuildTransitiveDependentGraph(const SchemaProto & schema,bool allow_circular_schema_definitions)598 BuildTransitiveDependentGraph(const SchemaProto& schema,
599                               bool allow_circular_schema_definitions) {
600   // We expand the nested-type dependent map and inheritance map differently
601   // when calculating transitive relations. These two types of relations also
602   // should not be transitive so we keep these as separate maps.
603   //
604   // e.g. For schema type A, B and C, B depends on A through inheritance, and
605   // C depends on B by having a property with type B, we will have the two
606   // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
607   // in the map.
608   SchemaUtil::DependentMap direct_nested_type_map;
609   SchemaUtil::InheritanceMap direct_inheritance_map;
610 
611   // Set of schema types that have at least one joinable property.
612   std::unordered_set<std::string_view> joinable_types;
613 
614   // Add all first-order dependents.
615   std::unordered_set<std::string_view> known_types;
616   std::unordered_set<std::string_view> unknown_types;
617   for (const auto& type_config : schema.types()) {
618     std::string_view schema_type(type_config.schema_type());
619     if (known_types.count(schema_type) > 0) {
620       return absl_ports::AlreadyExistsError(absl_ports::StrCat(
621           "Field 'schema_type' '", schema_type, "' is already defined"));
622     }
623     known_types.insert(schema_type);
624     unknown_types.erase(schema_type);
625     // Insert inheritance relations into the inheritance map.
626     for (std::string_view parent_schema_type : type_config.parent_types()) {
627       if (known_types.count(parent_schema_type) == 0) {
628         unknown_types.insert(parent_schema_type);
629       }
630       direct_inheritance_map[parent_schema_type][schema_type] = true;
631     }
632     for (const auto& property_config : type_config.properties()) {
633       if (property_config.joinable_config().value_type() !=
634           JoinableConfig::ValueType::NONE) {
635         joinable_types.insert(schema_type);
636       }
637       // Insert nested-type relations into the nested-type map.
638       if (property_config.data_type() ==
639           PropertyConfigProto::DataType::DOCUMENT) {
640         // Need to know what schema_type these Document properties should be
641         // validated against
642         std::string_view property_schema_type(property_config.schema_type());
643         if (known_types.count(property_schema_type) == 0) {
644           unknown_types.insert(property_schema_type);
645         }
646         direct_nested_type_map[property_schema_type][schema_type].push_back(
647             &property_config);
648       }
649     }
650   }
651   if (!unknown_types.empty()) {
652     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
653         "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
654   }
655 
656   // Merge two expanded maps into a single dependent_map, without making
657   // inheritance and nested-type relations transitive.
658   ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
659                          CalculateTransitiveNestedTypeRelations(
660                              direct_nested_type_map, joinable_types,
661                              allow_circular_schema_definitions));
662   ICING_ASSIGN_OR_RETURN(
663       SchemaUtil::InheritanceMap expanded_inheritance_map,
664       CalculateTransitiveInheritanceRelations(direct_inheritance_map));
665   for (const auto& [parent_type, inheritance_relation] :
666        expanded_inheritance_map) {
667     // Insert the parent_type into the dependent map if it is not present
668     // already.
669     merged_dependent_map.insert({parent_type, {}});
670     for (const auto& [child_type, _] : inheritance_relation) {
671       // Insert the child_type into parent_type's dependent map if it's not
672       // present already, in which case the value will be an empty vector.
673       merged_dependent_map[parent_type].insert({child_type, {}});
674     }
675   }
676   return merged_dependent_map;
677 }
678 
679 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
BuildTransitiveInheritanceGraph(const SchemaProto & schema)680 SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
681   SchemaUtil::InheritanceMap direct_inheritance_map;
682   for (const auto& type_config : schema.types()) {
683     for (std::string_view parent_schema_type : type_config.parent_types()) {
684       direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
685           true;
686     }
687   }
688   return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
689 }
690 
Validate(const SchemaProto & schema,const FeatureFlags & feature_flags,bool allow_circular_schema_definitions)691 libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
692     const SchemaProto& schema, const FeatureFlags& feature_flags,
693     bool allow_circular_schema_definitions) {
694   // 1. Build the dependent map. This will detect any cycles, non-existent or
695   // duplicate types in the schema.
696   ICING_ASSIGN_OR_RETURN(
697       SchemaUtil::DependentMap dependent_map,
698       BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
699 
700   // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
701   // already.
702   std::unordered_set<std::string_view> known_property_names;
703 
704   // Tracks PropertyConfigs containing joinable properties.
705   std::unordered_set<std::string_view> schema_types_with_joinable_property;
706 
707   // 2. Validate the properties of each type.
708   for (const auto& type_config : schema.types()) {
709     std::string_view schema_type(type_config.schema_type());
710     ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
711 
712     // We only care about properties being unique within one type_config
713     known_property_names.clear();
714 
715     for (const auto& property_config : type_config.properties()) {
716       std::string_view property_name(property_config.property_name());
717       ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
718 
719       // Property names must be unique
720       if (!known_property_names.insert(property_name).second) {
721         return absl_ports::AlreadyExistsError(absl_ports::StrCat(
722             "Field 'property_name' '", property_name,
723             "' is already defined for schema '", schema_type, "'"));
724       }
725 
726       auto data_type = property_config.data_type();
727       ICING_RETURN_IF_ERROR(
728           ValidateDataType(data_type, schema_type, property_name));
729 
730       if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
731         // Need to know what schema_type these Document properties should be
732         // validated against
733         std::string_view property_schema_type(property_config.schema_type());
734         libtextclassifier3::Status validated_status =
735             ValidateSchemaType(property_schema_type);
736         if (!validated_status.ok()) {
737           return absl_ports::Annotate(
738               validated_status,
739               absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
740                                  "data_types in schema property '",
741                                  schema_type, ".", property_name, "'"));
742         }
743 
744         ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
745             property_config.document_indexing_config(), schema_type,
746             property_name));
747       }
748 
749       ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
750                                                 schema_type, property_name));
751       if (feature_flags.enable_scorable_properties()) {
752         ICING_RETURN_IF_ERROR(
753             ValidateScorableType(schema_type, property_config));
754       }
755 
756       if (data_type == PropertyConfigProto::DataType::STRING) {
757         ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
758             property_config.string_indexing_config(), data_type, schema_type,
759             property_name));
760       }
761 
762       ICING_RETURN_IF_ERROR(
763           ValidateJoinableConfig(property_config.joinable_config(), data_type,
764                                  property_config.cardinality(), schema_type,
765                                  property_name, feature_flags));
766       if (property_config.joinable_config().value_type() !=
767           JoinableConfig::ValueType::NONE) {
768         schema_types_with_joinable_property.insert(schema_type);
769       }
770     }
771   }
772 
773   // BFS traverse the dependent graph to make sure that no nested levels
774   // (properties with DOCUMENT data type) have REPEATED cardinality while
775   // depending on schema types with joinable property.
776   std::queue<std::string_view> frontier;
777   for (const auto& schema_type : schema_types_with_joinable_property) {
778     frontier.push(schema_type);
779   }
780   std::unordered_set<std::string_view> traversed =
781       std::move(schema_types_with_joinable_property);
782   while (!frontier.empty()) {
783     std::string_view schema_type = frontier.front();
784     frontier.pop();
785 
786     const auto it = dependent_map.find(schema_type);
787     if (it == dependent_map.end()) {
788       continue;
789     }
790 
791     // Check every type that has a property of type schema_type.
792     for (const auto& [next_schema_type, property_configs] : it->second) {
793       // Check all properties in "next_schema_type" that are of type
794       // "schema_type".
795       for (const PropertyConfigProto* property_config : property_configs) {
796         if (property_config != nullptr &&
797             property_config->cardinality() ==
798                 PropertyConfigProto::Cardinality::REPEATED) {
799           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
800               "Schema type '", next_schema_type,
801               "' cannot have REPEATED nested document property '",
802               property_config->property_name(),
803               "' while connecting to some joinable properties"));
804         }
805       }
806 
807       if (traversed.count(next_schema_type) == 0) {
808         traversed.insert(next_schema_type);
809         frontier.push(next_schema_type);
810       }
811     }
812   }
813 
814   // Verify that every child type's property set has included all compatible
815   // properties from parent types.
816   ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
817   return dependent_map;
818 }
819 
ValidateSchemaType(std::string_view schema_type)820 libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
821     std::string_view schema_type) {
822   // Require a schema_type
823   if (schema_type.empty()) {
824     return absl_ports::InvalidArgumentError(
825         "Field 'schema_type' cannot be empty.");
826   }
827 
828   return libtextclassifier3::Status::OK;
829 }
830 
ValidatePropertyName(std::string_view property_name,std::string_view schema_type)831 libtextclassifier3::Status SchemaUtil::ValidatePropertyName(
832     std::string_view property_name, std::string_view schema_type) {
833   // Require a property_name
834   if (property_name.empty()) {
835     return absl_ports::InvalidArgumentError(
836         absl_ports::StrCat("Field 'property_name' for schema '", schema_type,
837                            "' cannot be empty."));
838   }
839 
840   // Only support alphanumeric values.
841   for (char c : property_name) {
842     if (!std::isalnum(c)) {
843       return absl_ports::InvalidArgumentError(
844           absl_ports::StrCat("Field 'property_name' '", property_name,
845                              "' can only contain alphanumeric characters."));
846     }
847   }
848 
849   return libtextclassifier3::Status::OK;
850 }
851 
ValidateDataType(PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)852 libtextclassifier3::Status SchemaUtil::ValidateDataType(
853     PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
854     std::string_view property_name) {
855   // UNKNOWN is the default enum value and should only be used for backwards
856   // compatibility
857   if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
858     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
859         "Field 'data_type' cannot be UNKNOWN for schema property '",
860         schema_type, ".", property_name, "'"));
861   }
862 
863   return libtextclassifier3::Status::OK;
864 }
865 
ValidateCardinality(PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)866 libtextclassifier3::Status SchemaUtil::ValidateCardinality(
867     PropertyConfigProto::Cardinality::Code cardinality,
868     std::string_view schema_type, std::string_view property_name) {
869   // UNKNOWN is the default enum value and should only be used for backwards
870   // compatibility
871   if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
872     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
873         "Field 'cardinality' cannot be UNKNOWN for schema property '",
874         schema_type, ".", property_name, "'"));
875   }
876 
877   return libtextclassifier3::Status::OK;
878 }
879 
ValidateScorableType(std::string_view schema_type,const PropertyConfigProto & property_config_proto)880 libtextclassifier3::Status SchemaUtil::ValidateScorableType(
881     std::string_view schema_type,
882     const PropertyConfigProto& property_config_proto) {
883   if (property_config_proto.data_type() ==
884       PropertyConfigProto::DataType::DOCUMENT) {
885     if (property_config_proto.scorable_type() !=
886         PropertyConfigProto::ScorableType::UNKNOWN) {
887       return absl_ports::InvalidArgumentError(absl_ports::StrCat(
888           "Field 'scorable_type' shouldn't be explicitly set for data type "
889           "DOCUMENT. It is considered scorable if any of its or its "
890           "dependency's property is scorable."));
891     }
892   }
893 
894   if (property_config_proto.scorable_type() ==
895           PropertyConfigProto::ScorableType::DISABLED ||
896       property_config_proto.scorable_type() ==
897           PropertyConfigProto::ScorableType::UNKNOWN) {
898     return libtextclassifier3::Status::OK;
899   }
900 
901   switch (property_config_proto.data_type()) {
902     case PropertyConfigProto::DataType::INT64:
903     case PropertyConfigProto::DataType::DOUBLE:
904     case PropertyConfigProto::DataType::BOOLEAN:
905       return libtextclassifier3::Status::OK;
906     default:
907       return absl_ports::InvalidArgumentError(absl_ports::StrCat(
908           "Field 'scorable_type' cannot be enabled for data type '",
909           PropertyConfigProto::DataType::Code_Name(
910               property_config_proto.data_type()),
911           "' for schema property '", schema_type, ".",
912           property_config_proto.property_name(), "'"));
913   }
914 }
915 
ValidateStringIndexingConfig(const StringIndexingConfig & config,PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)916 libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
917     const StringIndexingConfig& config,
918     PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
919     std::string_view property_name) {
920   if (config.term_match_type() == TermMatchType::UNKNOWN &&
921       config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
922     // They set a tokenizer type, but no term match type.
923     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
924         "Indexed string property '", schema_type, ".", property_name,
925         "' cannot have a term match type UNKNOWN"));
926   }
927 
928   if (config.term_match_type() != TermMatchType::UNKNOWN &&
929       config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
930     // They set a term match type, but no tokenizer type
931     return absl_ports::InvalidArgumentError(
932         absl_ports::StrCat("Indexed string property '", property_name,
933                            "' cannot have a tokenizer type of NONE"));
934   }
935 
936   return libtextclassifier3::Status::OK;
937 }
938 
ValidateJoinableConfig(const JoinableConfig & config,PropertyConfigProto::DataType::Code data_type,PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name,const FeatureFlags & feature_flags)939 libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
940     const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
941     PropertyConfigProto::Cardinality::Code cardinality,
942     std::string_view schema_type, std::string_view property_name,
943     const FeatureFlags& feature_flags) {
944   if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
945     if (data_type != PropertyConfigProto::DataType::STRING) {
946       return absl_ports::InvalidArgumentError(
947           absl_ports::StrCat("Qualified id joinable property '", property_name,
948                              "' is required to have STRING data type"));
949     }
950 
951     if (!feature_flags.enable_repeated_field_joins() &&
952         cardinality == PropertyConfigProto::Cardinality::REPEATED) {
953       return absl_ports::InvalidArgumentError(
954           absl_ports::StrCat("Qualified id joinable property '", property_name,
955                              "' cannot have REPEATED cardinality"));
956     }
957   }
958 
959   if (config.delete_propagation_type() !=
960           JoinableConfig::DeletePropagationType::NONE &&
961       config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
962     return absl_ports::InvalidArgumentError(
963         absl_ports::StrCat("Field 'property_name' '", property_name,
964                            "' is required to have QUALIFIED_ID joinable "
965                            "value type with delete propagation enabled"));
966   }
967 
968   return libtextclassifier3::Status::OK;
969 }
970 
ValidateDocumentIndexingConfig(const DocumentIndexingConfig & config,std::string_view schema_type,std::string_view property_name)971 libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
972     const DocumentIndexingConfig& config, std::string_view schema_type,
973     std::string_view property_name) {
974   if (!config.indexable_nested_properties_list().empty() &&
975       config.index_nested_properties()) {
976     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
977         "DocumentIndexingConfig.index_nested_properties is required to be "
978         "false when providing a non-empty indexable_nested_properties_list "
979         "for property '",
980         schema_type, ".", property_name, "'"));
981   }
982   return libtextclassifier3::Status::OK;
983 }
984 
IsIndexedProperty(const PropertyConfigProto & property_config)985 /* static */ bool SchemaUtil::IsIndexedProperty(
986     const PropertyConfigProto& property_config) {
987   switch (property_config.data_type()) {
988     case PropertyConfigProto::DataType::STRING:
989       return property_config.string_indexing_config().term_match_type() !=
990                  TermMatchType::UNKNOWN &&
991              property_config.string_indexing_config().tokenizer_type() !=
992                  StringIndexingConfig::TokenizerType::NONE;
993     case PropertyConfigProto::DataType::INT64:
994       return property_config.integer_indexing_config().numeric_match_type() !=
995              IntegerIndexingConfig::NumericMatchType::UNKNOWN;
996     case PropertyConfigProto::DataType::DOCUMENT:
997       // A document property is considered indexed if it has
998       // index_nested_properties=true, or a non-empty
999       // indexable_nested_properties_list.
1000       return property_config.document_indexing_config()
1001                  .index_nested_properties() ||
1002              !property_config.document_indexing_config()
1003                   .indexable_nested_properties_list()
1004                   .empty();
1005     case PropertyConfigProto::DataType::VECTOR:
1006       return property_config.embedding_indexing_config()
1007                  .embedding_indexing_type() !=
1008              EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN;
1009     case PropertyConfigProto::DataType::UNKNOWN:
1010     case PropertyConfigProto::DataType::DOUBLE:
1011     case PropertyConfigProto::DataType::BOOLEAN:
1012     case PropertyConfigProto::DataType::BYTES:
1013     case PropertyConfigProto::DataType::BLOB_HANDLE:
1014       return false;
1015   }
1016 }
1017 
IsParent(const SchemaUtil::InheritanceMap & inheritance_map,std::string_view parent_type,std::string_view child_type)1018 bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
1019                           std::string_view parent_type,
1020                           std::string_view child_type) {
1021   auto iter = inheritance_map.find(parent_type);
1022   if (iter == inheritance_map.end()) {
1023     return false;
1024   }
1025   return iter->second.count(child_type) > 0;
1026 }
1027 
IsInheritedPropertyCompatible(const SchemaUtil::InheritanceMap & inheritance_map,const PropertyConfigProto & child_property_config,const PropertyConfigProto & parent_property_config)1028 bool SchemaUtil::IsInheritedPropertyCompatible(
1029     const SchemaUtil::InheritanceMap& inheritance_map,
1030     const PropertyConfigProto& child_property_config,
1031     const PropertyConfigProto& parent_property_config) {
1032   // Check if child_property_config->cardinality() <=
1033   // parent_property_config->cardinality().
1034   // Subtype may require a stricter cardinality, but cannot loosen cardinality
1035   // requirements.
1036   if (!CardinalityLessThanEq(child_property_config.cardinality(),
1037                              parent_property_config.cardinality())) {
1038     return false;
1039   }
1040 
1041   // Now we can assume T1 and T2 are not nullptr, and cardinality check passes.
1042   if (child_property_config.data_type() !=
1043           PropertyConfigProto::DataType::DOCUMENT ||
1044       parent_property_config.data_type() !=
1045           PropertyConfigProto::DataType::DOCUMENT) {
1046     return child_property_config.data_type() ==
1047            parent_property_config.data_type();
1048   }
1049 
1050   // Now we can assume T1 and T2 are both document type.
1051   return child_property_config.schema_type() ==
1052              parent_property_config.schema_type() ||
1053          IsParent(inheritance_map, parent_property_config.schema_type(),
1054                   child_property_config.schema_type());
1055 }
1056 
ValidateInheritedProperties(const SchemaProto & schema)1057 libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
1058     const SchemaProto& schema) {
1059   // Create a inheritance map
1060   ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
1061                          BuildTransitiveInheritanceGraph(schema));
1062 
1063   // Create a map that maps from type name to property names, and then from
1064   // property names to PropertyConfigProto.
1065   std::unordered_map<
1066       std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
1067       property_map;
1068   for (const SchemaTypeConfigProto& type_config : schema.types()) {
1069     // Skipping building entries for types without any child or parent, since
1070     // such entry will never be used.
1071     if (type_config.parent_types().empty() &&
1072         inheritance_map.count(type_config.schema_type()) == 0) {
1073       continue;
1074     }
1075     auto& curr_property_map = property_map[type_config.schema_type()];
1076     for (const PropertyConfigProto& property_config :
1077          type_config.properties()) {
1078       curr_property_map[property_config.property_name()] = &property_config;
1079     }
1080   }
1081 
1082   // Validate child properties.
1083   for (const SchemaTypeConfigProto& type_config : schema.types()) {
1084     const std::string& child_type_name = type_config.schema_type();
1085     auto& child_property_map = property_map[child_type_name];
1086 
1087     for (const std::string& parent_type_name : type_config.parent_types()) {
1088       auto& parent_property_map = property_map[parent_type_name];
1089 
1090       for (const auto& [property_name, parent_property_config] :
1091            parent_property_map) {
1092         auto child_property_iter = child_property_map.find(property_name);
1093         if (child_property_iter == child_property_map.end()) {
1094           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
1095               "Property ", property_name, " is not present in child type ",
1096               child_type_name, ", but it is defined in the parent type ",
1097               parent_type_name, "."));
1098         }
1099         if (!IsInheritedPropertyCompatible(inheritance_map,
1100                                            *child_property_iter->second,
1101                                            *parent_property_config)) {
1102           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
1103               "Property ", property_name, " from child type ", child_type_name,
1104               " is not compatible to the parent type ", parent_type_name, "."));
1105         }
1106       }
1107     }
1108   }
1109   return libtextclassifier3::Status::OK;
1110 }
1111 
BuildTypeConfigMap(const SchemaProto & schema,SchemaUtil::TypeConfigMap * type_config_map)1112 void SchemaUtil::BuildTypeConfigMap(
1113     const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
1114   type_config_map->clear();
1115   for (const SchemaTypeConfigProto& type_config : schema.types()) {
1116     type_config_map->emplace(type_config.schema_type(), type_config);
1117   }
1118 }
1119 
ParsePropertyConfigs(const SchemaTypeConfigProto & type_config)1120 SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
1121     const SchemaTypeConfigProto& type_config) {
1122   ParsedPropertyConfigs parsed_property_configs;
1123 
1124   // TODO(cassiewang): consider caching property_config_map for some properties,
1125   // e.g. using LRU cache. Or changing schema.proto to use go/protomap.
1126   for (const PropertyConfigProto& property_config : type_config.properties()) {
1127     std::string_view property_name = property_config.property_name();
1128     parsed_property_configs.property_config_map.emplace(property_name,
1129                                                         &property_config);
1130     if (property_config.cardinality() ==
1131         PropertyConfigProto::Cardinality::REQUIRED) {
1132       parsed_property_configs.required_properties.insert(property_name);
1133     }
1134 
1135     // A non-default term_match_type indicates that this property is meant to be
1136     // indexed.
1137     if (IsIndexedProperty(property_config)) {
1138       parsed_property_configs.indexed_properties.insert(property_name);
1139     }
1140 
1141     // A non-default value_type indicates that this property is meant to be
1142     // joinable.
1143     if (property_config.joinable_config().value_type() !=
1144         JoinableConfig::ValueType::NONE) {
1145       parsed_property_configs.joinable_properties.insert(property_name);
1146     }
1147 
1148     // Also keep track of how many nested document properties there are. Adding
1149     // new nested document properties will result in join-index rebuild.
1150     if (property_config.data_type() ==
1151         PropertyConfigProto::DataType::DOCUMENT) {
1152       parsed_property_configs.nested_document_properties.insert(property_name);
1153     }
1154   }
1155 
1156   return parsed_property_configs;
1157 }
1158 
ComputeCompatibilityDelta(const SchemaProto & old_schema,const SchemaProto & new_schema,const DependentMap & new_schema_dependent_map,const FeatureFlags & feature_flags)1159 const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
1160     const SchemaProto& old_schema, const SchemaProto& new_schema,
1161     const DependentMap& new_schema_dependent_map,
1162     const FeatureFlags& feature_flags) {
1163   SchemaDelta schema_delta;
1164 
1165   TypeConfigMap old_type_config_map, new_type_config_map;
1166   BuildTypeConfigMap(old_schema, &old_type_config_map);
1167   BuildTypeConfigMap(new_schema, &new_type_config_map);
1168 
1169   if (feature_flags.enable_scorable_properties()) {
1170     FindScorablePropertyInconsistentTypes(
1171         old_type_config_map, new_type_config_map, new_schema_dependent_map,
1172         &schema_delta);
1173   }
1174 
1175   // Iterate through and check each field of the old schema
1176   for (const auto& old_type_config : old_schema.types()) {
1177     auto new_schema_type_and_config =
1178         new_type_config_map.find(old_type_config.schema_type());
1179 
1180     if (new_schema_type_and_config == new_type_config_map.end()) {
1181       // Didn't find the old schema type in the new schema, all the old
1182       // documents of this schema type are invalid without the schema
1183       ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
1184                                           old_type_config.schema_type(),
1185                                           "' was not defined in new schema");
1186       schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
1187       continue;
1188     }
1189 
1190     ParsedPropertyConfigs new_parsed_property_configs =
1191         ParsePropertyConfigs(new_schema_type_and_config->second);
1192 
1193     // We only need to check the old, existing properties to see if they're
1194     // compatible since we'll have old data that may be invalidated or need to
1195     // be reindexed.
1196     std::unordered_set<std::string_view> old_required_properties;
1197     std::unordered_set<std::string_view> old_indexed_properties;
1198     std::unordered_set<std::string_view> old_joinable_properties;
1199     std::unordered_set<std::string_view> old_nested_document_properties;
1200 
1201     // If there is a different number of properties, then there must have been a
1202     // change.
1203     bool has_property_changed =
1204         old_type_config.properties_size() !=
1205         new_schema_type_and_config->second.properties_size();
1206     bool is_incompatible = false;
1207     bool is_index_incompatible = false;
1208     bool is_join_incompatible = false;
1209     for (const auto& old_property_config : old_type_config.properties()) {
1210       std::string_view property_name = old_property_config.property_name();
1211       if (old_property_config.cardinality() ==
1212           PropertyConfigProto::Cardinality::REQUIRED) {
1213         old_required_properties.insert(property_name);
1214       }
1215 
1216       // A non-default term_match_type indicates that this property is meant to
1217       // be indexed.
1218       bool is_indexed_property = IsIndexedProperty(old_property_config);
1219       if (is_indexed_property) {
1220         old_indexed_properties.insert(property_name);
1221       }
1222 
1223       bool is_joinable_property =
1224           old_property_config.joinable_config().value_type() !=
1225           JoinableConfig::ValueType::NONE;
1226       if (is_joinable_property) {
1227         old_joinable_properties.insert(property_name);
1228       }
1229 
1230       // A nested-document property is a property of DataType::DOCUMENT.
1231       bool is_nested_document_property =
1232           old_property_config.data_type() ==
1233           PropertyConfigProto::DataType::DOCUMENT;
1234       if (is_nested_document_property) {
1235         old_nested_document_properties.insert(property_name);
1236       }
1237 
1238       auto new_property_name_and_config =
1239           new_parsed_property_configs.property_config_map.find(
1240               old_property_config.property_name());
1241 
1242       if (new_property_name_and_config ==
1243           new_parsed_property_configs.property_config_map.end()) {
1244         // Didn't find the old property
1245         ICING_VLOG(1) << absl_ports::StrCat(
1246             "Previously defined property type '", old_type_config.schema_type(),
1247             ".", old_property_config.property_name(),
1248             "' was not defined in new schema");
1249         is_incompatible = true;
1250         is_index_incompatible |= is_indexed_property;
1251         is_join_incompatible |=
1252             is_joinable_property || is_nested_document_property;
1253         continue;
1254       }
1255 
1256       const PropertyConfigProto* new_property_config =
1257           new_property_name_and_config->second;
1258       if (!has_property_changed &&
1259           !ArePropertiesEqual(old_property_config, *new_property_config)) {
1260         // Finally found a property that changed.
1261         has_property_changed = true;
1262       }
1263 
1264       if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
1265         ICING_VLOG(1) << absl_ports::StrCat(
1266             "Property '", old_type_config.schema_type(), ".",
1267             old_property_config.property_name(), "' is incompatible.");
1268         is_incompatible = true;
1269       }
1270 
1271       // Any change in the indexed property requires a reindexing
1272       if (!IsTermMatchTypeCompatible(
1273               old_property_config.string_indexing_config(),
1274               new_property_config->string_indexing_config()) ||
1275           !IsIntegerNumericMatchTypeCompatible(
1276               old_property_config.integer_indexing_config(),
1277               new_property_config->integer_indexing_config()) ||
1278           !IsDocumentIndexingCompatible(
1279               old_property_config.document_indexing_config(),
1280               new_property_config->document_indexing_config()) ||
1281           !IsEmbeddingIndexingCompatible(
1282               old_property_config.embedding_indexing_config(),
1283               new_property_config->embedding_indexing_config())) {
1284         is_index_incompatible = true;
1285       }
1286 
1287       if (old_property_config.joinable_config().value_type() !=
1288           new_property_config->joinable_config().value_type()) {
1289         is_join_incompatible = true;
1290       }
1291     }
1292 
1293     // We can't have new properties that are REQUIRED since we won't know how
1294     // to backfill the data, and the existing data will be invalid. We're
1295     // guaranteed from our previous checks that all the old properties are also
1296     // present in the new property config, so we can do a simple int comparison
1297     // here to detect new required properties.
1298     if (!IsSubset(new_parsed_property_configs.required_properties,
1299                   old_required_properties)) {
1300       ICING_VLOG(1) << absl_ports::StrCat(
1301           "New schema '", old_type_config.schema_type(),
1302           "' has REQUIRED properties that are not "
1303           "present in the previously defined schema");
1304       is_incompatible = true;
1305     }
1306 
1307     // If we've gained any new indexed properties (this includes gaining new
1308     // indexed nested document properties), then the section ids may change.
1309     // Since the section ids are stored in the index, we'll need to
1310     // reindex everything.
1311     if (!IsSubset(new_parsed_property_configs.indexed_properties,
1312                   old_indexed_properties)) {
1313       ICING_VLOG(1) << "Set of indexed properties in schema type '"
1314                     << old_type_config.schema_type()
1315                     << "' has changed, required reindexing.";
1316       is_index_incompatible = true;
1317     }
1318 
1319     // If we've gained any new joinable properties, then the joinable property
1320     // ids may change. Since the joinable property ids are stored in the cache,
1321     // we'll need to reconstruct join index.
1322     // If we've gained any new nested document properties, we also rebuild the
1323     // join index. This is because we index all nested joinable properties, so
1324     // adding a nested document property will most probably result in having
1325     // more joinable properties.
1326     if (!IsSubset(new_parsed_property_configs.joinable_properties,
1327                   old_joinable_properties) ||
1328         !IsSubset(new_parsed_property_configs.nested_document_properties,
1329                   old_nested_document_properties)) {
1330       ICING_VLOG(1) << "Set of joinable properties in schema type '"
1331                     << old_type_config.schema_type()
1332                     << "' has changed, required reconstructing joinable cache.";
1333       is_join_incompatible = true;
1334     }
1335 
1336     if (is_incompatible) {
1337       AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
1338                                    old_type_config, new_schema_dependent_map,
1339                                    old_type_config_map, new_type_config_map);
1340     }
1341 
1342     if (is_index_incompatible) {
1343       AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
1344                                    old_type_config, new_schema_dependent_map,
1345                                    old_type_config_map, new_type_config_map);
1346     }
1347 
1348     if (is_join_incompatible) {
1349       AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
1350                                    old_type_config, new_schema_dependent_map,
1351                                    old_type_config_map, new_type_config_map);
1352     }
1353 
1354     if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
1355         has_property_changed) {
1356       schema_delta.schema_types_changed_fully_compatible.insert(
1357           old_type_config.schema_type());
1358     }
1359 
1360     // Lastly, remove this type from the map. We know that this type can't
1361     // come up in future iterations through the old schema types because the old
1362     // type config has unique types.
1363     new_type_config_map.erase(old_type_config.schema_type());
1364   }
1365 
1366   // Any types that are still present in the new_type_config_map are newly added
1367   // types.
1368   schema_delta.schema_types_new.reserve(new_type_config_map.size());
1369   for (auto& kvp : new_type_config_map) {
1370     schema_delta.schema_types_new.insert(std::move(kvp.first));
1371   }
1372 
1373   return schema_delta;
1374 }
1375 
1376 }  // namespace lib
1377 }  // namespace icing
1378