xref: /aosp_15_r20/external/icing/icing/schema/section-manager.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/schema/section-manager.h"
16 
17 #include <algorithm>
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/legacy/core/icing-string-util.h"
28 #include "icing/proto/document.pb.h"
29 #include "icing/proto/schema.pb.h"
30 #include "icing/proto/term.pb.h"
31 #include "icing/schema/property-util.h"
32 #include "icing/schema/section.h"
33 #include "icing/store/document-filter-data.h"
34 #include "icing/store/key-mapper.h"
35 #include "icing/util/status-macros.h"
36 
37 namespace icing {
38 namespace lib {
39 
40 namespace {
41 
42 // Helper function to append a new section metadata
AppendNewSectionMetadata(std::vector<SectionMetadata> * metadata_list,std::string && concatenated_path,const PropertyConfigProto & property_config)43 libtextclassifier3::Status AppendNewSectionMetadata(
44     std::vector<SectionMetadata>* metadata_list,
45     std::string&& concatenated_path,
46     const PropertyConfigProto& property_config) {
47   // Validates next section id, makes sure that section id is the same as the
48   // list index so that we could find any section metadata by id in O(1) later.
49   SectionId new_section_id = static_cast<SectionId>(metadata_list->size());
50   if (!IsSectionIdValid(new_section_id)) {
51     // Max number of sections reached
52     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
53         "Too many properties to be indexed, max number of properties "
54         "allowed: %d",
55         kMaxSectionId - kMinSectionId + 1));
56   }
57 
58   // Creates section metadata
59   metadata_list->push_back(SectionMetadata(
60       new_section_id, property_config.data_type(),
61       property_config.string_indexing_config().tokenizer_type(),
62       property_config.string_indexing_config().term_match_type(),
63       property_config.integer_indexing_config().numeric_match_type(),
64       property_config.embedding_indexing_config().embedding_indexing_type(),
65       property_config.embedding_indexing_config().quantization_type(),
66       std::move(concatenated_path)));
67   return libtextclassifier3::Status::OK;
68 }
69 
70 template <typename T>
AppendSection(SectionMetadata section_metadata,libtextclassifier3::StatusOr<std::vector<T>> && section_content_or,std::vector<Section<T>> & sections_out)71 void AppendSection(
72     SectionMetadata section_metadata,
73     libtextclassifier3::StatusOr<std::vector<T>>&& section_content_or,
74     std::vector<Section<T>>& sections_out) {
75   if (!section_content_or.ok()) {
76     return;
77   }
78 
79   std::vector<T> section_content = std::move(section_content_or).ValueOrDie();
80   if (!section_content.empty()) {
81     // Adds to result vector if section is found in document
82     sections_out.emplace_back(std::move(section_metadata),
83                               std::move(section_content));
84   }
85 }
86 
87 }  // namespace
88 
89 libtextclassifier3::Status
ProcessSchemaTypePropertyConfig(SchemaTypeId schema_type_id,const PropertyConfigProto & property_config,std::string && property_path)90 SectionManager::Builder::ProcessSchemaTypePropertyConfig(
91     SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
92     std::string&& property_path) {
93   if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
94     return absl_ports::InvalidArgumentError("Invalid schema type id");
95   }
96 
97   // We don't need to check if the property is indexable. This method will
98   // only be called properties that should consume sectionIds, even if the
99   // property's indexing configuration itself is not indexable.
100   // This would be the case for unknown and non-indexable property paths that
101   // are defined in the indexable_nested_properties_list.
102   ICING_RETURN_IF_ERROR(
103       AppendNewSectionMetadata(&section_metadata_cache_[schema_type_id],
104                                std::move(property_path), property_config));
105   return libtextclassifier3::Status::OK;
106 }
107 
108 libtextclassifier3::StatusOr<const SectionMetadata*>
GetSectionMetadata(SchemaTypeId schema_type_id,SectionId section_id) const109 SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
110                                    SectionId section_id) const {
111   if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
112     return absl_ports::InvalidArgumentError("Invalid schema type id");
113   }
114   if (!IsSectionIdValid(section_id)) {
115     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
116         "Section id %d is greater than the max value %d", section_id,
117         kMaxSectionId));
118   }
119 
120   const std::vector<SectionMetadata>& section_metadatas =
121       section_metadata_cache_[schema_type_id];
122   if (section_id >= section_metadatas.size()) {
123     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
124         "Section with id %d doesn't exist in type config with id %d",
125         section_id, schema_type_id));
126   }
127 
128   // The index of metadata list is the same as the section id, so we can use
129   // section id as the index.
130   return &section_metadatas[section_id];
131 }
132 
ExtractSections(const DocumentProto & document) const133 libtextclassifier3::StatusOr<SectionGroup> SectionManager::ExtractSections(
134     const DocumentProto& document) const {
135   ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
136                          GetMetadataList(document.schema()));
137   SectionGroup section_group;
138   for (const SectionMetadata& section_metadata : *metadata_list) {
139     switch (section_metadata.data_type) {
140       case PropertyConfigProto::DataType::STRING: {
141         if (section_metadata.term_match_type == TermMatchType::UNKNOWN ||
142             section_metadata.tokenizer ==
143                 StringIndexingConfig::TokenizerType::NONE) {
144           // Skip if term-match type is UNKNOWN, or if the tokenizer-type is
145           // NONE.
146           break;
147         }
148         AppendSection(
149             section_metadata,
150             property_util::ExtractPropertyValuesFromDocument<std::string_view>(
151                 document, section_metadata.path),
152             section_group.string_sections);
153         break;
154       }
155       case PropertyConfigProto::DataType::INT64: {
156         if (section_metadata.numeric_match_type ==
157             IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
158           // Skip if numeric-match type is UNKNOWN.
159           break;
160         }
161         AppendSection(section_metadata,
162                       property_util::ExtractPropertyValuesFromDocument<int64_t>(
163                           document, section_metadata.path),
164                       section_group.integer_sections);
165         break;
166       }
167       case PropertyConfigProto::DataType::VECTOR: {
168         if (section_metadata.embedding_indexing_type ==
169             EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN) {
170           // Skip if embedding indexing type is UNKNOWN.
171           break;
172         }
173         AppendSection(
174             section_metadata,
175             property_util::ExtractPropertyValuesFromDocument<
176                 PropertyProto::VectorProto>(document, section_metadata.path),
177             section_group.vector_sections);
178         break;
179       }
180       default: {
181         // Skip other data types.
182         break;
183       }
184     }
185   }
186   return section_group;
187 }
188 
189 libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
GetMetadataList(const std::string & type_config_name) const190 SectionManager::GetMetadataList(const std::string& type_config_name) const {
191   ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
192                          schema_type_mapper_.Get(type_config_name));
193   return &section_metadata_cache_.at(schema_type_id);
194 }
195 
196 }  // namespace lib
197 }  // namespace icing
198