xref: /aosp_15_r20/external/icing/icing/schema/section-manager.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SECTION_MANAGER_H_
16 #define ICING_SCHEMA_SECTION_MANAGER_H_
17 
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/proto/document.pb.h"
25 #include "icing/schema/section.h"
26 #include "icing/store/document-filter-data.h"
27 #include "icing/store/key-mapper.h"
28 
29 namespace icing {
30 namespace lib {
31 
32 // This class provides section-related operations. It assigns sections according
33 // to type configs and extracts section / sections from documents.
34 // The actual instance is created together with JoinablePropertyManager and both
35 // of them are wrapped into SchemaTypeManager.
36 //
37 // Note: SectionManager assumes schema type ids are consecutive integers
38 // starting from 0, so it maintains a vector with size
39 // schema_type_mapper_->num_keys() that maps schema type id to a list (2nd level
40 // vector) of SectionMetadatas. Therefore, all schema type ids stored in
41 // schema_type_mapper_ must be in range [0, schema_type_mapper_->num_keys() - 1]
42 // and unique.
43 class SectionManager {
44  public:
45   // Builder class to create a SectionManager which does not take ownership of
46   // any input components, and all pointers must refer to valid objects that
47   // outlive the created SectionManager instance.
48   class Builder {
49    public:
Builder(const KeyMapper<SchemaTypeId> & schema_type_mapper)50     explicit Builder(const KeyMapper<SchemaTypeId>& schema_type_mapper)
51         : schema_type_mapper_(schema_type_mapper),
52           section_metadata_cache_(schema_type_mapper.num_keys()) {}
53 
54     // Checks and appends a new SectionMetadata for the schema type id if the
55     // given property config is indexable.
56     //
57     // Returns:
58     //   - OK on success
59     //   - INVALID_ARGUMENT_ERROR if schema type id is invalid (not in range [0,
60     //     schema_type_mapper_.num_keys() - 1])
61     //   - OUT_OF_RANGE_ERROR if # of indexable properties in a single Schema
62     //     exceeds the threshold (kTotalNumSections)
63     libtextclassifier3::Status ProcessSchemaTypePropertyConfig(
64         SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
65         std::string&& property_path);
66 
67     // Builds and returns a SectionManager instance.
Build()68     std::unique_ptr<SectionManager> Build() && {
69       return std::unique_ptr<SectionManager>(new SectionManager(
70           schema_type_mapper_, std::move(section_metadata_cache_)));
71     }
72 
73    private:
74     const KeyMapper<SchemaTypeId>& schema_type_mapper_;  // Does not own.
75     std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
76   };
77 
78   SectionManager(const SectionManager&) = delete;
79   SectionManager& operator=(const SectionManager&) = delete;
80 
81   // Returns the SectionMetadata associated with the SectionId that's in the
82   // SchemaTypeId.
83   //
84   // Returns:
85   //   pointer to SectionMetadata on success
86   //   INVALID_ARGUMENT if schema type id or section is invalid
87   libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
88       SchemaTypeId schema_type_id, SectionId section_id) const;
89 
90   // Extracts all sections of different types from the given document and group
91   // them by type.
92   // - Sections are sorted by section id in ascending order.
93   // - Section ids start from 0.
94   // - Sections with empty content won't be returned.
95   //
96   // Returns:
97   //   A SectionGroup instance on success
98   //   NOT_FOUND if the type config name of document is not present in
99   //     schema_type_mapper_
100   libtextclassifier3::StatusOr<SectionGroup> ExtractSections(
101       const DocumentProto& document) const;
102 
103   // Returns:
104   //   - On success, the section metadatas for the specified type
105   //   - NOT_FOUND if the type config name is not present in schema_type_mapper_
106   libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
107   GetMetadataList(const std::string& type_config_name) const;
108 
109  private:
SectionManager(const KeyMapper<SchemaTypeId> & schema_type_mapper,std::vector<std::vector<SectionMetadata>> && section_metadata_cache)110   explicit SectionManager(
111       const KeyMapper<SchemaTypeId>& schema_type_mapper,
112       std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
113       : schema_type_mapper_(schema_type_mapper),
114         section_metadata_cache_(std::move(section_metadata_cache)) {}
115 
116   // Maps schema types to a densely-assigned unique id.
117   const KeyMapper<SchemaTypeId>& schema_type_mapper_;  // Does not own
118 
119   // The index of section_metadata_cache_ corresponds to a schema type's
120   // SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
121   // inner vector's index corresponds to a section's SectionId. At the SectionId
122   // index, we store the SectionMetadata of that section.
123   //
124   // For example, pretend "email" had a SchemaTypeId of 0 and it had a section
125   // called "subject" with a SectionId of 1. Then there would exist a vector
126   // that holds the "subject" property's SectionMetadata at index 1. This vector
127   // would be stored at index 0 of the section_metadata_cache_ vector.
128   const std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
129 };
130 
131 }  // namespace lib
132 }  // namespace icing
133 
134 #endif  // ICING_SCHEMA_SECTION_MANAGER_H_
135