xref: /aosp_15_r20/external/icing/icing/index/iterator/section-restrict-data.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
16 #define ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
17 
18 #include <cstdint>
19 #include <set>
20 #include <string>
21 #include <unordered_map>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/index/iterator/doc-hit-info-iterator.h"
26 #include "icing/schema/schema-store.h"
27 #include "icing/schema/section.h"
28 #include "icing/store/document-id.h"
29 #include "icing/store/document-store.h"
30 
31 namespace icing {
32 namespace lib {
33 
34 class SectionRestrictData {
35  public:
36   // Does not take any ownership, and all pointers must refer to valid objects
37   // that outlive the one constructed.
SectionRestrictData(const DocumentStore * document_store,const SchemaStore * schema_store,int64_t current_time_ms,std::unordered_map<std::string,std::set<std::string>> type_property_filters)38   SectionRestrictData(const DocumentStore* document_store,
39                       const SchemaStore* schema_store, int64_t current_time_ms,
40                       std::unordered_map<std::string, std::set<std::string>>
41                           type_property_filters)
42       : document_store_(*document_store),
43         schema_store_(*schema_store),
44         current_time_ms_(current_time_ms),
45         type_property_filters_(std::move(type_property_filters)) {}
46 
47   // Calculates the section mask of allowed sections(determined by the
48   // property filters map) for the given schema type and caches the same for any
49   // future calls.
50   //
51   // Returns:
52   //  - If type_property_filters_ has an entry for the given schema type or
53   //    wildcard(*), return a bitwise or of section IDs in the schema type
54   //    that are also present in the relevant filter list.
55   //  - Otherwise, return kSectionIdMaskAll.
56   SectionIdMask ComputeAllowedSectionsMask(const std::string& schema_type);
57 
58   // Calculates the section mask of allowed sections(determined by the
59   // property filters map) for the given document id, by retrieving its schema
60   // type name and calling the above method.
61   //
62   // Returns:
63   //  - If type_property_filters_ has an entry for the given document's schema
64   //    type or wildcard(*), return a bitwise or of section IDs in the schema
65   //    type that are also present in the relevant filter list.
66   //  - Otherwise, return kSectionIdMaskAll.
67   SectionIdMask ComputeAllowedSectionsMask(DocumentId document_id);
68 
document_store()69   const DocumentStore& document_store() const { return document_store_; }
70 
schema_store()71   const SchemaStore& schema_store() const { return schema_store_; }
72 
current_time_ms()73   int64_t current_time_ms() const { return current_time_ms_; }
74 
75   const std::unordered_map<std::string, std::set<std::string>>&
type_property_filters()76   type_property_filters() const {
77     return type_property_filters_;
78   }
79 
80  private:
81   const DocumentStore& document_store_;
82   const SchemaStore& schema_store_;
83   int64_t current_time_ms_;
84 
85   // Map of property filters per schema type. Supports wildcard(*) for schema
86   // type that will apply to all schema types that are not specifically
87   // specified in the mapping otherwise.
88   std::unordered_map<std::string, std::set<std::string>> type_property_filters_;
89   // Mapping of schema type to the section mask of allowed sections for that
90   // schema type. This section mask is lazily calculated based on the
91   // specified property filters and cached for any future use.
92   std::unordered_map<std::string, SectionIdMask> type_property_masks_;
93 
94   // Generates a section mask for the given schema type and the target
95   // sections.
96   //
97   // Returns:
98   //  - A bitwise or of section IDs in the schema_type that that are also
99   //    present in the target_sections list.
100   //  - If none of the sections in the schema_type are present in the
101   //    target_sections list, return kSectionIdMaskNone.
102   // This is done by doing a bitwise or of the target section ids for the
103   // given schema type.
104   SectionIdMask GenerateSectionMask(
105       const std::string& schema_type,
106       const std::set<std::string>& target_sections) const;
107 };
108 
109 // Indicate that the iterator can internally handle the section restriction
110 // logic by itself.
111 //
112 // This is helpful when some iterators want to have better control for
113 // optimization. For example, embedding iterator will be able to filter out
114 // embedding hits from unwanted sections to avoid retrieving unnecessary vectors
115 // and calculate scores for them.
116 class DocHitInfoIteratorHandlingSectionRestrict
117     : public DocHitInfoLeafIterator {
118  protected:
HandleSectionRestriction(SectionRestrictData * other_data)119   bool HandleSectionRestriction(SectionRestrictData* other_data) override {
120     section_restrict_data_.push_back(other_data);
121     return true;
122   }
123 
ComputeAllowedSectionsMask(DocumentId document_id)124   SectionIdMask ComputeAllowedSectionsMask(DocumentId document_id) {
125     SectionIdMask result = kSectionIdMaskAll;
126     for (SectionRestrictData* section_restrict_data : section_restrict_data_) {
127       result &= section_restrict_data->ComputeAllowedSectionsMask(document_id);
128     }
129     return result;
130   }
131 
132   // Does not own the pointers.
133   std::vector<SectionRestrictData*> section_restrict_data_;
134 };
135 
136 }  // namespace lib
137 }  // namespace icing
138 
139 #endif  // ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_
140