1 // Copyright (C) 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ 16 #define ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ 17 18 #include <cstdint> 19 #include <set> 20 #include <string> 21 #include <unordered_map> 22 #include <utility> 23 #include <vector> 24 25 #include "icing/index/iterator/doc-hit-info-iterator.h" 26 #include "icing/schema/schema-store.h" 27 #include "icing/schema/section.h" 28 #include "icing/store/document-id.h" 29 #include "icing/store/document-store.h" 30 31 namespace icing { 32 namespace lib { 33 34 class SectionRestrictData { 35 public: 36 // Does not take any ownership, and all pointers must refer to valid objects 37 // that outlive the one constructed. SectionRestrictData(const DocumentStore * document_store,const SchemaStore * schema_store,int64_t current_time_ms,std::unordered_map<std::string,std::set<std::string>> type_property_filters)38 SectionRestrictData(const DocumentStore* document_store, 39 const SchemaStore* schema_store, int64_t current_time_ms, 40 std::unordered_map<std::string, std::set<std::string>> 41 type_property_filters) 42 : document_store_(*document_store), 43 schema_store_(*schema_store), 44 current_time_ms_(current_time_ms), 45 type_property_filters_(std::move(type_property_filters)) {} 46 47 // Calculates the section mask of allowed sections(determined by the 48 // property filters map) for the given schema type and caches the same for any 49 // future calls. 50 // 51 // Returns: 52 // - If type_property_filters_ has an entry for the given schema type or 53 // wildcard(*), return a bitwise or of section IDs in the schema type 54 // that are also present in the relevant filter list. 55 // - Otherwise, return kSectionIdMaskAll. 56 SectionIdMask ComputeAllowedSectionsMask(const std::string& schema_type); 57 58 // Calculates the section mask of allowed sections(determined by the 59 // property filters map) for the given document id, by retrieving its schema 60 // type name and calling the above method. 61 // 62 // Returns: 63 // - If type_property_filters_ has an entry for the given document's schema 64 // type or wildcard(*), return a bitwise or of section IDs in the schema 65 // type that are also present in the relevant filter list. 66 // - Otherwise, return kSectionIdMaskAll. 67 SectionIdMask ComputeAllowedSectionsMask(DocumentId document_id); 68 document_store()69 const DocumentStore& document_store() const { return document_store_; } 70 schema_store()71 const SchemaStore& schema_store() const { return schema_store_; } 72 current_time_ms()73 int64_t current_time_ms() const { return current_time_ms_; } 74 75 const std::unordered_map<std::string, std::set<std::string>>& type_property_filters()76 type_property_filters() const { 77 return type_property_filters_; 78 } 79 80 private: 81 const DocumentStore& document_store_; 82 const SchemaStore& schema_store_; 83 int64_t current_time_ms_; 84 85 // Map of property filters per schema type. Supports wildcard(*) for schema 86 // type that will apply to all schema types that are not specifically 87 // specified in the mapping otherwise. 88 std::unordered_map<std::string, std::set<std::string>> type_property_filters_; 89 // Mapping of schema type to the section mask of allowed sections for that 90 // schema type. This section mask is lazily calculated based on the 91 // specified property filters and cached for any future use. 92 std::unordered_map<std::string, SectionIdMask> type_property_masks_; 93 94 // Generates a section mask for the given schema type and the target 95 // sections. 96 // 97 // Returns: 98 // - A bitwise or of section IDs in the schema_type that that are also 99 // present in the target_sections list. 100 // - If none of the sections in the schema_type are present in the 101 // target_sections list, return kSectionIdMaskNone. 102 // This is done by doing a bitwise or of the target section ids for the 103 // given schema type. 104 SectionIdMask GenerateSectionMask( 105 const std::string& schema_type, 106 const std::set<std::string>& target_sections) const; 107 }; 108 109 // Indicate that the iterator can internally handle the section restriction 110 // logic by itself. 111 // 112 // This is helpful when some iterators want to have better control for 113 // optimization. For example, embedding iterator will be able to filter out 114 // embedding hits from unwanted sections to avoid retrieving unnecessary vectors 115 // and calculate scores for them. 116 class DocHitInfoIteratorHandlingSectionRestrict 117 : public DocHitInfoLeafIterator { 118 protected: HandleSectionRestriction(SectionRestrictData * other_data)119 bool HandleSectionRestriction(SectionRestrictData* other_data) override { 120 section_restrict_data_.push_back(other_data); 121 return true; 122 } 123 ComputeAllowedSectionsMask(DocumentId document_id)124 SectionIdMask ComputeAllowedSectionsMask(DocumentId document_id) { 125 SectionIdMask result = kSectionIdMaskAll; 126 for (SectionRestrictData* section_restrict_data : section_restrict_data_) { 127 result &= section_restrict_data->ComputeAllowedSectionsMask(document_id); 128 } 129 return result; 130 } 131 132 // Does not own the pointers. 133 std::vector<SectionRestrictData*> section_restrict_data_; 134 }; 135 136 } // namespace lib 137 } // namespace icing 138 139 #endif // ICING_INDEX_ITERATOR_SECTION_RESTRICT_DATA_H_ 140