// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker 15*8b6cd535SAndroid Build Coastguard Worker #ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_ 16*8b6cd535SAndroid Build Coastguard Worker #define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_ 17*8b6cd535SAndroid Build Coastguard Worker 18*8b6cd535SAndroid Build Coastguard Worker #include <array> 19*8b6cd535SAndroid Build Coastguard Worker #include <string> 20*8b6cd535SAndroid Build Coastguard Worker #include <utility> 21*8b6cd535SAndroid Build Coastguard Worker #include <vector> 22*8b6cd535SAndroid Build Coastguard Worker 23*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h" 24*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/statusor.h" 25*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/hit/doc-hit-info.h" 26*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/hit/hit.h" 27*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/iterator/doc-hit-info-iterator.h" 28*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/lite/lite-index.h" 29*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/term-id-codec.h" 30*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/section.h" 31*8b6cd535SAndroid Build Coastguard Worker 32*8b6cd535SAndroid Build Coastguard Worker namespace icing { 33*8b6cd535SAndroid Build Coastguard Worker namespace lib { 34*8b6cd535SAndroid Build Coastguard Worker 35*8b6cd535SAndroid Build Coastguard Worker class DocHitInfoIteratorTermLite : public DocHitInfoLeafIterator { 36*8b6cd535SAndroid Build Coastguard Worker public: DocHitInfoIteratorTermLite(const TermIdCodec * term_id_codec,LiteIndex * lite_index,const std::string & term,int term_start_index,int unnormalized_term_length,SectionIdMask section_restrict_mask,bool need_hit_term_frequency)37*8b6cd535SAndroid Build Coastguard Worker explicit DocHitInfoIteratorTermLite(const 
TermIdCodec* term_id_codec, 38*8b6cd535SAndroid Build Coastguard Worker LiteIndex* lite_index, 39*8b6cd535SAndroid Build Coastguard Worker const std::string& term, 40*8b6cd535SAndroid Build Coastguard Worker int term_start_index, 41*8b6cd535SAndroid Build Coastguard Worker int unnormalized_term_length, 42*8b6cd535SAndroid Build Coastguard Worker SectionIdMask section_restrict_mask, 43*8b6cd535SAndroid Build Coastguard Worker bool need_hit_term_frequency) 44*8b6cd535SAndroid Build Coastguard Worker : term_(term), 45*8b6cd535SAndroid Build Coastguard Worker term_start_index_(term_start_index), 46*8b6cd535SAndroid Build Coastguard Worker unnormalized_term_length_(unnormalized_term_length), 47*8b6cd535SAndroid Build Coastguard Worker lite_index_(lite_index), 48*8b6cd535SAndroid Build Coastguard Worker cached_hits_idx_(-1), 49*8b6cd535SAndroid Build Coastguard Worker term_id_codec_(term_id_codec), 50*8b6cd535SAndroid Build Coastguard Worker num_advance_calls_(0), 51*8b6cd535SAndroid Build Coastguard Worker section_restrict_mask_(section_restrict_mask), 52*8b6cd535SAndroid Build Coastguard Worker need_hit_term_frequency_(need_hit_term_frequency) {} 53*8b6cd535SAndroid Build Coastguard Worker 54*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status Advance() override; 55*8b6cd535SAndroid Build Coastguard Worker 56*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override; 57*8b6cd535SAndroid Build Coastguard Worker GetCallStats()58*8b6cd535SAndroid Build Coastguard Worker CallStats GetCallStats() const override { 59*8b6cd535SAndroid Build Coastguard Worker return CallStats( 60*8b6cd535SAndroid Build Coastguard Worker /*num_leaf_advance_calls_lite_index_in=*/num_advance_calls_, 61*8b6cd535SAndroid Build Coastguard Worker /*num_leaf_advance_calls_main_index_in=*/0, 62*8b6cd535SAndroid Build Coastguard Worker /*num_leaf_advance_calls_integer_index_in=*/0, 63*8b6cd535SAndroid Build Coastguard Worker 
/*num_leaf_advance_calls_no_index_in=*/0, 64*8b6cd535SAndroid Build Coastguard Worker /*num_blocks_inspected_in=*/0); 65*8b6cd535SAndroid Build Coastguard Worker } 66*8b6cd535SAndroid Build Coastguard Worker 67*8b6cd535SAndroid Build Coastguard Worker void PopulateMatchedTermsStats( 68*8b6cd535SAndroid Build Coastguard Worker std::vector<TermMatchInfo>* matched_terms_stats, 69*8b6cd535SAndroid Build Coastguard Worker SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { 70*8b6cd535SAndroid Build Coastguard Worker if (cached_hits_idx_ == -1 || cached_hits_idx_ >= cached_hits_.size()) { 71*8b6cd535SAndroid Build Coastguard Worker // Current hit isn't valid, return. 72*8b6cd535SAndroid Build Coastguard Worker return; 73*8b6cd535SAndroid Build Coastguard Worker } 74*8b6cd535SAndroid Build Coastguard Worker SectionIdMask section_mask = 75*8b6cd535SAndroid Build Coastguard Worker doc_hit_info_.hit_section_ids_mask() & filtering_section_mask; 76*8b6cd535SAndroid Build Coastguard Worker SectionIdMask section_mask_copy = section_mask; 77*8b6cd535SAndroid Build Coastguard Worker std::array<Hit::TermFrequency, kTotalNumSections> section_term_frequencies = 78*8b6cd535SAndroid Build Coastguard Worker {Hit::kNoTermFrequency}; 79*8b6cd535SAndroid Build Coastguard Worker while (section_mask_copy) { 80*8b6cd535SAndroid Build Coastguard Worker SectionId section_id = __builtin_ctzll(section_mask_copy); 81*8b6cd535SAndroid Build Coastguard Worker if (need_hit_term_frequency_) { 82*8b6cd535SAndroid Build Coastguard Worker section_term_frequencies.at(section_id) = 83*8b6cd535SAndroid Build Coastguard Worker cached_hit_term_frequency_.at(cached_hits_idx_)[section_id]; 84*8b6cd535SAndroid Build Coastguard Worker } 85*8b6cd535SAndroid Build Coastguard Worker section_mask_copy &= ~(UINT64_C(1) << section_id); 86*8b6cd535SAndroid Build Coastguard Worker } 87*8b6cd535SAndroid Build Coastguard Worker TermMatchInfo term_stats(term_, section_mask, 88*8b6cd535SAndroid Build 
Coastguard Worker std::move(section_term_frequencies)); 89*8b6cd535SAndroid Build Coastguard Worker 90*8b6cd535SAndroid Build Coastguard Worker for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) { 91*8b6cd535SAndroid Build Coastguard Worker if (cur_term_stats.term == term_stats.term) { 92*8b6cd535SAndroid Build Coastguard Worker // Same docId and same term, we don't need to add the term and the term 93*8b6cd535SAndroid Build Coastguard Worker // frequency should always be the same 94*8b6cd535SAndroid Build Coastguard Worker return; 95*8b6cd535SAndroid Build Coastguard Worker } 96*8b6cd535SAndroid Build Coastguard Worker } 97*8b6cd535SAndroid Build Coastguard Worker matched_terms_stats->push_back(std::move(term_stats)); 98*8b6cd535SAndroid Build Coastguard Worker } 99*8b6cd535SAndroid Build Coastguard Worker 100*8b6cd535SAndroid Build Coastguard Worker protected: 101*8b6cd535SAndroid Build Coastguard Worker // Add DocHitInfos corresponding to term_ to cached_hits_. 102*8b6cd535SAndroid Build Coastguard Worker // 103*8b6cd535SAndroid Build Coastguard Worker // Returns: 104*8b6cd535SAndroid Build Coastguard Worker // - OK, on success 105*8b6cd535SAndroid Build Coastguard Worker // - NOT_FOUND if no term matching term_ was found in the lexicon. 
106*8b6cd535SAndroid Build Coastguard Worker // - INVALID_ARGUMENT if unable to properly encode the termid 107*8b6cd535SAndroid Build Coastguard Worker virtual libtextclassifier3::Status RetrieveMoreHits() = 0; 108*8b6cd535SAndroid Build Coastguard Worker 109*8b6cd535SAndroid Build Coastguard Worker const std::string term_; 110*8b6cd535SAndroid Build Coastguard Worker // The start index of the given term in the search query 111*8b6cd535SAndroid Build Coastguard Worker int term_start_index_; 112*8b6cd535SAndroid Build Coastguard Worker // The length of the given unnormalized term in the search query 113*8b6cd535SAndroid Build Coastguard Worker int unnormalized_term_length_; 114*8b6cd535SAndroid Build Coastguard Worker LiteIndex* const lite_index_; 115*8b6cd535SAndroid Build Coastguard Worker // Stores hits retrieved from the index. This may only be a subset of the hits 116*8b6cd535SAndroid Build Coastguard Worker // that are present in the index. Current value pointed to by the Iterator is 117*8b6cd535SAndroid Build Coastguard Worker // tracked by cached_hits_idx_. 118*8b6cd535SAndroid Build Coastguard Worker std::vector<DocHitInfo> cached_hits_; 119*8b6cd535SAndroid Build Coastguard Worker std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_; 120*8b6cd535SAndroid Build Coastguard Worker int cached_hits_idx_; 121*8b6cd535SAndroid Build Coastguard Worker const TermIdCodec* term_id_codec_; 122*8b6cd535SAndroid Build Coastguard Worker int num_advance_calls_; 123*8b6cd535SAndroid Build Coastguard Worker // Mask indicating which sections hits should be considered for. 124*8b6cd535SAndroid Build Coastguard Worker // Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired. 
125*8b6cd535SAndroid Build Coastguard Worker const SectionIdMask section_restrict_mask_; 126*8b6cd535SAndroid Build Coastguard Worker const bool need_hit_term_frequency_; 127*8b6cd535SAndroid Build Coastguard Worker }; 128*8b6cd535SAndroid Build Coastguard Worker 129*8b6cd535SAndroid Build Coastguard Worker class DocHitInfoIteratorTermLiteExact : public DocHitInfoIteratorTermLite { 130*8b6cd535SAndroid Build Coastguard Worker public: DocHitInfoIteratorTermLiteExact(const TermIdCodec * term_id_codec,LiteIndex * lite_index,const std::string & term,int term_start_index,int unnormalized_term_length,SectionIdMask section_id_mask,bool need_hit_term_frequency)131*8b6cd535SAndroid Build Coastguard Worker explicit DocHitInfoIteratorTermLiteExact(const TermIdCodec* term_id_codec, 132*8b6cd535SAndroid Build Coastguard Worker LiteIndex* lite_index, 133*8b6cd535SAndroid Build Coastguard Worker const std::string& term, 134*8b6cd535SAndroid Build Coastguard Worker int term_start_index, 135*8b6cd535SAndroid Build Coastguard Worker int unnormalized_term_length, 136*8b6cd535SAndroid Build Coastguard Worker SectionIdMask section_id_mask, 137*8b6cd535SAndroid Build Coastguard Worker bool need_hit_term_frequency) 138*8b6cd535SAndroid Build Coastguard Worker : DocHitInfoIteratorTermLite(term_id_codec, lite_index, term, 139*8b6cd535SAndroid Build Coastguard Worker term_start_index, unnormalized_term_length, 140*8b6cd535SAndroid Build Coastguard Worker section_id_mask, need_hit_term_frequency) {} 141*8b6cd535SAndroid Build Coastguard Worker 142*8b6cd535SAndroid Build Coastguard Worker std::string ToString() const override; 143*8b6cd535SAndroid Build Coastguard Worker 144*8b6cd535SAndroid Build Coastguard Worker protected: 145*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status RetrieveMoreHits() override; 146*8b6cd535SAndroid Build Coastguard Worker }; 147*8b6cd535SAndroid Build Coastguard Worker 148*8b6cd535SAndroid Build Coastguard Worker class 
DocHitInfoIteratorTermLitePrefix : public DocHitInfoIteratorTermLite { 149*8b6cd535SAndroid Build Coastguard Worker public: DocHitInfoIteratorTermLitePrefix(const TermIdCodec * term_id_codec,LiteIndex * lite_index,const std::string & term,int term_start_index,int unnormalized_term_length,SectionIdMask section_id_mask,bool need_hit_term_frequency)150*8b6cd535SAndroid Build Coastguard Worker explicit DocHitInfoIteratorTermLitePrefix(const TermIdCodec* term_id_codec, 151*8b6cd535SAndroid Build Coastguard Worker LiteIndex* lite_index, 152*8b6cd535SAndroid Build Coastguard Worker const std::string& term, 153*8b6cd535SAndroid Build Coastguard Worker int term_start_index, 154*8b6cd535SAndroid Build Coastguard Worker int unnormalized_term_length, 155*8b6cd535SAndroid Build Coastguard Worker SectionIdMask section_id_mask, 156*8b6cd535SAndroid Build Coastguard Worker bool need_hit_term_frequency) 157*8b6cd535SAndroid Build Coastguard Worker : DocHitInfoIteratorTermLite(term_id_codec, lite_index, term, 158*8b6cd535SAndroid Build Coastguard Worker term_start_index, unnormalized_term_length, 159*8b6cd535SAndroid Build Coastguard Worker section_id_mask, need_hit_term_frequency) {} 160*8b6cd535SAndroid Build Coastguard Worker 161*8b6cd535SAndroid Build Coastguard Worker std::string ToString() const override; 162*8b6cd535SAndroid Build Coastguard Worker 163*8b6cd535SAndroid Build Coastguard Worker protected: 164*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status RetrieveMoreHits() override; 165*8b6cd535SAndroid Build Coastguard Worker 166*8b6cd535SAndroid Build Coastguard Worker private: 167*8b6cd535SAndroid Build Coastguard Worker // After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and 168*8b6cd535SAndroid Build Coastguard Worker // "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be 169*8b6cd535SAndroid Build Coastguard Worker // merged. 
170*8b6cd535SAndroid Build Coastguard Worker void SortDocumentIds(); 171*8b6cd535SAndroid Build Coastguard Worker void SortAndDedupeDocumentIds(); 172*8b6cd535SAndroid Build Coastguard Worker }; 173*8b6cd535SAndroid Build Coastguard Worker 174*8b6cd535SAndroid Build Coastguard Worker } // namespace lib 175*8b6cd535SAndroid Build Coastguard Worker } // namespace icing 176*8b6cd535SAndroid Build Coastguard Worker 177*8b6cd535SAndroid Build Coastguard Worker #endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_ 178