xref: /aosp_15_r20/external/icing/icing/scoring/priority-queue-scored-document-hits-ranker.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
16 #define ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
17 
18 #include <queue>
19 #include <vector>
20 
21 #include "icing/scoring/scored-document-hit.h"
22 #include "icing/scoring/scored-document-hits-ranker.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 // ScoredDocumentHitsRanker interface implementation, based on
28 // std::priority_queue. We can get next top hit in O(lgN) time.
29 template <typename ScoredDataType,
30           typename Converter = typename ScoredDataType::Converter>
31 class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
32  public:
33   explicit PriorityQueueScoredDocumentHitsRanker(
34       std::vector<ScoredDataType>&& scored_data_vec, bool is_descending = true);
35 
36   ~PriorityQueueScoredDocumentHitsRanker() override = default;
37 
38   // Note: ranker may store ScoredDocumentHit or JoinedScoredDocumentHit, so we
39   // have template for scored_data_pq_.
40   // - JoinedScoredDocumentHit is a superset of ScoredDocumentHit, so we unify
41   //   the return type of PopNext to use the superset type
42   //   JoinedScoredDocumentHit in order to make it simple, and rankers storing
43   //   ScoredDocumentHit should convert it to JoinedScoredDocumentHit before
44   //   returning. It makes the implementation simpler, especially for
45   //   ResultRetriever, which now only needs to deal with one single return
46   //   format.
47   // - JoinedScoredDocumentHit has ~2x size of ScoredDocumentHit. Since we cache
48   //   ranker (which contains a priority queue of data) in ResultState, if we
49   //   store the scored hits in JoinedScoredDocumentHit format directly, then it
50   //   doubles the memory usage. Therefore, we still keep the flexibility to
51   //   store ScoredDocumentHit or any other types of data, but require PopNext
52   //   to convert it to JoinedScoredDocumentHit.
53   JoinedScoredDocumentHit PopNext() override;
54 
55   void TruncateHitsTo(int new_size) override;
56 
size()57   int size() const override { return scored_data_pq_.size(); }
58 
empty()59   bool empty() const override { return scored_data_pq_.empty(); }
60 
61  private:
62   // Comparator for std::priority_queue. Since std::priority is a max heap
63   // (descending order), reverse it if we want ascending order.
64   class Comparator {
65    public:
Comparator(bool is_ascending)66     explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {}
67 
operator()68     bool operator()(const ScoredDataType& lhs,
69                     const ScoredDataType& rhs) const {
70       // STL comparator requirement: equal MUST return false.
71       // If writing `return is_ascending_ == !(lhs < rhs)`:
72       // - When lhs == rhs, !(lhs < rhs) is true
73       // - If is_ascending_ is true, then we return true for equal case!
74       if (is_ascending_) {
75         return rhs < lhs;
76       }
77       return lhs < rhs;
78     }
79 
80    private:
81     bool is_ascending_;
82   };
83 
84   Comparator comparator_;
85 
86   // Use priority queue to get top K hits in O(KlgN) time.
87   std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
88       scored_data_pq_;
89 
90   Converter converter_;
91 };
92 
93 template <typename ScoredDataType, typename Converter>
94 PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::
PriorityQueueScoredDocumentHitsRanker(std::vector<ScoredDataType> && scored_data_vec,bool is_descending)95     PriorityQueueScoredDocumentHitsRanker(
96         std::vector<ScoredDataType>&& scored_data_vec, bool is_descending)
97     : comparator_(/*is_ascending=*/!is_descending),
98       scored_data_pq_(comparator_, std::move(scored_data_vec)) {}
99 
100 template <typename ScoredDataType, typename Converter>
101 JoinedScoredDocumentHit
PopNext()102 PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::PopNext() {
103   ScoredDataType next_scored_data = scored_data_pq_.top();
104   scored_data_pq_.pop();
105   return converter_(std::move(next_scored_data));
106 }
107 
108 template <typename ScoredDataType, typename Converter>
109 void PriorityQueueScoredDocumentHitsRanker<
TruncateHitsTo(int new_size)110     ScoredDataType, Converter>::TruncateHitsTo(int new_size) {
111   if (new_size < 0 || scored_data_pq_.size() <= new_size) {
112     return;
113   }
114 
115   // Copying the best new_size results.
116   std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
117       new_pq(comparator_);
118   for (int i = 0; i < new_size; ++i) {
119     new_pq.push(scored_data_pq_.top());
120     scored_data_pq_.pop();
121   }
122   scored_data_pq_ = std::move(new_pq);
123 }
124 
125 }  // namespace lib
126 }  // namespace icing
127 
128 #endif  // ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
129