1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
16 #define ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
17
18 #include <queue>
19 #include <vector>
20
21 #include "icing/scoring/scored-document-hit.h"
22 #include "icing/scoring/scored-document-hits-ranker.h"
23
24 namespace icing {
25 namespace lib {
26
27 // ScoredDocumentHitsRanker interface implementation, based on
28 // std::priority_queue. We can get next top hit in O(lgN) time.
29 template <typename ScoredDataType,
30 typename Converter = typename ScoredDataType::Converter>
31 class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
32 public:
33 explicit PriorityQueueScoredDocumentHitsRanker(
34 std::vector<ScoredDataType>&& scored_data_vec, bool is_descending = true);
35
36 ~PriorityQueueScoredDocumentHitsRanker() override = default;
37
38 // Note: ranker may store ScoredDocumentHit or JoinedScoredDocumentHit, so we
39 // have template for scored_data_pq_.
40 // - JoinedScoredDocumentHit is a superset of ScoredDocumentHit, so we unify
41 // the return type of PopNext to use the superset type
42 // JoinedScoredDocumentHit in order to make it simple, and rankers storing
43 // ScoredDocumentHit should convert it to JoinedScoredDocumentHit before
44 // returning. It makes the implementation simpler, especially for
45 // ResultRetriever, which now only needs to deal with one single return
46 // format.
47 // - JoinedScoredDocumentHit has ~2x size of ScoredDocumentHit. Since we cache
48 // ranker (which contains a priority queue of data) in ResultState, if we
49 // store the scored hits in JoinedScoredDocumentHit format directly, then it
50 // doubles the memory usage. Therefore, we still keep the flexibility to
51 // store ScoredDocumentHit or any other types of data, but require PopNext
52 // to convert it to JoinedScoredDocumentHit.
53 JoinedScoredDocumentHit PopNext() override;
54
55 void TruncateHitsTo(int new_size) override;
56
size()57 int size() const override { return scored_data_pq_.size(); }
58
empty()59 bool empty() const override { return scored_data_pq_.empty(); }
60
61 private:
62 // Comparator for std::priority_queue. Since std::priority is a max heap
63 // (descending order), reverse it if we want ascending order.
64 class Comparator {
65 public:
Comparator(bool is_ascending)66 explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {}
67
operator()68 bool operator()(const ScoredDataType& lhs,
69 const ScoredDataType& rhs) const {
70 // STL comparator requirement: equal MUST return false.
71 // If writing `return is_ascending_ == !(lhs < rhs)`:
72 // - When lhs == rhs, !(lhs < rhs) is true
73 // - If is_ascending_ is true, then we return true for equal case!
74 if (is_ascending_) {
75 return rhs < lhs;
76 }
77 return lhs < rhs;
78 }
79
80 private:
81 bool is_ascending_;
82 };
83
84 Comparator comparator_;
85
86 // Use priority queue to get top K hits in O(KlgN) time.
87 std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
88 scored_data_pq_;
89
90 Converter converter_;
91 };
92
93 template <typename ScoredDataType, typename Converter>
94 PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::
PriorityQueueScoredDocumentHitsRanker(std::vector<ScoredDataType> && scored_data_vec,bool is_descending)95 PriorityQueueScoredDocumentHitsRanker(
96 std::vector<ScoredDataType>&& scored_data_vec, bool is_descending)
97 : comparator_(/*is_ascending=*/!is_descending),
98 scored_data_pq_(comparator_, std::move(scored_data_vec)) {}
99
100 template <typename ScoredDataType, typename Converter>
101 JoinedScoredDocumentHit
PopNext()102 PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::PopNext() {
103 ScoredDataType next_scored_data = scored_data_pq_.top();
104 scored_data_pq_.pop();
105 return converter_(std::move(next_scored_data));
106 }
107
108 template <typename ScoredDataType, typename Converter>
109 void PriorityQueueScoredDocumentHitsRanker<
TruncateHitsTo(int new_size)110 ScoredDataType, Converter>::TruncateHitsTo(int new_size) {
111 if (new_size < 0 || scored_data_pq_.size() <= new_size) {
112 return;
113 }
114
115 // Copying the best new_size results.
116 std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
117 new_pq(comparator_);
118 for (int i = 0; i < new_size; ++i) {
119 new_pq.push(scored_data_pq_.top());
120 scored_data_pq_.pop();
121 }
122 scored_data_pq_ = std::move(new_pq);
123 }
124
125 } // namespace lib
126 } // namespace icing
127
128 #endif // ICING_SCORING_PRIORITY_QUEUE_SCORED_DOCUMENT_HITS_RANKER_H_
129