1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCORING_RANKER_H_ 16 #define ICING_SCORING_RANKER_H_ 17 18 #include <vector> 19 20 #include "icing/text_classifier/lib3/utils/base/statusor.h" 21 #include "icing/index/term-metadata.h" 22 #include "icing/scoring/scored-document-hit.h" 23 24 // Provides functionality to get the top N results from an unsorted vector. 25 namespace icing { 26 namespace lib { 27 28 // Builds a heap of scored document hits. The same vector is used to store the 29 // heap structure. 30 // 31 // REQUIRED: scored_document_hits is not null. 32 void BuildHeapInPlace( 33 std::vector<ScoredDocumentHit>* scored_document_hits, 34 const ScoredDocumentHitComparator& scored_document_hit_comparator); 35 36 // Returns the single next top result (i.e. the current root element) from the 37 // given heap and remove it from the heap. The heap structure will be 38 // maintained. 39 // 40 // Returns: 41 // The next top result element on success 42 // RESOURCE_EXHAUSTED_ERROR if heap is empty 43 libtextclassifier3::StatusOr<ScoredDocumentHit> PopNextTopResultFromHeap( 44 std::vector<ScoredDocumentHit>* scored_document_hits_heap, 45 const ScoredDocumentHitComparator& scored_document_hit_comparator); 46 47 // Returns the top num_results results from the given heap and remove those 48 // results from the heap. An empty vector will be returned if heap is empty. 49 // 50 // REQUIRED: scored_document_hits_heap is not null. 51 std::vector<ScoredDocumentHit> PopTopResultsFromHeap( 52 std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results, 53 const ScoredDocumentHitComparator& scored_document_hit_comparator); 54 55 // The heap is a min-heap. So that we can avoid some push operations by 56 // comparing to the root term, and only pushing if greater than root. The time 57 // complexity for a single push is O(lgK) which K is the number_to_return. 58 // REQUIRED: scored_terms_heap is not null. 59 void PushToTermHeap(TermMetadata term, int number_to_return, 60 std::vector<TermMetadata>& scored_terms_heap); 61 62 // Return all terms from the given terms heap. And since the heap is a min-heap, 63 // the output vector will be increasing order. 64 // REQUIRED: scored_terms_heap is not null. 65 std::vector<TermMetadata> PopAllTermsFromHeap( 66 std::vector<TermMetadata>& scored_terms_heap); 67 } // namespace lib 68 } // namespace icing 69 70 #endif // ICING_SCORING_RANKER_H_ 71