xref: /aosp_15_r20/external/icing/icing/index/iterator/doc-hit-info-iterator-or.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/iterator/doc-hit-info-iterator-or.h"
16 
17 #include <cstdint>
18 
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/absl_ports/str_cat.h"
22 #include "icing/index/hit/doc-hit-info.h"
23 #include "icing/index/iterator/doc-hit-info-iterator.h"
24 #include "icing/store/document-id.h"
25 #include "icing/util/status-macros.h"
26 
27 namespace icing {
28 namespace lib {
29 
30 namespace {
31 
32 // When combining Or iterators, n-ary operator has better performance when
33 // number of operands > 2 according to benchmark cl/243321264
34 constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
35 
36 }  // namespace
37 
CreateOrIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)38 std::unique_ptr<DocHitInfoIterator> CreateOrIterator(
39     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
40   if (iterators.size() == 1) {
41     return std::move(iterators.at(0));
42   }
43 
44   std::unique_ptr<DocHitInfoIterator> iterator;
45   if (iterators.size() == kBinaryOrIteratorPerformanceThreshold) {
46     iterator = std::make_unique<DocHitInfoIteratorOr>(std::move(iterators[0]),
47                                                       std::move(iterators[1]));
48   } else {
49     // If the vector is too small, the OrNary iterator can handle it and return
50     // an error on the Advance call
51     iterator = std::make_unique<DocHitInfoIteratorOrNary>(std::move(iterators));
52   }
53 
54   return iterator;
55 }
56 
DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,std::unique_ptr<DocHitInfoIterator> right_it)57 DocHitInfoIteratorOr::DocHitInfoIteratorOr(
58     std::unique_ptr<DocHitInfoIterator> left_it,
59     std::unique_ptr<DocHitInfoIterator> right_it)
60     : left_(std::move(left_it)), right_(std::move(right_it)) {}
61 
62 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()63 DocHitInfoIteratorOr::TrimRightMostNode() && {
64   // Trim the whole OR iterator. Only keep the prefix of the right iterator.
65   //
66   // The OR operator has higher priority, it is not possible that we have an
67   // unfinished prefix in the nested iterator right-most child we need to search
68   // suggestion for.
69   //
70   // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
71   // unfinished last term to be filled.
72   //
73   // If we need to trim a OR iterator for search suggestion, the right child
74   // must be the last term. We don't need left side information to
75   // generate suggestion for the right side.
76   ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
77                          std::move(*right_).TrimRightMostNode());
78   trimmed_right.iterator_ = nullptr;
79   return trimmed_right;
80 }
81 
Advance()82 libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
83   // Cache the document_id of the left iterator for comparison to the right.
84   DocumentId orig_left_document_id = left_document_id_;
85 
86   // Advance the left iterator if necessary.
87   if (left_document_id_ != kInvalidDocumentId) {
88     if (right_document_id_ == kInvalidDocumentId ||
89         left_document_id_ >= right_document_id_) {
90       if (left_->Advance().ok()) {
91         left_document_id_ = left_->doc_hit_info().document_id();
92       } else {
93         left_document_id_ = kInvalidDocumentId;
94       }
95     }
96   }
97 
98   // Advance the right iterator if necessary, by comparing to the original
99   // left document_id (not the one which may have been updated).
100   if (right_document_id_ != kInvalidDocumentId) {
101     if (orig_left_document_id == kInvalidDocumentId ||
102         right_document_id_ >= orig_left_document_id) {
103       if (right_->Advance().ok()) {
104         right_document_id_ = right_->doc_hit_info().document_id();
105       } else {
106         right_document_id_ = kInvalidDocumentId;
107       }
108     }
109   }
110 
111   // Done, we either found a match or we reached the end of potential
112   // DocHitInfos
113   if (left_document_id_ == kInvalidDocumentId &&
114       right_document_id_ == kInvalidDocumentId) {
115     // Reached the end, set these to invalid values and return
116     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
117     return absl_ports::ResourceExhaustedError(
118         "No more DocHitInfos in iterator");
119   }
120 
121   // Now chose the best one that is not invalid.
122   DocHitInfoIterator* chosen;
123   if (left_document_id_ == kInvalidDocumentId) {
124     chosen = right_.get();
125   } else if (right_document_id_ == kInvalidDocumentId) {
126     chosen = left_.get();
127   } else if (left_document_id_ < right_document_id_) {
128     chosen = right_.get();
129   } else {
130     chosen = left_.get();
131   }
132   current_ = chosen;
133 
134   doc_hit_info_ = chosen->doc_hit_info();
135 
136   // If equal, combine.
137   if (left_document_id_ == right_document_id_) {
138     doc_hit_info_.MergeSectionsFrom(
139         right_->doc_hit_info().hit_section_ids_mask());
140   }
141 
142   return libtextclassifier3::Status::OK;
143 }
144 
ToString() const145 std::string DocHitInfoIteratorOr::ToString() const {
146   return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
147                             ")");
148 }
149 
DocHitInfoIteratorOrNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)150 DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
151     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
152     : iterators_(std::move(iterators)) {}
153 
154 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()155 DocHitInfoIteratorOrNary::TrimRightMostNode() && {
156   // Trim the whole OR iterator.
157   //
158   // The OR operator has higher priority, it is not possible that we have an
159   // unfinished prefix in the nested iterator right-most child we need to search
160   // suggestion for.
161   //
162   // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
163   // unfinished last term to be filled.
164   //
165   // If we need to trim a OR iterator for search suggestion, the right-most
166   // child must be the last term. We don't need left side information to
167   // generate suggestion for the right side.
168   ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
169                          std::move(*iterators_.back()).TrimRightMostNode());
170   trimmed_right.iterator_ = nullptr;
171   return trimmed_right;
172 }
173 
Advance()174 libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
175   current_iterators_.clear();
176   if (iterators_.size() < 2) {
177     return absl_ports::InvalidArgumentError(
178         "Not enough iterators to OR together");
179   }
180 
181   if (doc_hit_info_.document_id() == 0) {
182     // 0 is the smallest (last) DocumentId, can't advance further. Reset to
183     // invalid values and return directly
184     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
185     return absl_ports::ResourceExhaustedError(
186         "No more DocHitInfos in iterator");
187   }
188   // The maximum possible doc id for the current Advance() call.
189   const DocumentId next_document_id_max = doc_hit_info_.document_id() - 1;
190   doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
191   DocumentId next_document_id = kInvalidDocumentId;
192   // Go through the iterators and try to find the maximum document_id that is
193   // equal to or smaller than next_document_id_max
194   for (const auto& iterator : iterators_) {
195     if (iterator->doc_hit_info().document_id() > next_document_id_max) {
196       // Advance the iterator until its value is equal to or smaller than
197       // next_document_id_max
198       if (!AdvanceTo(iterator.get(), next_document_id_max).ok()) {
199         continue;
200       }
201     }
202     // Now iterator->get_document_id() <= next_document_id_max
203     if (next_document_id == kInvalidDocumentId) {
204       next_document_id = iterator->doc_hit_info().document_id();
205     } else {
206       next_document_id =
207           std::max(next_document_id, iterator->doc_hit_info().document_id());
208     }
209   }
210   if (next_document_id == kInvalidDocumentId) {
211     // None of the iterators had a next document_id, reset to invalid values and
212     // return
213     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
214     return absl_ports::ResourceExhaustedError(
215         "No more DocHitInfos in iterator");
216   }
217 
218   // Found the next hit DocumentId, now calculate the section info.
219   for (const auto& iterator : iterators_) {
220     if (iterator->doc_hit_info().document_id() == next_document_id) {
221       current_iterators_.push_back(iterator.get());
222       if (doc_hit_info_.document_id() == kInvalidDocumentId) {
223         doc_hit_info_ = iterator->doc_hit_info();
224       } else {
225         doc_hit_info_.MergeSectionsFrom(
226             iterator->doc_hit_info().hit_section_ids_mask());
227       }
228     }
229   }
230   return libtextclassifier3::Status::OK;
231 }
232 
GetCallStats() const233 DocHitInfoIterator::CallStats DocHitInfoIteratorOrNary::GetCallStats() const {
234   CallStats call_stats;
235   for (const auto& iter : iterators_) {
236     call_stats += iter->GetCallStats();
237   }
238   return call_stats;
239 }
240 
ToString() const241 std::string DocHitInfoIteratorOrNary::ToString() const {
242   std::string ret = "(";
243 
244   for (int i = 0; i < iterators_.size(); ++i) {
245     absl_ports::StrAppend(&ret, iterators_.at(i)->ToString());
246     if (i != iterators_.size() - 1) {
247       // Not the last element in vector
248       absl_ports::StrAppend(&ret, " OR ");
249     }
250   }
251 
252   absl_ports::StrAppend(&ret, ")");
253   return ret;
254 }
255 
256 }  // namespace lib
257 }  // namespace icing
258