1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/iterator/doc-hit-info-iterator-or.h"
16
17 #include <cstdint>
18
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/absl_ports/str_cat.h"
22 #include "icing/index/hit/doc-hit-info.h"
23 #include "icing/index/iterator/doc-hit-info-iterator.h"
24 #include "icing/store/document-id.h"
25 #include "icing/util/status-macros.h"
26
27 namespace icing {
28 namespace lib {
29
30 namespace {
31
32 // When combining Or iterators, n-ary operator has better performance when
33 // number of operands > 2 according to benchmark cl/243321264
34 constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
35
36 } // namespace
37
CreateOrIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)38 std::unique_ptr<DocHitInfoIterator> CreateOrIterator(
39 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
40 if (iterators.size() == 1) {
41 return std::move(iterators.at(0));
42 }
43
44 std::unique_ptr<DocHitInfoIterator> iterator;
45 if (iterators.size() == kBinaryOrIteratorPerformanceThreshold) {
46 iterator = std::make_unique<DocHitInfoIteratorOr>(std::move(iterators[0]),
47 std::move(iterators[1]));
48 } else {
49 // If the vector is too small, the OrNary iterator can handle it and return
50 // an error on the Advance call
51 iterator = std::make_unique<DocHitInfoIteratorOrNary>(std::move(iterators));
52 }
53
54 return iterator;
55 }
56
DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,std::unique_ptr<DocHitInfoIterator> right_it)57 DocHitInfoIteratorOr::DocHitInfoIteratorOr(
58 std::unique_ptr<DocHitInfoIterator> left_it,
59 std::unique_ptr<DocHitInfoIterator> right_it)
60 : left_(std::move(left_it)), right_(std::move(right_it)) {}
61
62 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()63 DocHitInfoIteratorOr::TrimRightMostNode() && {
64 // Trim the whole OR iterator. Only keep the prefix of the right iterator.
65 //
66 // The OR operator has higher priority, it is not possible that we have an
67 // unfinished prefix in the nested iterator right-most child we need to search
68 // suggestion for.
69 //
70 // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
71 // unfinished last term to be filled.
72 //
73 // If we need to trim a OR iterator for search suggestion, the right child
74 // must be the last term. We don't need left side information to
75 // generate suggestion for the right side.
76 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
77 std::move(*right_).TrimRightMostNode());
78 trimmed_right.iterator_ = nullptr;
79 return trimmed_right;
80 }
81
Advance()82 libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
83 // Cache the document_id of the left iterator for comparison to the right.
84 DocumentId orig_left_document_id = left_document_id_;
85
86 // Advance the left iterator if necessary.
87 if (left_document_id_ != kInvalidDocumentId) {
88 if (right_document_id_ == kInvalidDocumentId ||
89 left_document_id_ >= right_document_id_) {
90 if (left_->Advance().ok()) {
91 left_document_id_ = left_->doc_hit_info().document_id();
92 } else {
93 left_document_id_ = kInvalidDocumentId;
94 }
95 }
96 }
97
98 // Advance the right iterator if necessary, by comparing to the original
99 // left document_id (not the one which may have been updated).
100 if (right_document_id_ != kInvalidDocumentId) {
101 if (orig_left_document_id == kInvalidDocumentId ||
102 right_document_id_ >= orig_left_document_id) {
103 if (right_->Advance().ok()) {
104 right_document_id_ = right_->doc_hit_info().document_id();
105 } else {
106 right_document_id_ = kInvalidDocumentId;
107 }
108 }
109 }
110
111 // Done, we either found a match or we reached the end of potential
112 // DocHitInfos
113 if (left_document_id_ == kInvalidDocumentId &&
114 right_document_id_ == kInvalidDocumentId) {
115 // Reached the end, set these to invalid values and return
116 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
117 return absl_ports::ResourceExhaustedError(
118 "No more DocHitInfos in iterator");
119 }
120
121 // Now chose the best one that is not invalid.
122 DocHitInfoIterator* chosen;
123 if (left_document_id_ == kInvalidDocumentId) {
124 chosen = right_.get();
125 } else if (right_document_id_ == kInvalidDocumentId) {
126 chosen = left_.get();
127 } else if (left_document_id_ < right_document_id_) {
128 chosen = right_.get();
129 } else {
130 chosen = left_.get();
131 }
132 current_ = chosen;
133
134 doc_hit_info_ = chosen->doc_hit_info();
135
136 // If equal, combine.
137 if (left_document_id_ == right_document_id_) {
138 doc_hit_info_.MergeSectionsFrom(
139 right_->doc_hit_info().hit_section_ids_mask());
140 }
141
142 return libtextclassifier3::Status::OK;
143 }
144
ToString() const145 std::string DocHitInfoIteratorOr::ToString() const {
146 return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
147 ")");
148 }
149
DocHitInfoIteratorOrNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)150 DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
151 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
152 : iterators_(std::move(iterators)) {}
153
154 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()155 DocHitInfoIteratorOrNary::TrimRightMostNode() && {
156 // Trim the whole OR iterator.
157 //
158 // The OR operator has higher priority, it is not possible that we have an
159 // unfinished prefix in the nested iterator right-most child we need to search
160 // suggestion for.
161 //
162 // eg: `foo OR (bar baz)` is not valid for search suggestion since there is no
163 // unfinished last term to be filled.
164 //
165 // If we need to trim a OR iterator for search suggestion, the right-most
166 // child must be the last term. We don't need left side information to
167 // generate suggestion for the right side.
168 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_right,
169 std::move(*iterators_.back()).TrimRightMostNode());
170 trimmed_right.iterator_ = nullptr;
171 return trimmed_right;
172 }
173
Advance()174 libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
175 current_iterators_.clear();
176 if (iterators_.size() < 2) {
177 return absl_ports::InvalidArgumentError(
178 "Not enough iterators to OR together");
179 }
180
181 if (doc_hit_info_.document_id() == 0) {
182 // 0 is the smallest (last) DocumentId, can't advance further. Reset to
183 // invalid values and return directly
184 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
185 return absl_ports::ResourceExhaustedError(
186 "No more DocHitInfos in iterator");
187 }
188 // The maximum possible doc id for the current Advance() call.
189 const DocumentId next_document_id_max = doc_hit_info_.document_id() - 1;
190 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
191 DocumentId next_document_id = kInvalidDocumentId;
192 // Go through the iterators and try to find the maximum document_id that is
193 // equal to or smaller than next_document_id_max
194 for (const auto& iterator : iterators_) {
195 if (iterator->doc_hit_info().document_id() > next_document_id_max) {
196 // Advance the iterator until its value is equal to or smaller than
197 // next_document_id_max
198 if (!AdvanceTo(iterator.get(), next_document_id_max).ok()) {
199 continue;
200 }
201 }
202 // Now iterator->get_document_id() <= next_document_id_max
203 if (next_document_id == kInvalidDocumentId) {
204 next_document_id = iterator->doc_hit_info().document_id();
205 } else {
206 next_document_id =
207 std::max(next_document_id, iterator->doc_hit_info().document_id());
208 }
209 }
210 if (next_document_id == kInvalidDocumentId) {
211 // None of the iterators had a next document_id, reset to invalid values and
212 // return
213 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
214 return absl_ports::ResourceExhaustedError(
215 "No more DocHitInfos in iterator");
216 }
217
218 // Found the next hit DocumentId, now calculate the section info.
219 for (const auto& iterator : iterators_) {
220 if (iterator->doc_hit_info().document_id() == next_document_id) {
221 current_iterators_.push_back(iterator.get());
222 if (doc_hit_info_.document_id() == kInvalidDocumentId) {
223 doc_hit_info_ = iterator->doc_hit_info();
224 } else {
225 doc_hit_info_.MergeSectionsFrom(
226 iterator->doc_hit_info().hit_section_ids_mask());
227 }
228 }
229 }
230 return libtextclassifier3::Status::OK;
231 }
232
GetCallStats() const233 DocHitInfoIterator::CallStats DocHitInfoIteratorOrNary::GetCallStats() const {
234 CallStats call_stats;
235 for (const auto& iter : iterators_) {
236 call_stats += iter->GetCallStats();
237 }
238 return call_stats;
239 }
240
ToString() const241 std::string DocHitInfoIteratorOrNary::ToString() const {
242 std::string ret = "(";
243
244 for (int i = 0; i < iterators_.size(); ++i) {
245 absl_ports::StrAppend(&ret, iterators_.at(i)->ToString());
246 if (i != iterators_.size() - 1) {
247 // Not the last element in vector
248 absl_ports::StrAppend(&ret, " OR ");
249 }
250 }
251
252 absl_ports::StrAppend(&ret, ")");
253 return ret;
254 }
255
256 } // namespace lib
257 } // namespace icing
258