1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/index.h"
16
17 #include <unistd.h>
18
19 #include <algorithm>
20 #include <cstdint>
21 #include <limits>
22 #include <memory>
23 #include <random>
24 #include <string>
25 #include <string_view>
26 #include <unordered_map>
27 #include <utility>
28 #include <vector>
29
30 #include "icing/text_classifier/lib3/utils/base/status.h"
31 #include "gmock/gmock.h"
32 #include "gtest/gtest.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/index/hit/doc-hit-info.h"
35 #include "icing/index/iterator/doc-hit-info-iterator.h"
36 #include "icing/index/lite/term-id-hit-pair.h"
37 #include "icing/legacy/index/icing-filesystem.h"
38 #include "icing/legacy/index/icing-mock-filesystem.h"
39 #include "icing/proto/debug.pb.h"
40 #include "icing/proto/logging.pb.h"
41 #include "icing/proto/storage.pb.h"
42 #include "icing/proto/term.pb.h"
43 #include "icing/schema/section.h"
44 #include "icing/store/document-id.h"
45 #include "icing/testing/always-true-suggestion-result-checker-impl.h"
46 #include "icing/testing/common-matchers.h"
47 #include "icing/testing/random-string.h"
48 #include "icing/testing/tmp-directory.h"
49 #include "icing/util/crc32.h"
50 #include "icing/util/logging.h"
51
52 namespace icing {
53 namespace lib {
54
55 namespace {
56
57 using ::testing::ContainerEq;
58 using ::testing::ElementsAre;
59 using ::testing::Eq;
60 using ::testing::Ge;
61 using ::testing::Gt;
62 using ::testing::IsEmpty;
63 using ::testing::IsFalse;
64 using ::testing::IsTrue;
65 using ::testing::Ne;
66 using ::testing::NiceMock;
67 using ::testing::Not;
68 using ::testing::Return;
69 using ::testing::SizeIs;
70 using ::testing::StrEq;
71 using ::testing::StrNe;
72 using ::testing::Test;
73 using ::testing::UnorderedElementsAre;
74
// Returns the value reported by getpagesize().
int GetBlockSize() {
  const int page_size = getpagesize();
  return page_size;
}
76
77 class IndexTest : public Test {
78 protected:
SetUp()79 void SetUp() override {
80 index_dir_ = GetTestTempDir() + "/index_test/";
81 Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
82 /*lite_index_sort_at_indexing=*/true,
83 /*lite_index_sort_size=*/1024 * 8);
84 ICING_ASSERT_OK_AND_ASSIGN(
85 index_, Index::Create(options, &filesystem_, &icing_filesystem_));
86 }
87
TearDown()88 void TearDown() override {
89 index_.reset();
90 icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
91 }
92
GetHits(std::unique_ptr<DocHitInfoIterator> iterator)93 std::vector<DocHitInfo> GetHits(
94 std::unique_ptr<DocHitInfoIterator> iterator) {
95 std::vector<DocHitInfo> infos;
96 while (iterator->Advance().ok()) {
97 infos.push_back(iterator->doc_hit_info());
98 }
99 return infos;
100 }
101
GetHits(std::string term,int term_start_index,int unnormalized_term_length,TermMatchType::Code match_type)102 libtextclassifier3::StatusOr<std::vector<DocHitInfo>> GetHits(
103 std::string term, int term_start_index, int unnormalized_term_length,
104 TermMatchType::Code match_type) {
105 ICING_ASSIGN_OR_RETURN(
106 std::unique_ptr<DocHitInfoIterator> itr,
107 index_->GetIterator(term, term_start_index, unnormalized_term_length,
108 kSectionIdMaskAll, match_type));
109 return GetHits(std::move(itr));
110 }
111
112 Filesystem filesystem_;
113 IcingFilesystem icing_filesystem_;
114 std::string index_dir_;
115 std::unique_ptr<Index> index_;
116 };
117
118 constexpr DocumentId kDocumentId0 = 0;
119 constexpr DocumentId kDocumentId1 = 1;
120 constexpr DocumentId kDocumentId2 = 2;
121 constexpr DocumentId kDocumentId3 = 3;
122 constexpr DocumentId kDocumentId4 = 4;
123 constexpr DocumentId kDocumentId5 = 5;
124 constexpr DocumentId kDocumentId6 = 6;
125 constexpr DocumentId kDocumentId7 = 7;
126 constexpr DocumentId kDocumentId8 = 8;
127 constexpr SectionId kSectionId2 = 2;
128 constexpr SectionId kSectionId3 = 3;
129
130 MATCHER_P2(EqualsDocHitInfo, document_id, sections, "") {
131 const DocHitInfo& actual = arg;
132 SectionIdMask section_mask = kSectionIdMaskNone;
133 for (SectionId section : sections) {
134 section_mask |= UINT64_C(1) << section;
135 }
136 *result_listener << "actual is {document_id=" << actual.document_id()
137 << ", section_mask=" << actual.hit_section_ids_mask()
138 << "}, but expected was {document_id=" << document_id
139 << ", section_mask=" << section_mask << "}.";
140 return actual.document_id() == document_id &&
141 actual.hit_section_ids_mask() == section_mask;
142 }
143
144 MATCHER_P2(EqualsTermMetadata, content, hit_count, "") {
145 const TermMetadata& actual = arg;
146 *result_listener << "actual is {content=" << actual.content
147 << ", score=" << actual.score
148 << "}, but expected was {content=" << content
149 << ", score=" << hit_count << "}.";
150 return actual.content == content && actual.score == hit_count;
151 }
152
// Index::Create is expected to return FAILED_PRECONDITION when either
// filesystem pointer is null.
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;
  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/1024 * 1024,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);

  // Null IcingFilesystem.
  EXPECT_THAT(
      Index::Create(index_options, &filesystem_, /*icing_filesystem=*/nullptr),
      StatusIs(expected_code));

  // Null Filesystem.
  EXPECT_THAT(
      Index::Create(index_options, /*filesystem=*/nullptr, &icing_filesystem_),
      StatusIs(expected_code));
}
164
// An iterator over an index with no content is exhausted on its first
// Advance(); a second iterator for the same term behaves the same way.
TEST_F(IndexTest, EmptyIndex) {
  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));

  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
}
181
// Same as EmptyIndex, but with a Merge() on the empty index beforehand.
TEST_F(IndexTest, EmptyIndexAfterMerge) {
  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;

  // Merging an empty index should succeed, but have no effects.
  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));

  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
}
201
// Writes hits with sort-at-indexing disabled, then reopens the same index
// with it enabled and expects LiteIndexNeedSort() to be false afterwards.
TEST_F(IndexTest, CreationWithLiteIndexSortAtIndexingEnabledShouldSort) {
  // Make the index with lite_index_sort_at_indexing=false and a very small
  // sort threshold.
  Index::Options unsorted_options(index_dir_, /*index_merge_size=*/1024,
                                  /*lite_index_sort_at_indexing=*/false,
                                  /*lite_index_sort_size=*/16);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_,
      Index::Create(unsorted_options, &filesystem_, &icing_filesystem_));

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_ASSERT_OK(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY));
  ICING_ASSERT_OK(editor.BufferTerm("baz", TermMatchType::EXACT_ONLY));
  ICING_ASSERT_OK(editor.IndexAllBufferedTerms());

  // Persist and recreate the index with lite_index_sort_at_indexing=true.
  ICING_ASSERT_OK(index_->PersistToDisk());
  Index::Options sorted_options(index_dir_, /*index_merge_size=*/1024,
                                /*lite_index_sort_at_indexing=*/true,
                                /*lite_index_sort_size=*/16);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_,
      Index::Create(sorted_options, &filesystem_, &icing_filesystem_));

  // Check that the index is sorted after recreating with
  // lite_index_sort_at_indexing, with the unsorted HitBuffer exceeding the
  // sort threshold.
  EXPECT_THAT(index_->LiteIndexNeedSort(), IsFalse());
}
231
// Once an iterator is exhausted, its doc_hit_info() must report
// kInvalidDocumentId with no sections set.
TEST_F(IndexTest, AdvancePastEnd) {
  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;
  const std::vector<SectionId> no_sections;

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // A term that was never indexed: exhausted immediately.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("bar", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(hit_iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));

  // The indexed term: one successful Advance(), then exhausted.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(hit_iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));
}
258
// Same as AdvancePastEnd, but the hit is merged before it is queried.
TEST_F(IndexTest, AdvancePastEndAfterMerge) {
  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;
  const std::vector<SectionId> no_sections;

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ASSERT_THAT(index_->Merge(), IsOk());

  // A term that was never indexed: exhausted immediately.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("bar", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(hit_iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));

  // The indexed term: one successful Advance(), then exhausted.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(hit_iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));
}
287
// Checks GetCallStats() when both "foo" hits were merged before the query:
// only the main index advance counter is expected to change.
TEST_F(IndexTest, IteratorGetCallStats_mainIndexOnly) {
  // Document 0 contains "foo" and "bar".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 contains "foo".
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Merge the index.
  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Before Advance(): every counter is zero.
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));

  // 1st Advance().
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/1,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 2nd Advance().
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/2,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 3rd Advance(): exhausted, and the counters stay where they were.
  ASSERT_THAT(hit_iterator->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/2,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));
}
349
// Checks GetCallStats() when no Merge() has happened: only the lite index
// advance counter is expected to change and no blocks are inspected.
TEST_F(IndexTest, IteratorGetCallStats_liteIndexOnly) {
  // Document 0 contains "foo" and "bar".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 contains "foo".
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Before Advance(): every counter is zero.
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));

  // 1st Advance().
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/1,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));

  // 2nd Advance().
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));

  // 3rd Advance(): exhausted, and the counters stay where they were.
  ASSERT_THAT(hit_iterator->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));
}
408
// Checks GetCallStats() when "foo" has two hits merged into the main index
// and two further hits still sitting in the lite index.
TEST_F(IndexTest, IteratorGetCallStats) {
  // Document 0 contains "foo" and "bar".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 contains "foo".
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Merge the index. 2 hits for "foo" will be merged into the main index.
  ASSERT_THAT(index_->Merge(), IsOk());

  // Insert 2 more hits for "foo". They will be in the lite index.
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId3, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Before Advance(): every counter is zero.
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/0,
                  /*num_leaf_advance_calls_main_index=*/0,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/0));

  // 1st Advance(). DocHitInfoIteratorOr will advance both left and right
  // iterator (i.e. lite and main index iterator) once, compare document ids,
  // and return the hit with larger document id. In this case, the hit from
  // the lite index will be chosen and returned.
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/1,
                  /*num_leaf_advance_calls_main_index=*/1,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 2nd Advance(). Since the lite index iterator had the larger document id
  // in the previous round, it is the one advanced in this round. The hit from
  // the lite index is still chosen and returned.
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/1,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 3rd Advance(). Since the lite index iterator had the larger document id
  // in the previous round, it is the one advanced in this round. However,
  // there is no hit left in the lite index, so the hit from the main index is
  // chosen and returned.
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/1,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 4th Advance(). Advance main index.
  ASSERT_THAT(hit_iterator->Advance(), IsOk());
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/2,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));

  // 5th Advance(). Reach the end.
  ASSERT_THAT(hit_iterator->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  EXPECT_THAT(hit_iterator->GetCallStats(),
              EqualsDocHitInfoIteratorCallStats(
                  /*num_leaf_advance_calls_lite_index=*/2,
                  /*num_leaf_advance_calls_main_index=*/2,
                  /*num_leaf_advance_calls_integer_index=*/0,
                  /*num_leaf_advance_calls_no_index=*/0,
                  /*num_blocks_inspected=*/1));
}
509
// One document with one term: querying that term yields exactly that hit.
TEST_F(IndexTest, SingleHitSingleTermIndex) {
  const std::vector<SectionId> expected_sections = {kSectionId2};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
525
// Same as SingleHitSingleTermIndex, with a Merge() before the query.
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
  const std::vector<SectionId> expected_sections = {kSectionId2};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
543
// Runs Optimize() three times on an index holding a single hit: with an
// identity mapping, with a mapping that moves the document to a new id, and
// with a mapping that drops the document.
TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) {
  const std::vector<SectionId> expected_sections = {kSectionId2};

  Index::Editor editor =
      index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  // Identity mapping: the hit stays on document 2.
  ASSERT_THAT(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                               /*new_last_added_document_id=*/2),
              IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(
          ElementsAre(EqualsDocHitInfo(kDocumentId2, expected_sections))));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));

  // Mapping to a different docid will translate the hit.
  ASSERT_THAT(
      index_->Optimize(
          /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
          /*new_last_added_document_id=*/1),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(
          ElementsAre(EqualsDocHitInfo(kDocumentId1, expected_sections))));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));

  // Mapping to kInvalidDocumentId will remove the hit.
  ASSERT_THAT(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
                       /*new_last_added_document_id=*/0),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
581
// Same scenario as SingleHitSingleTermIndexAfterOptimize, but the hit is
// merged before Optimize() is run three times: with an identity mapping, with
// a mapping that moves the document to a new id, and with a mapping that
// drops the document.
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) {
  Index::Editor edit =
      index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  ICING_ASSERT_OK(index_->Merge());

  // Identity mapping: the hit stays on document 2.
  ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                                   /*new_last_added_document_id=*/2));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
          kDocumentId2, std::vector<SectionId>{kSectionId2}))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);

  // Mapping to a different docid will translate the hit.
  ICING_ASSERT_OK(index_->Optimize(
      /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
      /*new_last_added_document_id=*/1));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
          kDocumentId1, std::vector<SectionId>{kSectionId2}))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);

  // Mapping to kInvalidDocumentId will remove the hit.
  ICING_ASSERT_OK(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
                       /*new_last_added_document_id=*/0));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  // Compare against the named constant rather than a bare 0, matching the
  // other last_added_document_id() expectations in this file.
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
621
// One document with two terms: querying "foo" yields exactly that document.
TEST_F(IndexTest, SingleHitMultiTermIndex) {
  const std::vector<SectionId> expected_sections = {kSectionId2};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
638
// Same as SingleHitMultiTermIndex, with a Merge() before the query.
TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
  const std::vector<SectionId> expected_sections = {kSectionId2};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
657
// Indexes "foo" in documents 0 and 2 and "bar" in document 1, then runs
// Optimize() three times: with an identity mapping, with a mapping that
// removes document 1, and with a mapping that removes everything.
TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
  const std::vector<SectionId> section2_only = {kSectionId2};
  const std::vector<SectionId> section3_only = {kSectionId3};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  // Identity mapping: every hit keeps its document id.
  ASSERT_THAT(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                               /*new_last_added_document_id=*/2),
              IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId2, section3_only),
                               EqualsDocHitInfo(kDocumentId0, section2_only))));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId1, section2_only))));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));

  // Delete document id 1, and document id 2 is translated to 1.
  ASSERT_THAT(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
                       /*new_last_added_document_id=*/1),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId1, section3_only),
                               EqualsDocHitInfo(kDocumentId0, section2_only))));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));

  // Delete all the remaining documents.
  ASSERT_THAT(
      index_->Optimize(
          /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
          /*new_last_added_document_id=*/kInvalidDocumentId),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
722
// Same scenario as MultiHitMultiTermIndexAfterOptimize, but all hits are
// merged before Optimize() is run.
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
  const std::vector<SectionId> section2_only = {kSectionId2};
  const std::vector<SectionId> section3_only = {kSectionId3};

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  ASSERT_THAT(index_->Merge(), IsOk());

  // Identity mapping: every hit keeps its document id.
  ASSERT_THAT(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                               /*new_last_added_document_id=*/2),
              IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId2, section3_only),
                               EqualsDocHitInfo(kDocumentId0, section2_only))));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId1, section2_only))));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));

  // Delete document id 1, and document id 2 is translated to 1.
  ASSERT_THAT(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
                       /*new_last_added_document_id=*/1),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(kDocumentId1, section3_only),
                               EqualsDocHitInfo(kDocumentId0, section2_only))));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));

  // Delete all the remaining documents.
  ASSERT_THAT(
      index_->Optimize(
          /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
          /*new_last_added_document_id=*/kInvalidDocumentId),
      IsOk());
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(
      GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
789
// Looking up a term that was never indexed must yield an iterator whose first
// Advance() reports RESOURCE_EXHAUSTED, even though other terms were indexed.
TEST_F(IndexTest, NoHitMultiTermIndex) {
  // Index two exact-match terms under kDocumentId0 / kSectionId2.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // "baz" was never buffered, so there is nothing for the iterator to return.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> missing_term_itr,
      index_->GetIterator("baz", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto advance_status = missing_term_itr->Advance();
  EXPECT_THAT(advance_status,
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
805
// Same scenario as NoHitMultiTermIndex, but the query runs after Merge() has
// been called on the index.
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
  // Index two exact-match terms under kDocumentId0 / kSectionId2.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // "baz" was never buffered, so there is nothing for the iterator to return.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> missing_term_itr,
      index_->GetIterator("baz", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto advance_status = missing_term_itr->Advance();
  EXPECT_THAT(advance_status,
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
823
// An exact query for "foo" must return both documents that contain "foo" and
// skip the document that only contains "bar".
TEST_F(IndexTest, MultiHitMultiTermIndex) {
  // kDocumentId0 / kSectionId2 -> "foo"
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor0.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId2 -> "bar"
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor1.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  // kDocumentId2 / kSectionId3 -> "foo"
  Index::Editor editor2 =
      index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor2.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor2.IndexAllBufferedTerms(), IsOk());

  const std::vector<SectionId> section2{kSectionId2};
  const std::vector<SectionId> section3{kSectionId3};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  // kDocumentId2 is expected ahead of kDocumentId0.
  EXPECT_THAT(GetHits(std::move(foo_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId2, section3),
                          EqualsDocHitInfo(kDocumentId0, section2)));
}
851
// Same scenario as MultiHitMultiTermIndex, but the query runs after Merge()
// has been called on the index.
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
  // kDocumentId0 / kSectionId2 -> "foo"
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor0.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId2 -> "bar"
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor1.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  // kDocumentId2 / kSectionId3 -> "foo"
  Index::Editor editor2 =
      index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor2.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor2.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  const std::vector<SectionId> section2{kSectionId2};
  const std::vector<SectionId> section3{kSectionId3};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  // kDocumentId2 is expected ahead of kDocumentId0.
  EXPECT_THAT(GetHits(std::move(foo_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId2, section3),
                          EqualsDocHitInfo(kDocumentId0, section2)));
}
881
// A section mask passed to GetIterator() must filter out hits that live in
// sections outside the mask.
TEST_F(IndexTest, MultiHitSectionRestrict) {
  // "foo" in kDocumentId0 / kSectionId2.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor0.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // "foo" in kDocumentId1 / kSectionId3.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor1.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  // Only the bit for kSectionId2 is set, so kDocumentId1 must be excluded.
  const SectionIdMask section2_only_mask = 1U << kSectionId2;
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> restricted_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, section2_only_mask,
                          TermMatchType::EXACT_ONLY));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(restricted_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
903
// Same scenario as MultiHitSectionRestrict, but the query runs after Merge()
// has been called on the index.
TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
  // "foo" in kDocumentId0 / kSectionId2.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor0.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // "foo" in kDocumentId1 / kSectionId3.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor1.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // Only the bit for kSectionId2 is set, so kDocumentId1 must be excluded.
  const SectionIdMask section2_only_mask = 1U << kSectionId2;
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> restricted_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, section2_only_mask,
                          TermMatchType::EXACT_ONLY));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(restricted_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
927
// Buffering the same term twice through one Editor must not grow the index
// beyond the first insertion, and must produce a single DocHitInfo.
TEST_F(IndexTest, SingleHitDedupeIndex) {
  // A fresh index reports zero elements.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_size, index_->GetElementsSize());
  EXPECT_THAT(empty_size, Eq(0));

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);

  // The first BufferTerm("foo") makes the reported size non-zero.
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_after_first,
                             index_->GetElementsSize());
  EXPECT_THAT(size_after_first, Gt(0));

  // Buffering the identical term again leaves the size unchanged.
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_after_second,
                             index_->GetElementsSize());
  EXPECT_THAT(size_after_second, Eq(size_after_first));
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(foo_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
950
// A PREFIX query for "foo" must match the term "fool" when that term was
// indexed with TermMatchType::PREFIX.
TEST_F(IndexTest, PrefixHit) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
966
// Same scenario as PrefixHit, but the query runs after Merge() has been called
// on the index.
TEST_F(IndexTest, PrefixHitAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
984
// A PREFIX query for "foo" must return both a document whose prefix-indexed
// term merely starts with "foo" and a document that contains exactly "foo".
TEST_F(IndexTest, MultiPrefixHit) {
  // kDocumentId0 / kSectionId2: "fool", indexed for prefix matching.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor0.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId3: "foo", indexed for exact matching only.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor1.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  const std::vector<SectionId> section3{kSectionId3};
  // kDocumentId1 is expected ahead of kDocumentId0.
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId1, section3),
                          EqualsDocHitInfo(kDocumentId0, section2)));
}
1007
// Same scenario as MultiPrefixHit, but the query runs after Merge() has been
// called on the index.
TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
  // kDocumentId0 / kSectionId2: "fool", indexed for prefix matching.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor0.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId3: "foo", indexed for exact matching only.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor1.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  const std::vector<SectionId> section3{kSectionId3};
  // kDocumentId1 is expected ahead of kDocumentId0.
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId1, section3),
                          EqualsDocHitInfo(kDocumentId0, section2)));
}
1032
// A term indexed as EXACT_ONLY ("fool") must not be returned by a PREFIX query
// for "foo"; only the document whose "foo" was indexed as PREFIX is expected.
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
  // kDocumentId0 / kSectionId2: "fool", exact matching only.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor0.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId3: "foo", prefix matching.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor1.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section3{kSectionId3};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId1, section3)));
}
1053
// Same scenario as NoExactHitInPrefixQuery, but the query runs after Merge()
// has been called on the index.
TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
  // kDocumentId0 / kSectionId2: "fool", exact matching only.
  Index::Editor editor0 =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor0.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor0.IndexAllBufferedTerms(), IsOk());

  // kDocumentId1 / kSectionId3: "foo", prefix matching.
  Index::Editor editor1 =
      index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor1.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor1.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section3{kSectionId3};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId1, section3)));
}
1076
// Two prefix-indexed terms in the same document/section that both match the
// query prefix ("foo" and "fool") must collapse into a single DocHitInfo.
TEST_F(IndexTest, PrefixHitDedupe) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
1093
// Same scenario as PrefixHitDedupe, but the query runs after Merge() has been
// called on the index.
TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const std::vector<SectionId> section2{kSectionId2};
  EXPECT_THAT(GetHits(std::move(prefix_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
1112
// Checks the ToString() rendering of a PREFIX iterator for three section
// masks: a two-section mask, kSectionIdMaskAll and kSectionIdMaskNone. The
// expected text is "(<64 mask bits>:foo* OR <64 mask bits>:foo*)".
TEST_F(IndexTest, PrefixToString) {
  // Mask with only the kSectionId2 and kSectionId3 bits set.
  const SectionIdMask two_section_mask =
      (1U << kSectionId2) | (1U << kSectionId3);
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, two_section_mask,
                          TermMatchType::PREFIX));
  EXPECT_THAT(prefix_itr->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000001100:foo* OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000001100:foo*)"));

  // Every section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(prefix_itr->ToString(),
              Eq("(1111111111111111111111111111111111111111111"
                 "111111111111111111111:foo* OR "
                 "11111111111111111111111111111111111111111111"
                 "11111111111111111111:foo*)"));

  // No section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskNone,
                          TermMatchType::PREFIX));
  EXPECT_THAT(prefix_itr->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000000000:foo* OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000000000:foo*)"));
}
1143
// Checks the ToString() rendering of an EXACT_ONLY iterator for three section
// masks: a two-section mask, kSectionIdMaskAll and kSectionIdMaskNone. The
// expected text is "(<64 mask bits>:foo OR <64 mask bits>:foo)".
TEST_F(IndexTest, ExactToString) {
  // Mask with only the kSectionId2 and kSectionId3 bits set.
  const SectionIdMask two_section_mask =
      (1U << kSectionId2) | (1U << kSectionId3);
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> exact_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, two_section_mask,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(exact_itr->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000001100:foo OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000001100:foo)"));

  // Every section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      exact_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(exact_itr->ToString(),
              Eq("(1111111111111111111111111111111111111111111"
                 "111111111111111111111:foo OR "
                 "11111111111111111111111111111111111111111111"
                 "11111111111111111111:foo)"));

  // No section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      exact_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskNone,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(exact_itr->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000000000:foo OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000000000:foo)"));
}
1174
// Non-ASCII terms must be retrievable both by a prefix of the term and by the
// exact term.
TEST_F(IndexTest, NonAsciiTerms) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("こんにちは", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(editor.BufferTerm("あなた", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  const std::vector<SectionId> section2{kSectionId2};

  // Prefix lookup: "こんに" is a leading substring of "こんにちは".
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_itr,
      index_->GetIterator("こんに", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(hit_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));

  // Exact lookup of the full term "あなた".
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_itr,
      index_->GetIterator("あなた", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
1199
// Same scenario as NonAsciiTerms, but the queries run after Merge() has been
// called on the index.
TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("こんにちは", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(editor.BufferTerm("あなた", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  const std::vector<SectionId> section2{kSectionId2};

  // Prefix lookup: "こんに" is a leading substring of "こんにちは".
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_itr,
      index_->GetIterator("こんに", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(hit_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));

  // Exact lookup of the full term "あなた".
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_itr,
      index_->GetIterator("あなた", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(hit_itr)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, section2)));
}
1226
// Fills a deliberately small index until indexing fails, then verifies that
// further hits are rejected with RESOURCE_EXHAUSTED while previously indexed
// terms can still be queried.
TEST_F(IndexTest, FullIndex) {
  // Make a smaller index so that it's easier to fill up.
  Index::Options options(index_dir_, /*index_merge_size=*/1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/64);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(options, &filesystem_, &icing_filesystem_));

  // Build a pool of random terms that all share the same prefix.
  std::default_random_engine random;
  std::vector<std::string> query_terms;
  std::string prefix = "prefix";
  for (int i = 0; i < 2600; ++i) {
    constexpr int kTokenSize = 5;
    query_terms.push_back(prefix +
                          RandomString(kAlNumAlphabet, kTokenSize, &random));
  }

  // Keep adding randomly chosen terms, 100 hits per document, until either
  // BufferTerm() or IndexAllBufferedTerms() reports an error.
  DocumentId document_id = 0;
  libtextclassifier3::Status status = libtextclassifier3::Status::OK;
  std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
  while (status.ok()) {
    for (int i = 0; i < 100; ++i) {
      Index::Editor edit = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
      size_t idx = uniform(random);
      status =
          edit.BufferTerm(query_terms.at(idx).c_str(), TermMatchType::PREFIX);
      if (!status.ok()) {
        break;
      }
      status = edit.IndexAllBufferedTerms();
      if (!status.ok()) {
        break;
      }
    }
    // Incremented even for the failing document, so the last document the
    // loop attempted is document_id - 1 once the loop exits.
    ++document_id;
  }
  // The loop must have stopped because the index is full, not because of some
  // unrelated error (same check as in FullIndexMerge).
  EXPECT_THAT(status,
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));

  // Adding more hits should fail.
  Index::Editor edit = index_->Edit(document_id + 1, kSectionId2,
                                    /*namespace_id=*/0);
  std::string term = prefix + "foo";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "bar";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "baz";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));

  // Spot-check every 25th term. size_t avoids a signed/unsigned comparison
  // against query_terms.size().
  for (size_t i = 0; i < query_terms.size(); i += 25) {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> itr,
        index_->GetIterator(query_terms.at(i).c_str(), /*term_start_index=*/0,
                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                            TermMatchType::PREFIX));
    // Each query term should contain at least one hit - there may have been
    // other hits for this term that were added.
    EXPECT_THAT(itr->Advance(), IsOk());
  }

  // A query for the shared prefix should surface the last document the fill
  // loop attempted first.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> last_itr,
      index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(last_itr->Advance(), IsOk());
  EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
}
1295
// Fills a deliberately small index until indexing fails with
// RESOURCE_EXHAUSTED, verifies that further hits are rejected, then calls
// Merge() and verifies that new hits can be added and queried again.
TEST_F(IndexTest, FullIndexMerge) {
  // Make a smaller index so that it's easier to fill up.
  Index::Options options(index_dir_, /*index_merge_size=*/1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/64);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(options, &filesystem_, &icing_filesystem_));

  // Build a pool of random terms that all share the same prefix.
  std::default_random_engine random;
  std::vector<std::string> query_terms;
  std::string prefix = "prefix";
  for (int i = 0; i < 2600; ++i) {
    constexpr int kTokenSize = 5;
    query_terms.push_back(prefix +
                          RandomString(kAlNumAlphabet, kTokenSize, &random));
  }

  // Keep adding randomly chosen terms, 100 hits per document, until either
  // BufferTerm() or IndexAllBufferedTerms() reports an error.
  DocumentId document_id = 0;
  libtextclassifier3::Status status = libtextclassifier3::Status::OK;
  std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
  while (status.ok()) {
    for (int i = 0; i < 100; ++i) {
      Index::Editor edit = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
      size_t idx = uniform(random);
      status =
          edit.BufferTerm(query_terms.at(idx).c_str(), TermMatchType::PREFIX);
      if (!status.ok()) {
        break;
      }
      status = edit.IndexAllBufferedTerms();
      if (!status.ok()) {
        break;
      }
    }
    // Incremented even for the failing document, so the last document the
    // loop attempted is document_id - 1 once the loop exits.
    ++document_id;
  }
  EXPECT_THAT(status,
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));

  // Adding more hits should fail.
  Index::Editor edit = index_->Edit(document_id + 1, kSectionId2,
                                    /*namespace_id=*/0);
  std::string term = prefix + "foo";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "bar";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "baz";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> last_itr,
      index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(last_itr->Advance(), IsOk());
  EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));

  // After merging with the main index. Adding more hits should succeed now.
  ICING_ASSERT_OK(index_->Merge());
  edit = index_->Edit(document_id + 1, kSectionId2, /*namespace_id=*/0);
  // Reset `term` explicitly; it still holds prefix + "baz" from above.
  term = prefix + "foo";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "bar";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  term = prefix + "baz";
  EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator(prefix + "bar", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  // We know that "bar" should have at least one hit because we just added it!
  EXPECT_THAT(itr->Advance(), IsOk());
  EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
  // The prefix query should now surface the newly added document first.
  ICING_ASSERT_OK_AND_ASSIGN(
      last_itr, index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
                                    /*unnormalized_term_length=*/0,
                                    kSectionIdMaskAll, TermMatchType::PREFIX));
  EXPECT_THAT(last_itr->Advance(), IsOk());
  EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
}
1380
// Optimize() on an index that contains nothing must succeed, leave
// last_added_document_id() invalid, and leave both exact and prefix queries
// empty.
TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) {
  // Optimize an empty index should succeed, but have no effects.
  ICING_ASSERT_OK(
      index_->Optimize(std::vector<DocumentId>(),
                       /*new_last_added_document_id=*/kInvalidDocumentId));
  EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);

  // Arguments follow the order used by every other GetIterator() call in this
  // file: term, term_start_index, unnormalized_term_length, section mask,
  // match type. The section mask must be kSectionIdMaskAll so that no hit
  // could be hidden by the mask.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator("", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());

  ICING_ASSERT_OK_AND_ASSIGN(
      itr, index_->GetIterator("", /*term_start_index=*/0,
                               /*unnormalized_term_length=*/0,
                               kSectionIdMaskAll, TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
1401
// Indexes one unique term per document while cycling through 64 section ids,
// with Merge() called halfway through, then verifies prefix and exact lookups
// across all of them.
TEST_F(IndexTest, IndexShouldWorkAtSectionLimit) {
  constexpr int kDocCount = 4096;
  constexpr int kSectionCount = 64;
  // Merging at the midpoint puts half of the hits in the main index and leaves
  // the other half in the lite index.
  constexpr int kMergePoint = kDocCount / 2;

  std::string prefix = "prefix";
  std::default_random_engine random;
  std::vector<std::string> query_terms;
  // Add 2048 hits to main index, and 2048 hits to lite index.
  for (int i = 0; i < kDocCount; ++i) {
    if (i == kMergePoint) {
      ICING_ASSERT_OK(index_->Merge());
    }
    // Generate a unique term for document i.
    query_terms.push_back(prefix + RandomString("abcdefg", 5, &random) +
                          std::to_string(i));
    TermMatchType::Code term_match_type = TermMatchType::PREFIX;
    SectionId section_id = i % kSectionCount;
    if (section_id == 2) {
      // Make section 2 an exact section.
      term_match_type = TermMatchType::EXACT_ONLY;
    }
    Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
                                      /*namespace_id=*/0);
    ICING_ASSERT_OK(
        edit.BufferTerm(query_terms.at(i).c_str(), term_match_type));
    ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
  }

  // Build the expected prefix hits in ascending document order, then reverse
  // them to match the order in which the hits are compared below.
  std::vector<DocHitInfo> exp_prefix_hits;
  for (int i = 0; i < kDocCount; ++i) {
    if (i % kSectionCount == 2) {
      // Section 2 is an exact section, so we should not see any hits in
      // prefix search.
      continue;
    }
    exp_prefix_hits.push_back(DocHitInfo(i));
    exp_prefix_hits.back().UpdateSection(/*section_id=*/i % kSectionCount);
  }
  std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());

  // Check prefix search.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::vector<DocHitInfo> hits,
      GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::PREFIX));
  EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));

  // Check exact search: every term is unique, so each one must resolve to
  // exactly its own document and section.
  for (int i = 0; i < kDocCount; ++i) {
    ICING_ASSERT_OK_AND_ASSIGN(
        hits,
        GetHits(query_terms[i], /*term_start_index=*/0,
                /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
    EXPECT_THAT(hits,
                ElementsAre(EqualsDocHitInfo(
                    i, std::vector<SectionId>{
                           static_cast<SectionId>(i % kSectionCount)})));
  }
}
1456
1457 // Skip this test on Android because of timeout.
1458 #if !defined(__ANDROID__)
// Indexes one hit for every document id from 0 through kMaxDocumentId,
// calling Merge() every kHitsPerMerge documents, then verifies prefix and
// exact lookups over the whole range.
TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
  const std::string prefix = "pre";
  std::default_random_engine random;
  // Number of documents indexed between two consecutive Merge() calls.
  const int kHitsPerMerge = 1024 * 1024 / 8;
  // Hits added since the most recent Merge().
  int hits_since_merge = 0;
  for (int doc_id = 0; doc_id <= kMaxDocumentId; ++doc_id) {
    const bool at_merge_boundary =
        doc_id % kHitsPerMerge == 0 && doc_id != 0;
    if (at_merge_boundary) {
      ICING_ASSERT_OK(index_->Merge());
      hits_since_merge = 0;
    }

    // Section 2 is the exact section and gets the document id as its term;
    // every other section gets a random term that starts with `prefix`.
    const SectionId section_id = doc_id % 64;
    const bool is_exact_section = section_id == 2;
    const TermMatchType::Code term_match_type =
        is_exact_section ? TermMatchType::EXACT_ONLY : TermMatchType::PREFIX;
    const std::string term = is_exact_section
                                 ? std::to_string(doc_id)
                                 : prefix + RandomString("abcd", 5, &random);

    Index::Editor editor = index_->Edit(/*document_id=*/doc_id, section_id,
                                        /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm(term.c_str(), term_match_type));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
    ++hits_since_merge;
    index_->set_last_added_document_id(doc_id);
  }
  // Ensure that the lite index still contains some data to better test both
  // indexes.
  ASSERT_THAT(hits_since_merge, Eq(kHitsPerMerge - 1));
  EXPECT_EQ(index_->last_added_document_id(), kMaxDocumentId);

  // Expected prefix hits: every document outside the exact section, built in
  // ascending order and then reversed.
  std::vector<DocHitInfo> expected_prefix_hits;
  for (int doc_id = 0; doc_id <= kMaxDocumentId; ++doc_id) {
    // Section 2 is an exact section, so we should not see any hits in
    // prefix search.
    if (doc_id % 64 != 2) {
      expected_prefix_hits.push_back(DocHitInfo(doc_id));
      expected_prefix_hits.back().UpdateSection(/*section_id=*/doc_id % 64);
    }
  }
  std::reverse(expected_prefix_hits.begin(), expected_prefix_hits.end());

  // Check prefix search.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::vector<DocHitInfo> hits,
      GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::PREFIX));
  EXPECT_THAT(hits, ContainerEq(expected_prefix_hits));

  // Check exact search.
  for (int doc_id = 0; doc_id <= kMaxDocumentId; ++doc_id) {
    // Only section 2 is an exact section
    if (doc_id % 64 != 2) {
      continue;
    }
    ICING_ASSERT_OK_AND_ASSIGN(
        hits,
        GetHits(std::to_string(doc_id), /*term_start_index=*/0,
                /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
    EXPECT_THAT(hits,
                ElementsAre(EqualsDocHitInfo(
                    doc_id,
                    std::vector<SectionId>{static_cast<SectionId>(2)})));
  }
}
1523 #endif // if !defined(__ANDROID__)
1524
// Verifies that Index::Optimize remaps document ids for hits that were added
// before and after a Merge, and that a Merge after Optimize still succeeds and
// leaves the query results unchanged.
TEST_F(IndexTest, IndexOptimize) {
  constexpr int kNumDocuments = 2048;
  constexpr int kNumSections = 64;
  constexpr int kExactSection = 2;

  const std::string prefix = "prefix";
  std::default_random_engine random;
  std::vector<std::string> query_terms;

  // Index one term per document. Merge() is called once, right before the
  // second half of the documents is added.
  for (int doc = 0; doc < kNumDocuments; ++doc) {
    if (doc == kNumDocuments / 2) {
      ICING_ASSERT_OK(index_->Merge());
    }
    // The term for this document is the shared prefix, a random infix and the
    // document id as suffix.
    query_terms.push_back(prefix + RandomString("abcdefg", 5, &random) +
                          std::to_string(doc));
    const SectionId section_id = static_cast<SectionId>(doc % kNumSections);
    // Section 2 is indexed as exact-only; every other section as prefix.
    const TermMatchType::Code term_match_type =
        (section_id == kExactSection) ? TermMatchType::EXACT_ONLY
                                      : TermMatchType::PREFIX;
    Index::Editor edit = index_->Edit(/*document_id=*/doc, section_id,
                                      /*namespace_id=*/0);
    ICING_ASSERT_OK(
        edit.BufferTerm(query_terms.back().c_str(), term_match_type));
    ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
    index_->set_last_added_document_id(doc);
  }

  // Build the old -> new document id mapping: every third document (0, 3, 6,
  // ...) is deleted, the survivors receive consecutive new ids.
  std::vector<DocumentId> document_id_old_to_new;
  DocumentId next_document_id = 0;
  DocumentId new_last_added_document_id = kInvalidDocumentId;
  for (int old_id = 0; old_id < kNumDocuments; ++old_id) {
    if (old_id % 3 == 0) {
      document_id_old_to_new.push_back(kInvalidDocumentId);
      continue;
    }
    new_last_added_document_id = next_document_id++;
    document_id_old_to_new.push_back(new_last_added_document_id);
  }

  // Expected prefix-search result, listed from the highest surviving document
  // id down to the lowest. Deleted documents and documents in the exact-only
  // section must not appear.
  std::vector<DocHitInfo> exp_prefix_hits;
  for (int old_id = kNumDocuments - 1; old_id >= 0; --old_id) {
    const DocumentId new_id = document_id_old_to_new[old_id];
    const int section = old_id % kNumSections;
    if (new_id == kInvalidDocumentId || section == kExactSection) {
      continue;
    }
    exp_prefix_hits.push_back(DocHitInfo(new_id));
    exp_prefix_hits.back().UpdateSection(/*section_id=*/section);
  }

  // Optimize and verify the remapped contents.
  ICING_ASSERT_OK(
      index_->Optimize(document_id_old_to_new, new_last_added_document_id));
  EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);

  // Prefix search.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::vector<DocHitInfo> hits,
      GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::PREFIX));
  EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));

  // Exact search: each term either finds its remapped document or nothing.
  for (int old_id = 0; old_id < kNumDocuments; ++old_id) {
    ICING_ASSERT_OK_AND_ASSIGN(
        hits,
        GetHits(query_terms[old_id], /*term_start_index=*/0,
                /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
    const DocumentId new_id = document_id_old_to_new[old_id];
    if (new_id != kInvalidDocumentId) {
      EXPECT_THAT(hits,
                  ElementsAre(EqualsDocHitInfo(
                      new_id, std::vector<SectionId>{static_cast<SectionId>(
                                  old_id % kNumSections)})));
    } else {
      EXPECT_THAT(hits, IsEmpty());
    }
  }

  // A merge after optimize must still work and must not change the results.
  ICING_ASSERT_OK(index_->Merge());
  EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);

  // Prefix search.
  ICING_ASSERT_OK_AND_ASSIGN(
      hits, GetHits(prefix, /*term_start_index=*/0,
                    /*unnormalized_term_length=*/0, TermMatchType::PREFIX));
  EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));

  // Exact search.
  for (int old_id = 0; old_id < kNumDocuments; ++old_id) {
    ICING_ASSERT_OK_AND_ASSIGN(
        hits,
        GetHits(query_terms[old_id], /*term_start_index=*/0,
                /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
    const DocumentId new_id = document_id_old_to_new[old_id];
    if (new_id != kInvalidDocumentId) {
      EXPECT_THAT(hits,
                  ElementsAre(EqualsDocHitInfo(
                      new_id, std::vector<SectionId>{static_cast<SectionId>(
                                  old_id % kNumSections)})));
    } else {
      EXPECT_THAT(hits, IsEmpty());
    }
  }
}
1627
// Verifies that Index::Create returns INTERNAL when the filesystem refuses to
// create directories.
TEST_F(IndexTest, IndexCreateIOFailure) {
  constexpr int kIndexMergeSize = 1024 * 1024;
  constexpr int kLiteIndexSortSize = 1024 * 8;

  // Mock filesystem whose CreateDirectoryRecursively always reports failure.
  NiceMock<IcingMockFilesystem> failing_icing_filesystem;
  ON_CALL(failing_icing_filesystem, CreateDirectoryRecursively)
      .WillByDefault(Return(false));

  Index::Options options(index_dir_, /*index_merge_size=*/kIndexMergeSize,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/kLiteIndexSortSize);
  EXPECT_THAT(Index::Create(options, &filesystem_, &failing_icing_filesystem),
              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
1640
// Verifies that Index::Create returns DATA_LOSS after the lite index hit
// buffer file has been overwritten on disk.
TEST_F(IndexTest, IndexCreateCorruptionFailure) {
  // Index two prefix terms for one document.
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Close the index before touching its files.
  index_.reset();

  // Corrupt the hit buffer file.
  const std::string hit_buffer_path = index_dir_ + "/idx/lite.hb";
  ScopedFd hit_buffer_fd(
      icing_filesystem_.OpenForWrite(hit_buffer_path.c_str()));
  ASSERT_THAT(hit_buffer_fd.is_valid(), IsTrue());

  // The header occupies the first page of the hit buffer, so the garbage is
  // written at the start of the second page, i.e. the first page of content.
  constexpr std::string_view kGarbage = "ffffffffffffffffffffff";
  int content_offset = GetBlockSize();
  ASSERT_THAT(icing_filesystem_.PWrite(hit_buffer_fd.get(), content_offset,
                                       kGarbage.data(), kGarbage.length()),
              IsTrue());

  // Reopening the index must detect the corruption.
  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/1024 * 8);
  EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
              StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
1673
// Verifies that UpdateChecksum() returns the same value as GetChecksum(), and
// that the checksum changes after a merge and after further additions.
TEST_F(IndexTest, UpdateChecksum) {
  // Phase 1: content was added and no merge has happened yet.
  Index::Editor first_edit = index_->Edit(kDocumentId0, kSectionId2,
                                          /*namespace_id=*/0);
  ASSERT_THAT(first_edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(first_edit.BufferTerm("bar", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(first_edit.IndexAllBufferedTerms(), IsOk());

  const Crc32 lite_only_crc = index_->GetChecksum();
  EXPECT_THAT(index_->UpdateChecksum(), Eq(lite_only_crc));
  EXPECT_THAT(index_->GetChecksum(), Eq(lite_only_crc));

  // Phase 2: everything has been merged.
  ASSERT_THAT(index_->Merge(), IsOk());

  const Crc32 main_only_crc = index_->GetChecksum();
  EXPECT_THAT(main_only_crc, Not(Eq(lite_only_crc)));
  EXPECT_THAT(index_->UpdateChecksum(), Eq(main_only_crc));
  EXPECT_THAT(index_->GetChecksum(), Eq(main_only_crc));

  // Phase 3: new content is added on top of the merged content.
  Index::Editor second_edit = index_->Edit(kDocumentId1, kSectionId2,
                                           /*namespace_id=*/0);
  ASSERT_THAT(second_edit.BufferTerm("baz", TermMatchType::PREFIX), IsOk());
  ASSERT_THAT(second_edit.BufferTerm("bat", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(second_edit.IndexAllBufferedTerms(), IsOk());

  const Crc32 both_crc = index_->GetChecksum();
  EXPECT_THAT(both_crc, Not(Eq(lite_only_crc)));
  EXPECT_THAT(both_crc, Not(Eq(main_only_crc)));
  EXPECT_THAT(index_->UpdateChecksum(), Eq(both_crc));
  EXPECT_THAT(index_->GetChecksum(), Eq(both_crc));
}
1704
// Verifies that hits persisted with PersistToDisk() (without a merge) are
// still found after the index is destroyed and created again.
TEST_F(IndexTest, IndexPersistence) {
  // Index two prefix terms for one document and persist them.
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  EXPECT_THAT(index_->PersistToDisk(), IsOk());

  // Drop the in-memory index and reopen it from the same directory.
  index_.reset();
  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/1024 * 8);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(options, &filesystem_, &icing_filesystem_));

  // The prefix "f" must still find the document that contains "foo".
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1734
// Verifies that hits which were merged and then persisted are still found
// after the index is destroyed and created again.
TEST_F(IndexTest, IndexPersistenceAfterMerge) {
  // Index two prefix terms for one document, merge, then persist.
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK(index_->Merge());
  EXPECT_THAT(index_->PersistToDisk(), IsOk());

  // Drop the in-memory index and reopen it from the same directory.
  index_.reset();
  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/1024 * 8);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(options, &filesystem_, &icing_filesystem_));

  // The prefix "f" must still find the document that contains "foo".
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1765
// Verifies that Index::Create rejects an index_merge_size of UINT32_MAX with
// INVALID_ARGUMENT.
TEST_F(IndexTest, InvalidHitBufferSize) {
  constexpr uint32_t kTooLargeMergeSize = std::numeric_limits<uint32_t>::max();
  Index::Options options(index_dir_, /*index_merge_size=*/kTooLargeMergeSize,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/1024 * 8);
  EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
1773
// Verifies that FindTermsByPrefix returns an empty result when num_to_return
// is zero or negative, both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Index a term that would match the queried prefix "foo".
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Runs the prefix query for "foo" with the given result limit.
  const auto find_foo = [this, &checker](int num_to_return) {
    return index_->FindTermsByPrefix(
        /*prefix=*/"foo", num_to_return, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge.
  for (int num_to_return : {0, -1}) {
    EXPECT_THAT(find_foo(num_to_return), IsOkAndHolds(IsEmpty()));
  }

  ICING_ASSERT_OK(index_->Merge());

  // After the merge.
  for (int num_to_return : {0, -1}) {
    EXPECT_THAT(find_foo(num_to_return), IsOkAndHolds(IsEmpty()));
  }
}
1809
// Verifies that FindTermsByPrefix only returns terms that start with the
// requested prefix, both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Runs the prefix query for "b".
  const auto find_b = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"b", /*num_to_return=*/10, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge: "bar" matches, "foo" does not.
  EXPECT_THAT(find_b(),
              IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge: same expectation.
  EXPECT_THAT(find_b(),
              IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
}
1836
// Verifies that FindTermsByPrefix returns no more than num_to_return terms,
// both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Three terms share the prefix "f".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"fo", "foo", "fool"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Runs the prefix query for "f" limited to two results.
  const auto find_two = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/2, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge: three candidates, only two returned.
  EXPECT_THAT(find_two(), IsOkAndHolds(SizeIs(2)));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge: three candidates, only two returned.
  EXPECT_THAT(find_two(), IsOkAndHolds(SizeIs(2)));
}
1864
// Verifies that FindTermsByPrefix returns terms that were indexed under
// different namespace ids, both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Indexes a single exact-only term for the given document and namespace.
  const auto index_term = [this](DocumentId document_id, int namespace_id,
                                 const char* term) {
    Index::Editor editor =
        index_->Edit(document_id, kSectionId2, namespace_id);
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  };
  index_term(kDocumentId0, /*namespace_id=*/0, "fo");
  index_term(kDocumentId1, /*namespace_id=*/1, "foo");
  index_term(kDocumentId2, /*namespace_id=*/2, "fool");

  // Runs the prefix query for "f".
  const auto find_f = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge: "fo", "foo" and "fool" are all returned.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(UnorderedElementsAre(
                  EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
                  EqualsTermMetadata("fool", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge: "fo", "foo" and "fool" are all returned.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(UnorderedElementsAre(
                  EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
                  EqualsTermMetadata("fool", 1))));
}
1904
// Verifies the per-term counts reported by FindTermsByPrefix with
// DOCUMENT_COUNT ranking, both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Document 0 contains "foo" and "fool".
  Index::Editor first_editor = index_->Edit(kDocumentId0, kSectionId2,
                                            /*namespace_id=*/0);
  for (const char* term : {"foo", "fool"}) {
    EXPECT_THAT(first_editor.BufferTerm(term, TermMatchType::EXACT_ONLY),
                IsOk());
  }
  EXPECT_THAT(first_editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 contains only "fool".
  Index::Editor second_editor = index_->Edit(kDocumentId1, kSectionId2,
                                             /*namespace_id=*/0);
  EXPECT_THAT(second_editor.BufferTerm("fool", TermMatchType::EXACT_ONLY),
              IsOk());
  EXPECT_THAT(second_editor.IndexAllBufferedTerms(), IsOk());

  // Runs the prefix query for "f".
  const auto find_f = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge: "fool" (2 hits) is listed ahead of "foo" (1 hit).
  EXPECT_THAT(find_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                                       EqualsTermMetadata("foo", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge: same counts and order.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                                       EqualsTermMetadata("foo", 1))));
}
1937
// Verifies that FindTermsByPrefix reports the full count for a term that was
// indexed for 4000 documents, both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixMultipleHitBatch) {
  constexpr int kNumDocuments = 4000;
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Index "fool" once per document so that its hits span multiple batches.
  for (int document_id = 0; document_id < kNumDocuments; document_id++) {
    Index::Editor editor = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
    EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  // Runs the prefix query for "f".
  const auto find_f = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 4000))));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 4000))));
}
1964
// Verifies that FindTermsByPrefix with DOCUMENT_COUNT ranking lists terms in
// descending count order: before a merge, after a merge, and after more hits
// have been added on top of the merged content.
TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // The document at position k indexes kTerms[k..5]. As a result "term-one"
  // ends up in 1 document, "term-two" in 2, ..., "term-six" in 6.
  const std::vector<const char*> kTerms = {"term-one",  "term-two",
                                           "term-three", "term-four",
                                           "term-five", "term-six"};
  const std::vector<DocumentId> kInitialDocuments = {
      kDocumentId0, kDocumentId2, kDocumentId3,
      kDocumentId4, kDocumentId5, kDocumentId6};
  for (size_t first_term = 0; first_term < kInitialDocuments.size();
       ++first_term) {
    Index::Editor editor =
        index_->Edit(kInitialDocuments[first_term], kSectionId2,
                     /*namespace_id=*/0);
    for (size_t t = first_term; t < kTerms.size(); ++t) {
      EXPECT_THAT(editor.BufferTerm(kTerms[t], TermMatchType::EXACT_ONLY),
                  IsOk());
    }
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  // Runs the prefix query for "t" with the given result limit.
  const auto find_t = [this, &checker](int num_to_return) {
    return index_->FindTermsByPrefix(
        /*prefix=*/"t", num_to_return, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Order before the merge.
  EXPECT_THAT(find_t(/*num_to_return=*/10),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
                                       EqualsTermMetadata("term-five", 5),
                                       EqualsTermMetadata("term-four", 4),
                                       EqualsTermMetadata("term-three", 3),
                                       EqualsTermMetadata("term-two", 2),
                                       EqualsTermMetadata("term-one", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // Order after the merge.
  EXPECT_THAT(find_t(/*num_to_return=*/10),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
                                       EqualsTermMetadata("term-five", 5),
                                       EqualsTermMetadata("term-four", 4),
                                       EqualsTermMetadata("term-three", 3),
                                       EqualsTermMetadata("term-two", 2),
                                       EqualsTermMetadata("term-one", 1))));

  // Add two more documents that each contain term-one, term-three and
  // term-five. The expected order then becomes five, six, three, four, one,
  // two.
  for (DocumentId document_id : {kDocumentId7, kDocumentId8}) {
    Index::Editor editor = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
    for (const char* term : {"term-one", "term-three", "term-five"}) {
      EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
    }
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  // Counts from the merged content and the newly added hits are combined.
  EXPECT_THAT(
      find_t(/*num_to_return=*/10),
      IsOkAndHolds(ElementsAre(
          EqualsTermMetadata("term-five", 7), EqualsTermMetadata("term-six", 6),
          EqualsTermMetadata("term-three", 5),
          EqualsTermMetadata("term-four", 4), EqualsTermMetadata("term-one", 3),
          EqualsTermMetadata("term-two", 2))));

  // Limiting to three results returns the three highest-ranked terms.
  EXPECT_THAT(find_t(/*num_to_return=*/3),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-five", 7),
                                       EqualsTermMetadata("term-six", 6),
                                       EqualsTermMetadata("term-three", 5))));
}
2084
// Verifies the counts FindTermsByPrefix reports after a merge for terms that
// were indexed with TermMatchType::PREFIX, when queried with PREFIX and with
// EXACT_ONLY.
TEST_F(IndexTest, FindTermByPrefix_InTermMatchTypePrefix_ShouldReturnInOrder) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Indexes a single prefix term for the given document.
  const auto index_prefix_term = [this](DocumentId document_id,
                                        const char* term) {
    Index::Editor editor = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  };
  index_prefix_term(kDocumentId0, "fo");
  index_prefix_term(kDocumentId2, "foo");
  index_prefix_term(kDocumentId3, "fool");

  ICING_ASSERT_OK(index_->Merge());

  // Runs the query for "f" with the given match type.
  const auto find_f = [this, &checker](TermMatchType::Code match_type) {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, match_type,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // With PREFIX matching, a term is also counted for documents that contain a
  // longer term starting with it:
  //   "fo"   -> its own document plus the "foo" and "fool" documents (3)
  //   "foo"  -> its own document plus the "fool" document (2)
  //   "fool" -> its own document only (1)
  EXPECT_THAT(find_f(TermMatchType::PREFIX),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
                                       EqualsTermMetadata("foo", 2),
                                       EqualsTermMetadata("fool", 1))));

  // With EXACT_ONLY matching every term is counted once, so all terms rank
  // equally and the order is not checked.
  EXPECT_THAT(find_f(TermMatchType::EXACT_ONLY),
              IsOkAndHolds(UnorderedElementsAre(
                  EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
                  EqualsTermMetadata("fool", 1))));
}
2126
// Verifies the counts reported by FindTermsByPrefix for a term indexed in
// eight documents and a term indexed in one, before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Document 0 contains "foo" and "fool".
  {
    Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                        /*namespace_id=*/0);
    EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  // Documents 1 through 7 contain only "fool".
  for (DocumentId document_id :
       {kDocumentId1, kDocumentId2, kDocumentId3, kDocumentId4, kDocumentId5,
        kDocumentId6, kDocumentId7}) {
    Index::Editor editor = index_->Edit(document_id, kSectionId2,
                                        /*namespace_id=*/0);
    EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  // Runs the prefix query for "f".
  const auto find_f = [this, &checker]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
        &checker);
  };

  // Before the merge: "fool" (8 hits) is listed ahead of "foo" (1 hit).
  EXPECT_THAT(find_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 8),
                                       EqualsTermMetadata("foo", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // After the merge only the counts are checked, not the order.
  EXPECT_THAT(find_f(),
              IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
                                                EqualsTermMetadata("fool", 8))));
}
2183
// Verifies that FindTermsByPrefix adds up the hits of a term that was indexed
// both before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Document 0 contains "foo" and "fool"; it is merged right away.
  Index::Editor merged_editor = index_->Edit(kDocumentId0, kSectionId2,
                                             /*namespace_id=*/0);
  for (const char* term : {"foo", "fool"}) {
    EXPECT_THAT(merged_editor.BufferTerm(term, TermMatchType::EXACT_ONLY),
                IsOk());
  }
  EXPECT_THAT(merged_editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // Document 1 contains "fool" and is added after the merge.
  Index::Editor unmerged_editor = index_->Edit(kDocumentId1, kSectionId2,
                                               /*namespace_id=*/0);
  EXPECT_THAT(unmerged_editor.BufferTerm("fool", TermMatchType::EXACT_ONLY),
              IsOk());
  EXPECT_THAT(unmerged_editor.IndexAllBufferedTerms(), IsOk());

  // "fool" is counted for both documents and therefore ranks first.
  EXPECT_THAT(
      index_->FindTermsByPrefix(
          /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
          SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
          &checker),
      IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                               EqualsTermMetadata("foo", 1))));
}
2207
// Verifies the scores FindTermsByPrefix reports under the TERM_FREQUENCY,
// DOCUMENT_COUNT and NONE ranking strategies, before and after a merge.
TEST_F(IndexTest, FindTermRankComparison) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // Document 0: "fo" is buffered twice, "foo" and "fool" once (exact-only).
  Index::Editor exact_editor = index_->Edit(kDocumentId0, kSectionId2,
                                            /*namespace_id=*/0);
  for (const char* term : {"fo", "fo", "foo", "fool"}) {
    EXPECT_THAT(exact_editor.BufferTerm(term, TermMatchType::EXACT_ONLY),
                IsOk());
  }
  EXPECT_THAT(exact_editor.IndexAllBufferedTerms(), IsOk());

  // Document 2: "fo" and "foo" once each (prefix).
  Index::Editor prefix_editor = index_->Edit(kDocumentId2, kSectionId2,
                                             /*namespace_id=*/0);
  for (const char* term : {"fo", "foo"}) {
    EXPECT_THAT(prefix_editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(prefix_editor.IndexAllBufferedTerms(), IsOk());

  // Checks the result of an EXACT_ONLY query for "f" under each of the three
  // ranking strategies. The same expectations hold before and after a merge.
  const auto expect_rankings = [this, &checker]() {
    // TERM_FREQUENCY: "fo" scores 3, "foo" 2, "fool" 1, in that order.
    EXPECT_THAT(
        index_->FindTermsByPrefix(
            /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
            SuggestionScoringSpecProto::SuggestionRankingStrategy::
                TERM_FREQUENCY,
            &checker),
        IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
                                 EqualsTermMetadata("foo", 2),
                                 EqualsTermMetadata("fool", 1))));
    // DOCUMENT_COUNT: "fo" and "foo" score 2, "fool" scores 1.
    EXPECT_THAT(
        index_->FindTermsByPrefix(
            /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
            SuggestionScoringSpecProto::SuggestionRankingStrategy::
                DOCUMENT_COUNT,
            &checker),
        IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 2),
                                          EqualsTermMetadata("foo", 2),
                                          EqualsTermMetadata("fool", 1))));
    // NONE: every term scores 1.
    EXPECT_THAT(
        index_->FindTermsByPrefix(
            /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
            SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE,
            &checker),
        IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
                                          EqualsTermMetadata("foo", 1),
                                          EqualsTermMetadata("fool", 1))));
  };

  expect_rankings();

  ICING_ASSERT_OK(index_->Merge());

  expect_rankings();
}
2274
// Checks that FindTermsByPrefix reports terms whose hits were merged into the
// main index together with terms whose hits were added after the merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) {
  AlwaysTrueSuggestionResultCheckerImpl checker;

  // "foo" is indexed for document 0 and then merged.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  ASSERT_THAT(index_->Merge(), IsOk());

  // "fool" is indexed for document 1 after the merge; no further merge runs.
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index.
  EXPECT_THAT(
      index_->FindTermsByPrefix(
          /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
          SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
          &checker),
      IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
                                        EqualsTermMetadata("fool", 1))));
}
2299
// Checks GetElementsSize on an empty index, after indexing one term, and after
// merging that term.
TEST_F(IndexTest, GetElementsSize) {
  // Nothing has been indexed yet, so the reported size must be zero.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t elements_size,
                             index_->GetElementsSize());
  EXPECT_THAT(elements_size, Eq(0));

  // Index a single term; the size must become positive.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  ICING_ASSERT_OK_AND_ASSIGN(elements_size, index_->GetElementsSize());
  EXPECT_THAT(elements_size, Gt(0));

  // The size must stay positive once the term has been merged.
  ICING_ASSERT_OK(index_->Merge());
  ICING_ASSERT_OK_AND_ASSIGN(elements_size, index_->GetElementsSize());
  EXPECT_THAT(elements_size, Gt(0));
}
2317
// Checks that an exact-match query for "foo" returns hits indexed before the
// merge as well as hits indexed after it, and nothing for longer terms.
TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
  // Indexed before the merge: doc0 {"foo", "fool"} and doc1 {"foot"}.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  ASSERT_THAT(index_->Merge(), IsOk());

  // Indexed after the merge: doc2 {"footer" in section 2, "foo" in section 3}.
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  // Only the two documents that contain exactly "foo" are expected, newest
  // document id first.
  EXPECT_THAT(
      GetHits(std::move(foo_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2350
// Checks that a prefix query for "foo" returns prefix-enabled hits indexed
// before the merge as well as those indexed after it.
TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
  // Indexed before the merge: doc0 {"foo", "fool"} (exact only) and
  // doc1 {"foot"} (prefix enabled).
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  ASSERT_THAT(index_->Merge(), IsOk());

  // Indexed after the merge: doc2 {"footer"} (exact only, section 2) and
  // doc2 {"foo"} (prefix enabled, section 3).
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  ICING_EXPECT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_prefix_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  // Expected hits, newest document id first: doc2 ("foo"), doc1 ("foot") and
  // doc0 ("foo").
  EXPECT_THAT(
      GetHits(std::move(foo_prefix_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2384
// Checks that GetDebugInfo returns non-empty main/lite info strings, that
// DETAILED output is longer than BASIC output, and that the strings change as
// hits are added to the lite index and merged into the main index.
TEST_F(IndexTest, GetDebugInfo) {
  // Index two documents and merge them into the main index.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_ASSERT_OK(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  index_->set_last_added_document_id(kDocumentId1);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  ASSERT_THAT(index_->Merge(), IsOk());

  // Index a third document that is not merged yet.
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  index_->set_last_added_document_id(kDocumentId2);
  ICING_ASSERT_OK(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // BASIC verbosity: both info strings must be populated.
  IndexDebugInfoProto basic_info =
      index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
  ICING_LOG(DBG) << "main_index_info:\n" << basic_info.main_index_info();
  ICING_LOG(DBG) << "lite_index_info:\n" << basic_info.lite_index_info();
  EXPECT_THAT(basic_info.main_index_info(), Not(IsEmpty()));
  EXPECT_THAT(basic_info.lite_index_info(), Not(IsEmpty()));

  // DETAILED verbosity: both info strings must be longer than the BASIC ones.
  IndexDebugInfoProto detailed_info =
      index_->GetDebugInfo(DebugInfoVerbosity::DETAILED);
  ICING_LOG(DBG) << "main_index_info:\n" << detailed_info.main_index_info();
  ICING_LOG(DBG) << "lite_index_info:\n" << detailed_info.lite_index_info();
  EXPECT_THAT(detailed_info.main_index_info(),
              SizeIs(Gt(basic_info.main_index_info().size())));
  EXPECT_THAT(detailed_info.lite_index_info(),
              SizeIs(Gt(basic_info.lite_index_info().size())));

  // Index one more document without merging. Only the lite index debug string
  // should differ from the first BASIC snapshot.
  editor = index_->Edit(kDocumentId3, kSectionId2, /*namespace_id=*/0);
  index_->set_last_added_document_id(kDocumentId3);
  ICING_ASSERT_OK(editor.BufferTerm("far", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  IndexDebugInfoProto info_after_add =
      index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
  ICING_LOG(DBG) << "main_index_info:\n" << info_after_add.main_index_info();
  ICING_LOG(DBG) << "lite_index_info:\n" << info_after_add.lite_index_info();
  EXPECT_THAT(info_after_add.main_index_info(), Not(IsEmpty()));
  EXPECT_THAT(info_after_add.lite_index_info(), Not(IsEmpty()));
  EXPECT_THAT(info_after_add.main_index_info(),
              StrEq(basic_info.main_index_info()));
  EXPECT_THAT(info_after_add.lite_index_info(),
              StrNe(basic_info.lite_index_info()));

  // Merge into the main index. Both debug strings should change again.
  ASSERT_THAT(index_->Merge(), IsOk());

  IndexDebugInfoProto info_after_merge =
      index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
  EXPECT_TRUE(info_after_merge.has_index_storage_info());
  ICING_LOG(DBG) << "main_index_info:\n" << info_after_merge.main_index_info();
  ICING_LOG(DBG) << "lite_index_info:\n" << info_after_merge.lite_index_info();
  EXPECT_THAT(info_after_merge.main_index_info(), Not(IsEmpty()));
  EXPECT_THAT(info_after_merge.lite_index_info(), Not(IsEmpty()));
  EXPECT_THAT(info_after_merge.main_index_info(),
              StrNe(info_after_add.main_index_info()));
  EXPECT_THAT(info_after_merge.lite_index_info(),
              StrNe(info_after_add.lite_index_info()));
}
2451
// Checks that a second merge which introduces the new branch point "f"
// backfills it with the prefix hits that were already merged earlier.
TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
  // First batch: doc0 has "foo" (exact, section 2) and "fool" (prefix,
  // section 3); doc1 has "foot" (prefix, section 3).
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId0, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("fool", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // After this merge the index should have posting lists for
  //   "fool" {(doc0,sec3)},
  //   "foot" {(doc1,sec3)},
  //   "foo"  {(doc1,sec3),(doc0,sec3),(doc0,sec2)}
  ASSERT_THAT(index_->Merge(), IsOk());

  // Second batch: a single exact-only term for doc2.
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("far", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // After this merge the index should add a posting list for "far" and a
  // backfill branch point for "f". The posting lists listed above are
  // unaffected; the new ones should be
  //   "far" {(doc2,sec2)},
  //   "f"   {(doc1,sec3),(doc0,sec3)}
  // i.e. multiple pre-existing hits are added to the new backfill branch
  // point.
  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(
      GetHits(std::move(f_prefix_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3})));
}
2500
// Checks that a second merge which introduces the new branch point "f"
// backfills it with an already-merged prefix hit and a newly merged one.
TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
  // First batch: doc0 has "foo" and "fool" (exact only, section 2); doc1 has
  // "foot" (prefix, section 3).
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY));
  ICING_ASSERT_OK(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  // After this merge the index should have posting lists for
  //   "fool" {(doc0,sec2)},
  //   "foot" {(doc1,sec3)},
  //   "foo"  {(doc1,sec3),(doc0,sec2)}
  ASSERT_THAT(index_->Merge(), IsOk());

  // Second batch: doc2 has "footer" (exact only, section 2) and "foo"
  // (prefix, section 3); doc3 has "far" (exact only, section 2).
  editor = index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId3, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("far", TermMatchType::EXACT_ONLY));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // After this merge the index should add posting lists for "far" and "footer"
  // and a backfill branch point for "f". The posting lists should then be
  //   "fool"   {(doc0,sec2)},
  //   "foot"   {(doc1,sec3)},
  //   "foo"    {(doc2,sec3),(doc1,sec3),(doc0,sec2)}
  //   "footer" {(doc2,sec2)},
  //   "far"    {(doc3,sec2)},
  //   "f"      {(doc2,sec3),(doc1,sec3)}
  // The new branch point picks up the prefix hit merged earlier (doc1) and the
  // one merged now (doc2).
  ASSERT_THAT(index_->Merge(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(
      GetHits(std::move(f_prefix_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3})));
}
2556
// Checks that TruncateTo(kInvalidDocumentId) succeeds and leaves query results
// unchanged, whether the index is empty, has unmerged hits only, merged hits
// only, or both.
TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
  // Empty index: truncation succeeds and nothing is returned.
  EXPECT_THAT(index_->TruncateTo(kInvalidDocumentId), IsOk());
  EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)), IsEmpty());

  // Add one document to the lite index.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  // Truncating to the invalid id must keep the new hit.
  EXPECT_THAT(index_->TruncateTo(kInvalidDocumentId), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Truncating to the invalid id must still keep the hit after it has been
  // merged into the main index.
  ASSERT_THAT(index_->Merge(), IsOk());
  EXPECT_THAT(index_->TruncateTo(kInvalidDocumentId), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Add a second, unmerged document.
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // Truncating to the invalid id must keep the hits of both indices.
  EXPECT_THAT(index_->TruncateTo(kInvalidDocumentId), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(
      GetHits(std::move(f_prefix_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2611
// Checks that TruncateTo(last_added_document_id()) succeeds and leaves query
// results unchanged, whether the index is empty, has unmerged hits only,
// merged hits only, or both.
TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
  // Empty index: truncation succeeds and nothing is returned.
  EXPECT_THAT(index_->TruncateTo(index_->last_added_document_id()), IsOk());
  EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)), IsEmpty());

  // Add one document to the lite index and record it as the last added one.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId0);
  // Truncating to the last added document id must keep the new hit.
  EXPECT_THAT(index_->TruncateTo(index_->last_added_document_id()), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Truncating to the last added document id must still keep the hit after it
  // has been merged into the main index.
  ASSERT_THAT(index_->Merge(), IsOk());
  EXPECT_THAT(index_->TruncateTo(index_->last_added_document_id()), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Add a second, unmerged document and record it as the last added one.
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId1);

  // Truncating to the last added document id must keep the hits of both
  // indices.
  EXPECT_THAT(index_->TruncateTo(index_->last_added_document_id()), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(
      f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(
      GetHits(std::move(f_prefix_itr)),
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2668
// Checks that truncating to a document id that was already merged drops the
// later, unmerged hits while the merged hits stay searchable.
TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
  // Index document 0 and merge it into the main index.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId0);

  ASSERT_THAT(index_->Merge(), IsOk());

  // Index document 1 without merging.
  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId1);

  ICING_EXPECT_OK(index_->TruncateTo(kDocumentId0));

  // Clipping to document 0 should toss out the lite index, but keep the main.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2698
// Checks that truncating to a document id older than the newest merged
// document leaves no hits at all.
TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
  // Index documents 0 and 1 and merge both into the main index.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId0);
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foul", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId1);

  ASSERT_THAT(index_->Merge(), IsOk());

  // Index document 2 without merging.
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foot", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  index_->set_last_added_document_id(kDocumentId2);

  ICING_EXPECT_OK(index_->TruncateTo(kDocumentId0));

  // Clipping to document 0 should toss out both indices.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_prefix_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(f_prefix_itr)), IsEmpty());
}
2731
// Checks the fields of the IndexStorageInfoProto returned by GetStorageInfo
// after two documents have been indexed and merged.
TEST_F(IndexTest, IndexStorageInfoProto) {
  // Index two documents and merge them into the main index. The editor is
  // scoped so that it is gone before the storage info is read.
  {
    Index::Editor editor =
        index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
    ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
    editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("foul", TermMatchType::PREFIX));
    ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

    ASSERT_THAT(index_->Merge(), IsOk());
  }

  IndexStorageInfoProto info = index_->GetStorageInfo();
  // Every size field must be non-negative.
  EXPECT_THAT(info.index_size(), Ge(0));
  EXPECT_THAT(info.lite_index_lexicon_size(), Ge(0));
  EXPECT_THAT(info.lite_index_hit_buffer_size(), Ge(0));
  EXPECT_THAT(info.main_index_lexicon_size(), Ge(0));
  EXPECT_THAT(info.main_index_storage_size(), Ge(0));
  EXPECT_THAT(info.main_index_block_size(), Ge(0));
  // There should be 1 block for the header and 1 block for three posting lists
  // ("fo", "foo", "foul").
  EXPECT_THAT(info.num_blocks(), Eq(2));
  EXPECT_THAT(info.min_free_fraction(), Ge(0));
}
2759
// Checks the lite index hit buffer sizes reported by PublishQueryStats before
// sorting, after sorting, and after merging into the main index.
TEST_F(IndexTest, PublishQueryStats) {
  // Byte size of the two hits indexed below.
  const auto two_hits_byte_size = 2 * sizeof(TermIdHitPair::Value);

  // Index two documents without merging.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foo", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  ICING_ASSERT_OK(editor.BufferTerm("foul", TermMatchType::PREFIX));
  ICING_EXPECT_OK(editor.IndexAllBufferedTerms());

  // Before sorting: the buffer holds both hits, and the unsorted portion is at
  // least that large.
  QueryStatsProto stats_unsorted;
  index_->PublishQueryStats(&stats_unsorted);
  EXPECT_THAT(stats_unsorted.lite_index_hit_buffer_byte_size(),
              Eq(two_hits_byte_size));
  EXPECT_THAT(stats_unsorted.lite_index_hit_buffer_unsorted_byte_size(),
              Ge(two_hits_byte_size));

  // After sorting: the buffer size is unchanged and nothing is unsorted.
  index_->SortLiteIndex();
  QueryStatsProto stats_sorted;
  index_->PublishQueryStats(&stats_sorted);
  EXPECT_THAT(stats_sorted.lite_index_hit_buffer_byte_size(),
              Eq(two_hits_byte_size));
  EXPECT_THAT(stats_sorted.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));

  // After merging: the lite index hit buffer is reported as empty.
  ASSERT_THAT(index_->Merge(), IsOk());
  QueryStatsProto stats_merged;
  index_->PublishQueryStats(&stats_merged);
  EXPECT_THAT(stats_merged.lite_index_hit_buffer_byte_size(), Eq(0));
  EXPECT_THAT(stats_merged.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));
}
2794
2795 } // namespace
2796
2797 } // namespace lib
2798 } // namespace icing
2799