xref: /aosp_15_r20/external/icing/icing/index/index_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/index.h"
16 
17 #include <unistd.h>
18 
19 #include <algorithm>
20 #include <cstdint>
21 #include <limits>
22 #include <memory>
23 #include <random>
24 #include <string>
25 #include <string_view>
26 #include <unordered_map>
27 #include <utility>
28 #include <vector>
29 
30 #include "icing/text_classifier/lib3/utils/base/status.h"
31 #include "gmock/gmock.h"
32 #include "gtest/gtest.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/index/hit/doc-hit-info.h"
35 #include "icing/index/iterator/doc-hit-info-iterator.h"
36 #include "icing/index/lite/term-id-hit-pair.h"
37 #include "icing/legacy/index/icing-filesystem.h"
38 #include "icing/legacy/index/icing-mock-filesystem.h"
39 #include "icing/proto/debug.pb.h"
40 #include "icing/proto/logging.pb.h"
41 #include "icing/proto/storage.pb.h"
42 #include "icing/proto/term.pb.h"
43 #include "icing/schema/section.h"
44 #include "icing/store/document-id.h"
45 #include "icing/testing/always-true-suggestion-result-checker-impl.h"
46 #include "icing/testing/common-matchers.h"
47 #include "icing/testing/random-string.h"
48 #include "icing/testing/tmp-directory.h"
49 #include "icing/util/crc32.h"
50 #include "icing/util/logging.h"
51 
52 namespace icing {
53 namespace lib {
54 
55 namespace {
56 
57 using ::testing::ContainerEq;
58 using ::testing::ElementsAre;
59 using ::testing::Eq;
60 using ::testing::Ge;
61 using ::testing::Gt;
62 using ::testing::IsEmpty;
63 using ::testing::IsFalse;
64 using ::testing::IsTrue;
65 using ::testing::Ne;
66 using ::testing::NiceMock;
67 using ::testing::Not;
68 using ::testing::Return;
69 using ::testing::SizeIs;
70 using ::testing::StrEq;
71 using ::testing::StrNe;
72 using ::testing::Test;
73 using ::testing::UnorderedElementsAre;
74 
// Returns the system memory page size, in bytes.
//
// Queries sysconf(_SC_PAGESIZE), the POSIX-specified interface, rather than
// the legacy getpagesize() call that was removed from POSIX.1-2001. The value
// is narrowed to int to keep the original return type.
int GetBlockSize() { return static_cast<int>(sysconf(_SC_PAGESIZE)); }
76 
77 class IndexTest : public Test {
78  protected:
SetUp()79   void SetUp() override {
80     index_dir_ = GetTestTempDir() + "/index_test/";
81     Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
82                            /*lite_index_sort_at_indexing=*/true,
83                            /*lite_index_sort_size=*/1024 * 8);
84     ICING_ASSERT_OK_AND_ASSIGN(
85         index_, Index::Create(options, &filesystem_, &icing_filesystem_));
86   }
87 
TearDown()88   void TearDown() override {
89     index_.reset();
90     icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
91   }
92 
GetHits(std::unique_ptr<DocHitInfoIterator> iterator)93   std::vector<DocHitInfo> GetHits(
94       std::unique_ptr<DocHitInfoIterator> iterator) {
95     std::vector<DocHitInfo> infos;
96     while (iterator->Advance().ok()) {
97       infos.push_back(iterator->doc_hit_info());
98     }
99     return infos;
100   }
101 
GetHits(std::string term,int term_start_index,int unnormalized_term_length,TermMatchType::Code match_type)102   libtextclassifier3::StatusOr<std::vector<DocHitInfo>> GetHits(
103       std::string term, int term_start_index, int unnormalized_term_length,
104       TermMatchType::Code match_type) {
105     ICING_ASSIGN_OR_RETURN(
106         std::unique_ptr<DocHitInfoIterator> itr,
107         index_->GetIterator(term, term_start_index, unnormalized_term_length,
108                             kSectionIdMaskAll, match_type));
109     return GetHits(std::move(itr));
110   }
111 
112   Filesystem filesystem_;
113   IcingFilesystem icing_filesystem_;
114   std::string index_dir_;
115   std::unique_ptr<Index> index_;
116 };
117 
118 constexpr DocumentId kDocumentId0 = 0;
119 constexpr DocumentId kDocumentId1 = 1;
120 constexpr DocumentId kDocumentId2 = 2;
121 constexpr DocumentId kDocumentId3 = 3;
122 constexpr DocumentId kDocumentId4 = 4;
123 constexpr DocumentId kDocumentId5 = 5;
124 constexpr DocumentId kDocumentId6 = 6;
125 constexpr DocumentId kDocumentId7 = 7;
126 constexpr DocumentId kDocumentId8 = 8;
127 constexpr SectionId kSectionId2 = 2;
128 constexpr SectionId kSectionId3 = 3;
129 
130 MATCHER_P2(EqualsDocHitInfo, document_id, sections, "") {
131   const DocHitInfo& actual = arg;
132   SectionIdMask section_mask = kSectionIdMaskNone;
133   for (SectionId section : sections) {
134     section_mask |= UINT64_C(1) << section;
135   }
136   *result_listener << "actual is {document_id=" << actual.document_id()
137                    << ", section_mask=" << actual.hit_section_ids_mask()
138                    << "}, but expected was {document_id=" << document_id
139                    << ", section_mask=" << section_mask << "}.";
140   return actual.document_id() == document_id &&
141          actual.hit_section_ids_mask() == section_mask;
142 }
143 
144 MATCHER_P2(EqualsTermMetadata, content, hit_count, "") {
145   const TermMetadata& actual = arg;
146   *result_listener << "actual is {content=" << actual.content
147                    << ", score=" << actual.score
148                    << "}, but expected was {content=" << content
149                    << ", score=" << hit_count << "}.";
150   return actual.content == content && actual.score == hit_count;
151 }
152 
// Index::Create() must fail with FAILED_PRECONDITION when either filesystem
// pointer is null.
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024,
                         /*lite_index_sort_at_indexing=*/true,
                         /*lite_index_sort_size=*/1024 * 8);
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;

  // Null IcingFilesystem.
  EXPECT_THAT(
      Index::Create(options, &filesystem_, /*icing_filesystem=*/nullptr),
      StatusIs(expected_code));

  // Null Filesystem.
  EXPECT_THAT(
      Index::Create(options, /*filesystem=*/nullptr, &icing_filesystem_),
      StatusIs(expected_code));
}
164 
// An iterator obtained from an index with no hits is exhausted on the first
// Advance() call.
TEST_F(IndexTest, EmptyIndex) {
  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));

  // Repeat the same lookup with a newly created iterator.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator, index_->GetIterator("foo", /*term_start_index=*/0,
                                    /*unnormalized_term_length=*/0,
                                    kSectionIdMaskAll,
                                    TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
}
181 
// Same as EmptyIndex, but runs Merge() on the empty index first.
TEST_F(IndexTest, EmptyIndexAfterMerge) {
  // Merging an empty index should succeed, but have no effects.
  ICING_ASSERT_OK(index_->Merge());

  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));

  // Repeat the same lookup with a newly created iterator.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator, index_->GetIterator("foo", /*term_start_index=*/0,
                                    /*unnormalized_term_length=*/0,
                                    kSectionIdMaskAll,
                                    TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
}
201 
// Re-creating an index with lite_index_sort_at_indexing=true over data that
// was written with the flag disabled must leave the lite index sorted.
TEST_F(IndexTest, CreationWithLiteIndexSortAtIndexingEnabledShouldSort) {
  // Make the index with lite_index_sort_at_indexing=false and a very small sort
  // threshold.
  Index::Options unsorted_options(index_dir_, /*index_merge_size=*/1024,
                                  /*lite_index_sort_at_indexing=*/false,
                                  /*lite_index_sort_size=*/16);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_,
      Index::Create(unsorted_options, &filesystem_, &icing_filesystem_));

  // Index three exact-match terms for document 0, section 2.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "bar", "baz"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  ASSERT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Persist and recreate the index with lite_index_sort_at_indexing=true
  ASSERT_THAT(index_->PersistToDisk(), IsOk());
  Index::Options sorted_options(index_dir_, /*index_merge_size=*/1024,
                                /*lite_index_sort_at_indexing=*/true,
                                /*lite_index_sort_size=*/16);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(sorted_options, &filesystem_, &icing_filesystem_));

  // Check that the index is sorted after recreating with
  // lite_index_sort_at_indexing, with the unsorted HitBuffer exceeding the sort
  // threshold.
  const bool needs_sort = index_->LiteIndexNeedSort();
  EXPECT_THAT(needs_sort, IsFalse());
}
231 
// Once an iterator is exhausted, its doc_hit_info() must report
// kInvalidDocumentId with an empty section mask.
TEST_F(IndexTest, AdvancePastEnd) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;
  const std::vector<SectionId> no_sections;

  // "bar" was never indexed: the iterator is exhausted immediately.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("bar", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));

  // "foo" has exactly one hit: the second Advance() runs past the end.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator, index_->GetIterator("foo", /*term_start_index=*/0,
                                    /*unnormalized_term_length=*/0,
                                    kSectionIdMaskAll,
                                    TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), IsOk());
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));
}
258 
// Same as AdvancePastEnd, but the hit is merged via Merge() before querying.
TEST_F(IndexTest, AdvancePastEndAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  const auto exhausted = libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;
  const std::vector<SectionId> no_sections;

  // "bar" was never indexed: the iterator is exhausted immediately.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("bar", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));

  // "foo" has exactly one hit: the second Advance() runs past the end.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator, index_->GetIterator("foo", /*term_start_index=*/0,
                                    /*unnormalized_term_length=*/0,
                                    kSectionIdMaskAll,
                                    TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(), IsOk());
  EXPECT_THAT(iterator->Advance(), StatusIs(exhausted));
  EXPECT_THAT(iterator->doc_hit_info(),
              EqualsDocHitInfo(kInvalidDocumentId, no_sections));
}
287 
TEST_F(IndexTest,IteratorGetCallStats_mainIndexOnly)288 TEST_F(IndexTest, IteratorGetCallStats_mainIndexOnly) {
289   Index::Editor edit =
290       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
291   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
292   EXPECT_THAT(edit.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
293   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
294 
295   edit = index_->Edit(kDocumentId1, kSectionId2,
296                       /*namespace_id=*/0);
297   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
298   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
299 
300   // Merge the index.
301   ICING_ASSERT_OK(index_->Merge());
302 
303   ICING_ASSERT_OK_AND_ASSIGN(
304       std::unique_ptr<DocHitInfoIterator> itr,
305       index_->GetIterator("foo", /*term_start_index=*/0,
306                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
307                           TermMatchType::EXACT_ONLY));
308 
309   // Before Advance().
310   EXPECT_THAT(
311       itr->GetCallStats(),
312       EqualsDocHitInfoIteratorCallStats(
313           /*num_leaf_advance_calls_lite_index=*/0,
314           /*num_leaf_advance_calls_main_index=*/0,
315           /*num_leaf_advance_calls_integer_index=*/0,
316           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
317 
318   // 1st Advance().
319   ICING_ASSERT_OK(itr->Advance());
320   EXPECT_THAT(
321       itr->GetCallStats(),
322       EqualsDocHitInfoIteratorCallStats(
323           /*num_leaf_advance_calls_lite_index=*/0,
324           /*num_leaf_advance_calls_main_index=*/1,
325           /*num_leaf_advance_calls_integer_index=*/0,
326           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
327 
328   // 2nd Advance().
329   ICING_ASSERT_OK(itr->Advance());
330   EXPECT_THAT(
331       itr->GetCallStats(),
332       EqualsDocHitInfoIteratorCallStats(
333           /*num_leaf_advance_calls_lite_index=*/0,
334           /*num_leaf_advance_calls_main_index=*/2,
335           /*num_leaf_advance_calls_integer_index=*/0,
336           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
337 
338   // 3rd Advance().
339   ASSERT_THAT(itr->Advance(),
340               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
341   EXPECT_THAT(
342       itr->GetCallStats(),
343       EqualsDocHitInfoIteratorCallStats(
344           /*num_leaf_advance_calls_lite_index=*/0,
345           /*num_leaf_advance_calls_main_index=*/2,
346           /*num_leaf_advance_calls_integer_index=*/0,
347           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
348 }
349 
TEST_F(IndexTest,IteratorGetCallStats_liteIndexOnly)350 TEST_F(IndexTest, IteratorGetCallStats_liteIndexOnly) {
351   Index::Editor edit =
352       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
353   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
354   EXPECT_THAT(edit.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
355   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
356 
357   edit = index_->Edit(kDocumentId1, kSectionId2,
358                       /*namespace_id=*/0);
359   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
360   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
361 
362   ICING_ASSERT_OK_AND_ASSIGN(
363       std::unique_ptr<DocHitInfoIterator> itr,
364       index_->GetIterator("foo", /*term_start_index=*/0,
365                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
366                           TermMatchType::EXACT_ONLY));
367 
368   // Before Advance().
369   EXPECT_THAT(
370       itr->GetCallStats(),
371       EqualsDocHitInfoIteratorCallStats(
372           /*num_leaf_advance_calls_lite_index=*/0,
373           /*num_leaf_advance_calls_main_index=*/0,
374           /*num_leaf_advance_calls_integer_index=*/0,
375           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
376 
377   // 1st Advance().
378   ICING_ASSERT_OK(itr->Advance());
379   EXPECT_THAT(
380       itr->GetCallStats(),
381       EqualsDocHitInfoIteratorCallStats(
382           /*num_leaf_advance_calls_lite_index=*/1,
383           /*num_leaf_advance_calls_main_index=*/0,
384           /*num_leaf_advance_calls_integer_index=*/0,
385           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
386 
387   // 2nd Advance().
388   ICING_ASSERT_OK(itr->Advance());
389   EXPECT_THAT(
390       itr->GetCallStats(),
391       EqualsDocHitInfoIteratorCallStats(
392           /*num_leaf_advance_calls_lite_index=*/2,
393           /*num_leaf_advance_calls_main_index=*/0,
394           /*num_leaf_advance_calls_integer_index=*/0,
395           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
396 
397   // 3rd Advance().
398   ASSERT_THAT(itr->Advance(),
399               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
400   EXPECT_THAT(
401       itr->GetCallStats(),
402       EqualsDocHitInfoIteratorCallStats(
403           /*num_leaf_advance_calls_lite_index=*/2,
404           /*num_leaf_advance_calls_main_index=*/0,
405           /*num_leaf_advance_calls_integer_index=*/0,
406           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
407 }
408 
TEST_F(IndexTest,IteratorGetCallStats)409 TEST_F(IndexTest, IteratorGetCallStats) {
410   Index::Editor edit =
411       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
412   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
413   EXPECT_THAT(edit.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
414   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
415 
416   edit = index_->Edit(kDocumentId1, kSectionId2,
417                       /*namespace_id=*/0);
418   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
419   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
420 
421   // Merge the index. 2 hits for "foo" will be merged into the main index.
422   ICING_ASSERT_OK(index_->Merge());
423 
424   // Insert 2 more hits for "foo". It will be in the lite index.
425   edit = index_->Edit(kDocumentId2, kSectionId2,
426                       /*namespace_id=*/0);
427   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
428   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
429 
430   edit = index_->Edit(kDocumentId3, kSectionId2,
431                       /*namespace_id=*/0);
432   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
433   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
434 
435   ICING_ASSERT_OK_AND_ASSIGN(
436       std::unique_ptr<DocHitInfoIterator> itr,
437       index_->GetIterator("foo", /*term_start_index=*/0,
438                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
439                           TermMatchType::EXACT_ONLY));
440 
441   // Before Advance().
442   EXPECT_THAT(
443       itr->GetCallStats(),
444       EqualsDocHitInfoIteratorCallStats(
445           /*num_leaf_advance_calls_lite_index=*/0,
446           /*num_leaf_advance_calls_main_index=*/0,
447           /*num_leaf_advance_calls_integer_index=*/0,
448           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/0));
449 
450   // 1st Advance(). DocHitInfoIteratorOr will advance both left and right
451   // iterator (i.e. lite and main index iterator) once, compare document ids,
452   // and return the hit with larger document id. In this case, hit from lite
453   // index will be chosen and returned.
454   ICING_ASSERT_OK(itr->Advance());
455   EXPECT_THAT(
456       itr->GetCallStats(),
457       EqualsDocHitInfoIteratorCallStats(
458           /*num_leaf_advance_calls_lite_index=*/1,
459           /*num_leaf_advance_calls_main_index=*/1,
460           /*num_leaf_advance_calls_integer_index=*/0,
461           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
462 
463   // 2nd Advance(). Since lite index iterator has larger document id in the
464   // previous round, we advance lite index iterator in this round. We still
465   // choose and return hit from lite index.
466   ICING_ASSERT_OK(itr->Advance());
467   EXPECT_THAT(
468       itr->GetCallStats(),
469       EqualsDocHitInfoIteratorCallStats(
470           /*num_leaf_advance_calls_lite_index=*/2,
471           /*num_leaf_advance_calls_main_index=*/1,
472           /*num_leaf_advance_calls_integer_index=*/0,
473           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
474 
475   // 3rd Advance(). Since lite index iterator has larger document id in the
476   // previous round, we advance lite index iterator in this round. However,
477   // there is no hit from lite index anymore, so we choose and return hit from
478   // main index.
479   ICING_ASSERT_OK(itr->Advance());
480   EXPECT_THAT(
481       itr->GetCallStats(),
482       EqualsDocHitInfoIteratorCallStats(
483           /*num_leaf_advance_calls_lite_index=*/2,
484           /*num_leaf_advance_calls_main_index=*/1,
485           /*num_leaf_advance_calls_integer_index=*/0,
486           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
487 
488   // 4th Advance(). Advance main index.
489   ICING_ASSERT_OK(itr->Advance());
490   EXPECT_THAT(
491       itr->GetCallStats(),
492       EqualsDocHitInfoIteratorCallStats(
493           /*num_leaf_advance_calls_lite_index=*/2,
494           /*num_leaf_advance_calls_main_index=*/2,
495           /*num_leaf_advance_calls_integer_index=*/0,
496           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
497 
498   // 5th Advance(). Reach the end.
499   ASSERT_THAT(itr->Advance(),
500               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
501   EXPECT_THAT(
502       itr->GetCallStats(),
503       EqualsDocHitInfoIteratorCallStats(
504           /*num_leaf_advance_calls_lite_index=*/2,
505           /*num_leaf_advance_calls_main_index=*/2,
506           /*num_leaf_advance_calls_integer_index=*/0,
507           /*num_leaf_advance_calls_no_index=*/0, /*num_blocks_inspected=*/1));
508 }
509 
// A single indexed term with a single hit is returned by an exact-match
// lookup of that term.
TEST_F(IndexTest, SingleHitSingleTermIndex) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Expect exactly one hit: document 0 in section 2.
  const std::vector<SectionId> expected_sections{kSectionId2};
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
525 
// Same as SingleHitSingleTermIndex, but the lookup happens after Merge().
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Expect exactly one hit: document 0 in section 2.
  const std::vector<SectionId> expected_sections{kSectionId2};
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
543 
// Exercises Optimize() on an index holding one hit (without Merge()): an
// identity mapping keeps the hit, a remapping translates its document id, and
// a mapping that does not retain the document removes the hit.
TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) {
  Index::Editor editor =
      index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  // Exact-match lookup of "foo" through the fixture helper.
  auto foo_hits = [this]() {
    return GetHits("foo", /*term_start_index=*/0,
                   /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY);
  };
  const std::vector<SectionId> section2_only{kSectionId2};

  // Identity mapping: the hit is unchanged.
  ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                                   /*new_last_added_document_id=*/2));
  EXPECT_THAT(foo_hits(), IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
                              kDocumentId2, section2_only))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);

  // Mapping to a different docid will translate the hit
  ICING_ASSERT_OK(index_->Optimize(
      /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
      /*new_last_added_document_id=*/1));
  EXPECT_THAT(foo_hits(), IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
                              kDocumentId1, section2_only))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);

  // Mapping to kInvalidDocumentId will remove the hit.
  ICING_ASSERT_OK(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
                       /*new_last_added_document_id=*/0));
  EXPECT_THAT(foo_hits(), IsOkAndHolds(IsEmpty()));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
581 
// Exercises Optimize() on an index holding one hit that has already been
// merged via Merge(): an identity mapping keeps the hit, a remapping
// translates its document id, and a mapping that does not retain the document
// removes the hit. After each step the index's last added document id must
// equal the value passed to Optimize().
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) {
  Index::Editor edit =
      index_->Edit(kDocumentId2, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  ICING_ASSERT_OK(index_->Merge());

  // Identity mapping: the hit is unchanged.
  ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
                                   /*new_last_added_document_id=*/2));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
          kDocumentId2, std::vector<SectionId>{kSectionId2}))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);

  // Mapping to a different docid will translate the hit
  ICING_ASSERT_OK(index_->Optimize(
      /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1},
      /*new_last_added_document_id=*/1));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
          kDocumentId1, std::vector<SectionId>{kSectionId2}))));
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);

  // Mapping to kInvalidDocumentId will remove the hit.
  ICING_ASSERT_OK(
      index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId},
                       /*new_last_added_document_id=*/0));
  EXPECT_THAT(
      GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::EXACT_ONLY),
      IsOkAndHolds(IsEmpty()));
  // Compare against the named constant (not a bare 0) so both operands have
  // the DocumentId type, matching the other assertions in this file.
  EXPECT_EQ(index_->last_added_document_id(), kDocumentId0);
}
621 
// With two terms indexed for the same document and section, an exact-match
// lookup of "foo" returns that single hit.
TEST_F(IndexTest, SingleHitMultiTermIndex) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Expect exactly one hit: document 0 in section 2.
  const std::vector<SectionId> expected_sections{kSectionId2};
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
638 
// Same as SingleHitMultiTermIndex, but the lookup happens after Merge().
TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));

  // Expect exactly one hit: document 0 in section 2.
  const std::vector<SectionId> expected_sections{kSectionId2};
  EXPECT_THAT(GetHits(std::move(iterator)),
              ElementsAre(EqualsDocHitInfo(kDocumentId0, expected_sections)));
}
657 
TEST_F(IndexTest,MultiHitMultiTermIndexAfterOptimize)658 TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) {
659   Index::Editor edit =
660       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
661   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
662   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
663 
664   edit = index_->Edit(kDocumentId1, kSectionId2,
665                       /*namespace_id=*/0);
666   EXPECT_THAT(edit.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
667   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
668 
669   edit = index_->Edit(kDocumentId2, kSectionId3,
670                       /*namespace_id=*/0);
671   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
672   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
673   index_->set_last_added_document_id(kDocumentId2);
674 
675   ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
676                                    /*new_last_added_document_id=*/2));
677   EXPECT_THAT(
678       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
679               TermMatchType::EXACT_ONLY),
680       IsOkAndHolds(ElementsAre(
681           EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
682           EqualsDocHitInfo(kDocumentId0,
683                            std::vector<SectionId>{kSectionId2}))));
684   EXPECT_THAT(
685       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
686               TermMatchType::EXACT_ONLY),
687       IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
688           kDocumentId1, std::vector<SectionId>{kSectionId2}))));
689   EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
690 
691   // Delete document id 1, and document id 2 is translated to 1.
692   ICING_ASSERT_OK(
693       index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
694                        /*new_last_added_document_id=*/1));
695   EXPECT_THAT(
696       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
697               TermMatchType::EXACT_ONLY),
698       IsOkAndHolds(ElementsAre(
699           EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
700           EqualsDocHitInfo(kDocumentId0,
701                            std::vector<SectionId>{kSectionId2}))));
702   EXPECT_THAT(
703       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
704               TermMatchType::EXACT_ONLY),
705       IsOkAndHolds(IsEmpty()));
706   EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
707 
708   // Delete all the rest documents.
709   ICING_ASSERT_OK(index_->Optimize(
710       /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
711       /*new_last_added_document_id=*/kInvalidDocumentId));
712   EXPECT_THAT(
713       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
714               TermMatchType::EXACT_ONLY),
715       IsOkAndHolds(IsEmpty()));
716   EXPECT_THAT(
717       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
718               TermMatchType::EXACT_ONLY),
719       IsOkAndHolds(IsEmpty()));
720   EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
721 }
722 
TEST_F(IndexTest,MultiHitMultiTermIndexAfterMergeAndOptimize)723 TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) {
724   Index::Editor edit =
725       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
726   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
727   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
728 
729   edit = index_->Edit(kDocumentId1, kSectionId2,
730                       /*namespace_id=*/0);
731   EXPECT_THAT(edit.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
732   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
733 
734   edit = index_->Edit(kDocumentId2, kSectionId3,
735                       /*namespace_id=*/0);
736   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
737   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
738   index_->set_last_added_document_id(kDocumentId2);
739 
740   ICING_ASSERT_OK(index_->Merge());
741 
742   ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2},
743                                    /*new_last_added_document_id=*/2));
744   EXPECT_THAT(
745       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
746               TermMatchType::EXACT_ONLY),
747       IsOkAndHolds(ElementsAre(
748           EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
749           EqualsDocHitInfo(kDocumentId0,
750                            std::vector<SectionId>{kSectionId2}))));
751   EXPECT_THAT(
752       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
753               TermMatchType::EXACT_ONLY),
754       IsOkAndHolds(ElementsAre(EqualsDocHitInfo(
755           kDocumentId1, std::vector<SectionId>{kSectionId2}))));
756   EXPECT_EQ(index_->last_added_document_id(), kDocumentId2);
757 
758   // Delete document id 1, and document id 2 is translated to 1.
759   ICING_ASSERT_OK(
760       index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
761                        /*new_last_added_document_id=*/1));
762   EXPECT_THAT(
763       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
764               TermMatchType::EXACT_ONLY),
765       IsOkAndHolds(ElementsAre(
766           EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
767           EqualsDocHitInfo(kDocumentId0,
768                            std::vector<SectionId>{kSectionId2}))));
769   EXPECT_THAT(
770       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
771               TermMatchType::EXACT_ONLY),
772       IsOkAndHolds(IsEmpty()));
773   EXPECT_EQ(index_->last_added_document_id(), kDocumentId1);
774 
775   // Delete all the rest documents.
776   ICING_ASSERT_OK(index_->Optimize(
777       /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId},
778       /*new_last_added_document_id=*/kInvalidDocumentId));
779   EXPECT_THAT(
780       GetHits("foo", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
781               TermMatchType::EXACT_ONLY),
782       IsOkAndHolds(IsEmpty()));
783   EXPECT_THAT(
784       GetHits("bar", /*term_start_index=*/0, /*unnormalized_term_length=*/0,
785               TermMatchType::EXACT_ONLY),
786       IsOkAndHolds(IsEmpty()));
787   EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);
788 }
789 
// Looking up a term that was never indexed ("baz") while other terms are
// present: the iterator is created successfully, but its first Advance() is
// expected to report RESOURCE_EXHAUSTED.
TEST_F(IndexTest, NoHitMultiTermIndex) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("baz", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
805 
// Same as NoHitMultiTermIndex, but Merge() is called between indexing and the
// lookup of the never-indexed term "baz".
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("baz", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->Advance(),
              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
823 
// Three documents are indexed with two different terms; an exact query for
// "foo" is expected to return only the two "foo" documents, document 2 before
// document 0.
TEST_F(IndexTest, MultiHitMultiTermIndex) {
  // Document 0 / section 2 -> "foo".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 / section 2 -> "bar".
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 2 / section 3 -> "foo".
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
851 
// Same as MultiHitMultiTermIndex, but Merge() is called before the query for
// "foo" is issued.
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
  // Document 0 / section 2 -> "foo".
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 1 / section 2 -> "bar".
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("bar", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Document 2 / section 3 -> "foo".
  editor = index_->Edit(kDocumentId2, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
881 
// "foo" is indexed in section 2 of document 0 and section 3 of document 1.
// Querying with a mask that only has the section-2 bit set is expected to
// return document 0 alone.
TEST_F(IndexTest, MultiHitSectionRestrict) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Restrict the lookup to section 2.
  SectionIdMask section2_mask = 1U << kSectionId2;
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, section2_mask,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
903 
// Same as MultiHitSectionRestrict, but Merge() is called before the
// section-restricted query is issued.
TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // Restrict the lookup to section 2.
  SectionIdMask section2_mask = 1U << kSectionId2;
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, section2_mask,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
927 
// Buffering the same term twice through one editor: the elements size grows
// after the first BufferTerm() call, is unchanged by the duplicate call, and
// the query returns a single hit for the document.
TEST_F(IndexTest, SingleHitDedupeIndex) {
  // The index starts out with an elements size of zero.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t initial_size, index_->GetElementsSize());
  EXPECT_THAT(initial_size, Eq(0));

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);

  // First occurrence of "foo" grows the elements size.
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_after_first,
                             index_->GetElementsSize());
  EXPECT_THAT(size_after_first, Gt(0));

  // The duplicate occurrence leaves the elements size unchanged.
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_after_duplicate,
                             index_->GetElementsSize());
  EXPECT_THAT(size_after_duplicate, Eq(size_after_first));
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
950 
// A term indexed with PREFIX match type ("fool") is expected to be returned
// by a PREFIX query for the shorter string "foo".
TEST_F(IndexTest, PrefixHit) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
966 
// Same as PrefixHit, but Merge() is called before the PREFIX query for "foo".
TEST_F(IndexTest, PrefixHitAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
984 
// Document 0 indexes "fool" as PREFIX and document 1 indexes "foo" as
// EXACT_ONLY. A PREFIX query for "foo" is expected to return both, document 1
// before document 0.
TEST_F(IndexTest, MultiPrefixHit) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1007 
// Same as MultiPrefixHit, but Merge() is called before the PREFIX query for
// "foo".
TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1032 
// Document 0 indexes "fool" as EXACT_ONLY and document 1 indexes "foo" as
// PREFIX. A PREFIX query for "foo" is expected to return document 1 only; the
// exact-only "fool" hit must not show up.
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId1, std::vector<SectionId>{kSectionId3})));
}
1053 
// Same as NoExactHitInPrefixQuery, but Merge() is called before the PREFIX
// query for "foo".
TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  editor = index_->Edit(kDocumentId1, kSectionId3, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId1, std::vector<SectionId>{kSectionId3})));
}
1076 
// One document/section indexes both "foo" and "fool" as PREFIX terms. A
// PREFIX query for "foo" matches both, but the document is expected to be
// reported exactly once.
TEST_F(IndexTest, PrefixHitDedupe) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "fool"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1093 
// Same as PrefixHitDedupe, but Merge() is called before the PREFIX query for
// "foo".
TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"foo", "fool"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(iterator));
  EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
                        kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1112 
// Checks the ToString() rendering of PREFIX iterators for three section
// masks: sections 2 and 3, all sections, and no sections. Each expected
// string shows the mask as binary digits followed by ":foo*", twice, joined
// by " OR ".
TEST_F(IndexTest, PrefixToString) {
  // Mask with only the bits for sections 2 and 3 set.
  SectionIdMask sections_2_and_3 = (1U << kSectionId2) | (1U << kSectionId3);
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, sections_2_and_3,
                          TermMatchType::PREFIX));
  EXPECT_THAT(iterator->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000001100:foo* OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000001100:foo*)"));

  // Every section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(iterator->ToString(),
              Eq("(1111111111111111111111111111111111111111111"
                 "111111111111111111111:foo* OR "
                 "11111111111111111111111111111111111111111111"
                 "11111111111111111111:foo*)"));

  // No section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskNone,
                          TermMatchType::PREFIX));
  EXPECT_THAT(iterator->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000000000:foo* OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000000000:foo*)"));
}
1143 
// Checks the ToString() rendering of EXACT_ONLY iterators for three section
// masks: sections 2 and 3, all sections, and no sections. Each expected
// string shows the mask as binary digits followed by ":foo" (no trailing
// '*'), twice, joined by " OR ".
TEST_F(IndexTest, ExactToString) {
  // Mask with only the bits for sections 2 and 3 set.
  SectionIdMask sections_2_and_3 = (1U << kSectionId2) | (1U << kSectionId3);
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, sections_2_and_3,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000001100:foo OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000001100:foo)"));

  // Every section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->ToString(),
              Eq("(1111111111111111111111111111111111111111111"
                 "111111111111111111111:foo OR "
                 "11111111111111111111111111111111111111111111"
                 "11111111111111111111:foo)"));

  // No section enabled.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskNone,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(iterator->ToString(),
              Eq("(0000000000000000000000000000000000000000000"
                 "000000000000000000000:foo OR "
                 "00000000000000000000000000000000000000000000"
                 "00000000000000000000:foo)"));
}
1174 
// Indexes two non-ASCII terms as PREFIX and checks that one can be found via
// a PREFIX query on a shorter leading substring and the other via an
// EXACT_ONLY query on the full term.
TEST_F(IndexTest, NonAsciiTerms) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"こんにちは", "あなた"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Prefix lookup using the leading part of the first term.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("こんに", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto prefix_hits = GetHits(std::move(iterator));
  EXPECT_THAT(prefix_hits,
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Exact lookup using the complete second term.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("あなた", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto exact_hits = GetHits(std::move(iterator));
  EXPECT_THAT(exact_hits,
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1199 
// Same as NonAsciiTerms, but Merge() is called before the two lookups.
TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"こんにちは", "あなた"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // Prefix lookup using the leading part of the first term.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> iterator,
      index_->GetIterator("こんに", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto prefix_hits = GetHits(std::move(iterator));
  EXPECT_THAT(prefix_hits,
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));

  // Exact lookup using the complete second term.
  ICING_ASSERT_OK_AND_ASSIGN(
      iterator,
      index_->GetIterator("あなた", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto exact_hits = GetHits(std::move(iterator));
  EXPECT_THAT(exact_hits,
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1226 
TEST_F(IndexTest,FullIndex)1227 TEST_F(IndexTest, FullIndex) {
1228   // Make a smaller index so that it's easier to fill up.
1229   Index::Options options(index_dir_, /*index_merge_size=*/1024,
1230                          /*lite_index_sort_at_indexing=*/true,
1231                          /*lite_index_sort_size=*/64);
1232   ICING_ASSERT_OK_AND_ASSIGN(
1233       index_, Index::Create(options, &filesystem_, &icing_filesystem_));
1234 
1235   std::default_random_engine random;
1236   std::vector<std::string> query_terms;
1237   std::string prefix = "prefix";
1238   for (int i = 0; i < 2600; ++i) {
1239     constexpr int kTokenSize = 5;
1240     query_terms.push_back(prefix +
1241                           RandomString(kAlNumAlphabet, kTokenSize, &random));
1242   }
1243 
1244   DocumentId document_id = 0;
1245   libtextclassifier3::Status status = libtextclassifier3::Status::OK;
1246   std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
1247   while (status.ok()) {
1248     for (int i = 0; i < 100; ++i) {
1249       Index::Editor edit = index_->Edit(document_id, kSectionId2,
1250                                         /*namespace_id=*/0);
1251       size_t idx = uniform(random);
1252       status =
1253           edit.BufferTerm(query_terms.at(idx).c_str(), TermMatchType::PREFIX);
1254       if (!status.ok()) {
1255         break;
1256       }
1257       status = edit.IndexAllBufferedTerms();
1258       if (!status.ok()) {
1259         break;
1260       }
1261     }
1262     ++document_id;
1263   }
1264 
1265   // Adding more hits should fail.
1266   Index::Editor edit = index_->Edit(document_id + 1, kSectionId2,
1267                                     /*namespace_id=*/0);
1268   std::string term = prefix + "foo";
1269   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1270   term = prefix + "bar";
1271   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1272   term = prefix + "baz";
1273   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1274   EXPECT_THAT(edit.IndexAllBufferedTerms(),
1275               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1276 
1277   for (int i = 0; i < query_terms.size(); i += 25) {
1278     ICING_ASSERT_OK_AND_ASSIGN(
1279         std::unique_ptr<DocHitInfoIterator> itr,
1280         index_->GetIterator(query_terms.at(i).c_str(), /*term_start_index=*/0,
1281                             /*unnormalized_term_length=*/0, kSectionIdMaskAll,
1282                             TermMatchType::PREFIX));
1283     // Each query term should contain at least one hit - there may have been
1284     // other hits for this term that were added.
1285     EXPECT_THAT(itr->Advance(), IsOk());
1286   }
1287   ICING_ASSERT_OK_AND_ASSIGN(
1288       std::unique_ptr<DocHitInfoIterator> last_itr,
1289       index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
1290                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
1291                           TermMatchType::PREFIX));
1292   EXPECT_THAT(last_itr->Advance(), IsOk());
1293   EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
1294 }
1295 
TEST_F(IndexTest,FullIndexMerge)1296 TEST_F(IndexTest, FullIndexMerge) {
1297   // Make a smaller index so that it's easier to fill up.
1298   Index::Options options(index_dir_, /*index_merge_size=*/1024,
1299                          /*lite_index_sort_at_indexing=*/true,
1300                          /*lite_index_sort_size=*/64);
1301   ICING_ASSERT_OK_AND_ASSIGN(
1302       index_, Index::Create(options, &filesystem_, &icing_filesystem_));
1303 
1304   std::default_random_engine random;
1305   std::vector<std::string> query_terms;
1306   std::string prefix = "prefix";
1307   for (int i = 0; i < 2600; ++i) {
1308     constexpr int kTokenSize = 5;
1309     query_terms.push_back(prefix +
1310                           RandomString(kAlNumAlphabet, kTokenSize, &random));
1311   }
1312 
1313   DocumentId document_id = 0;
1314   libtextclassifier3::Status status = libtextclassifier3::Status::OK;
1315   std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
1316   while (status.ok()) {
1317     for (int i = 0; i < 100; ++i) {
1318       Index::Editor edit = index_->Edit(document_id, kSectionId2,
1319                                         /*namespace_id=*/0);
1320       size_t idx = uniform(random);
1321       status =
1322           edit.BufferTerm(query_terms.at(idx).c_str(), TermMatchType::PREFIX);
1323       if (!status.ok()) {
1324         break;
1325       }
1326       status = edit.IndexAllBufferedTerms();
1327       if (!status.ok()) {
1328         break;
1329       }
1330     }
1331     ++document_id;
1332   }
1333   EXPECT_THAT(status,
1334               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1335 
1336   // Adding more hits should fail.
1337   Index::Editor edit = index_->Edit(document_id + 1, kSectionId2,
1338                                     /*namespace_id=*/0);
1339   std::string term = prefix + "foo";
1340   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1341   term = prefix + "bar";
1342   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1343   term = prefix + "baz";
1344   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1345   EXPECT_THAT(edit.IndexAllBufferedTerms(),
1346               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1347   ICING_ASSERT_OK_AND_ASSIGN(
1348       std::unique_ptr<DocHitInfoIterator> last_itr,
1349       index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
1350                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
1351                           TermMatchType::PREFIX));
1352   EXPECT_THAT(last_itr->Advance(), IsOk());
1353   EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
1354 
1355   // After merging with the main index. Adding more hits should succeed now.
1356   ICING_ASSERT_OK(index_->Merge());
1357   edit = index_->Edit(document_id + 1, kSectionId2, 0);
1358   prefix + "foo";
1359   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1360   term = prefix + "bar";
1361   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1362   term = prefix + "baz";
1363   EXPECT_THAT(edit.BufferTerm(term.c_str(), TermMatchType::PREFIX), IsOk());
1364   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
1365   ICING_ASSERT_OK_AND_ASSIGN(
1366       std::unique_ptr<DocHitInfoIterator> itr,
1367       index_->GetIterator(prefix + "bar", /*term_start_index=*/0,
1368                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
1369                           TermMatchType::EXACT_ONLY));
1370   // We know that "bar" should have at least one hit because we just added it!
1371   EXPECT_THAT(itr->Advance(), IsOk());
1372   EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
1373   ICING_ASSERT_OK_AND_ASSIGN(
1374       last_itr, index_->GetIterator(prefix.c_str(), /*term_start_index=*/0,
1375                                     /*unnormalized_term_length=*/0,
1376                                     kSectionIdMaskAll, TermMatchType::PREFIX));
1377   EXPECT_THAT(last_itr->Advance(), IsOk());
1378   EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
1379 }
1380 
// Optimizing an index that holds no documents must succeed and leave both
// exact and prefix lookups empty.
TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) {
  // Optimize an empty index should succeed, but have no effects.
  ICING_ASSERT_OK(
      index_->Optimize(std::vector<DocumentId>(),
                       /*new_last_added_document_id=*/kInvalidDocumentId));
  EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId);

  // The section mask follows the two term-position arguments. Passing it
  // earlier would query with an empty section mask, which could never return
  // a hit and would make the checks below vacuous.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator("", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());

  ICING_ASSERT_OK_AND_ASSIGN(
      itr, index_->GetIterator("", /*term_start_index=*/0,
                               /*unnormalized_term_length=*/0,
                               kSectionIdMaskAll, TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
1401 
// Indexes 4096 single-term documents spread round-robin over section ids
// 0-63, then checks that prefix and exact searches return the expected hits.
TEST_F(IndexTest, IndexShouldWorkAtSectionLimit) {
  constexpr int kNumDocuments = 4096;
  constexpr int kNumSections = 64;
  constexpr int kExactSection = 2;

  const std::string prefix = "prefix";
  std::default_random_engine rng;
  std::vector<std::string> terms;
  // The first 1024 hits are merged into the main index; the remaining 3072
  // are indexed after that merge.
  for (int doc = 0; doc < kNumDocuments; ++doc) {
    if (doc == 1024) {
      ICING_ASSERT_OK(index_->Merge());
    }
    // Generate a unique term for this document.
    terms.push_back(prefix + RandomString("abcdefg", 5, &rng) +
                    std::to_string(doc));
    const SectionId section_id = static_cast<SectionId>(doc % kNumSections);
    // Section 2 is the only exact section; every other one is a prefix
    // section.
    const TermMatchType::Code match_type = (section_id == kExactSection)
                                               ? TermMatchType::EXACT_ONLY
                                               : TermMatchType::PREFIX;
    Index::Editor editor = index_->Edit(/*document_id=*/doc, section_id,
                                        /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm(terms.back().c_str(), match_type));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
  }

  // Hits come back in descending document-id order, so build the expectation
  // from the highest id down.
  std::vector<DocHitInfo> expected_prefix_hits;
  for (int doc = kNumDocuments - 1; doc >= 0; --doc) {
    if (doc % kNumSections == kExactSection) {
      // Exact-section hits must not show up in a prefix search.
      continue;
    }
    expected_prefix_hits.push_back(DocHitInfo(doc));
    expected_prefix_hits.back().UpdateSection(
        /*section_id=*/doc % kNumSections);
  }

  // Check prefix search.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::vector<DocHitInfo> hits,
      GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
              TermMatchType::PREFIX));
  EXPECT_THAT(hits, ContainerEq(expected_prefix_hits));

  // Check exact search: every document's unique term maps back to exactly
  // that document and its section.
  for (int doc = 0; doc < kNumDocuments; ++doc) {
    ICING_ASSERT_OK_AND_ASSIGN(
        hits,
        GetHits(terms[doc], /*term_start_index=*/0,
                /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
    EXPECT_THAT(hits,
                ElementsAre(EqualsDocHitInfo(
                    doc, std::vector<SectionId>{
                             static_cast<SectionId>(doc % kNumSections)})));
  }
}
1456 
1457 // Skip this test on Android because of timeout.
1458 #if !defined(__ANDROID__)
TEST_F(IndexTest,IndexShouldWorkAtDocumentLimit)1459 TEST_F(IndexTest, IndexShouldWorkAtDocumentLimit) {
1460   std::string prefix = "pre";
1461   std::default_random_engine random;
1462   const int max_lite_index_size = 1024 * 1024 / 8;
1463   int lite_index_size = 0;
1464   for (int i = 0; i <= kMaxDocumentId; ++i) {
1465     if (i % max_lite_index_size == 0 && i != 0) {
1466       ICING_ASSERT_OK(index_->Merge());
1467       lite_index_size = 0;
1468     }
1469     std::string term;
1470     TermMatchType::Code term_match_type = TermMatchType::PREFIX;
1471     SectionId section_id = i % 64;
1472     if (section_id == 2) {
1473       // Make section 2 an exact section.
1474       term_match_type = TermMatchType::EXACT_ONLY;
1475       term = std::to_string(i);
1476     } else {
1477       term = prefix + RandomString("abcd", 5, &random);
1478     }
1479     Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
1480                                       /*namespace_id=*/0);
1481     ICING_ASSERT_OK(edit.BufferTerm(term.c_str(), term_match_type));
1482     ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
1483     ++lite_index_size;
1484     index_->set_last_added_document_id(i);
1485   }
1486   // Ensure that the lite index still contains some data to better test both
1487   // indexes.
1488   ASSERT_THAT(lite_index_size, Eq(max_lite_index_size - 1));
1489   EXPECT_EQ(index_->last_added_document_id(), kMaxDocumentId);
1490 
1491   std::vector<DocHitInfo> exp_prefix_hits;
1492   for (int i = 0; i <= kMaxDocumentId; ++i) {
1493     if (i % 64 == 2) {
1494       // Section 2 is an exact section, so we should not see any hits in
1495       // prefix search.
1496       continue;
1497     }
1498     exp_prefix_hits.push_back(DocHitInfo(i));
1499     exp_prefix_hits.back().UpdateSection(/*section_id=*/i % 64);
1500   }
1501   std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
1502 
1503   // Check prefix search.
1504   ICING_ASSERT_OK_AND_ASSIGN(
1505       std::vector<DocHitInfo> hits,
1506       GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
1507               TermMatchType::PREFIX));
1508   EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
1509 
1510   // Check exact search.
1511   for (int i = 0; i <= kMaxDocumentId; ++i) {
1512     if (i % 64 == 2) {
1513       // Only section 2 is an exact section
1514       ICING_ASSERT_OK_AND_ASSIGN(
1515           hits,
1516           GetHits(std::to_string(i), /*term_start_index=*/0,
1517                   /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
1518       EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
1519                             i, std::vector<SectionId>{(SectionId)(2)})));
1520     }
1521   }
1522 }
1523 #endif  // if !defined(__ANDROID__)
1524 
TEST_F(IndexTest,IndexOptimize)1525 TEST_F(IndexTest, IndexOptimize) {
1526   std::string prefix = "prefix";
1527   std::default_random_engine random;
1528   std::vector<std::string> query_terms;
1529   // Add 1024 hits to main index, and 1024 hits to lite index.
1530   for (int i = 0; i < 2048; ++i) {
1531     if (i == 1024) {
1532       ICING_ASSERT_OK(index_->Merge());
1533     }
1534     // Generate a unique term for document i.
1535     query_terms.push_back(prefix + RandomString("abcdefg", 5, &random) +
1536                           std::to_string(i));
1537     TermMatchType::Code term_match_type = TermMatchType::PREFIX;
1538     SectionId section_id = i % 64;
1539     if (section_id == 2) {
1540       // Make section 2 an exact section.
1541       term_match_type = TermMatchType::EXACT_ONLY;
1542     }
1543     Index::Editor edit = index_->Edit(/*document_id=*/i, section_id,
1544                                       /*namespace_id=*/0);
1545     ICING_ASSERT_OK(
1546         edit.BufferTerm(query_terms.at(i).c_str(), term_match_type));
1547     ICING_ASSERT_OK(edit.IndexAllBufferedTerms());
1548     index_->set_last_added_document_id(i);
1549   }
1550 
1551   // Delete one document for every three documents.
1552   DocumentId document_id = 0;
1553   DocumentId new_last_added_document_id = kInvalidDocumentId;
1554   std::vector<DocumentId> document_id_old_to_new;
1555   for (int i = 0; i < 2048; ++i) {
1556     if (i % 3 == 0) {
1557       document_id_old_to_new.push_back(kInvalidDocumentId);
1558     } else {
1559       new_last_added_document_id = document_id++;
1560       document_id_old_to_new.push_back(new_last_added_document_id);
1561     }
1562   }
1563 
1564   std::vector<DocHitInfo> exp_prefix_hits;
1565   for (int i = 0; i < 2048; ++i) {
1566     if (document_id_old_to_new[i] == kInvalidDocumentId) {
1567       continue;
1568     }
1569     if (i % 64 == 2) {
1570       // Section 2 is an exact section, so we should not see any hits in
1571       // prefix search.
1572       continue;
1573     }
1574     exp_prefix_hits.push_back(DocHitInfo(document_id_old_to_new[i]));
1575     exp_prefix_hits.back().UpdateSection(/*section_id=*/i % 64);
1576   }
1577   std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end());
1578 
1579   // Check that optimize is correct
1580   ICING_ASSERT_OK(
1581       index_->Optimize(document_id_old_to_new, new_last_added_document_id));
1582   EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
1583   // Check prefix search.
1584   ICING_ASSERT_OK_AND_ASSIGN(
1585       std::vector<DocHitInfo> hits,
1586       GetHits(prefix, /*term_start_index=*/0, /*unnormalized_term_length=*/0,
1587               TermMatchType::PREFIX));
1588   EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
1589   // Check exact search.
1590   for (int i = 0; i < 2048; ++i) {
1591     ICING_ASSERT_OK_AND_ASSIGN(
1592         hits,
1593         GetHits(query_terms[i], /*term_start_index=*/0,
1594                 /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
1595     if (document_id_old_to_new[i] == kInvalidDocumentId) {
1596       EXPECT_THAT(hits, IsEmpty());
1597     } else {
1598       EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
1599                             document_id_old_to_new[i],
1600                             std::vector<SectionId>{(SectionId)(i % 64)})));
1601     }
1602   }
1603 
1604   // Check that optimize does not block merge.
1605   ICING_ASSERT_OK(index_->Merge());
1606   EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id);
1607   // Check prefix search.
1608   ICING_ASSERT_OK_AND_ASSIGN(
1609       hits, GetHits(prefix, /*term_start_index=*/0,
1610                     /*unnormalized_term_length=*/0, TermMatchType::PREFIX));
1611   EXPECT_THAT(hits, ContainerEq(exp_prefix_hits));
1612   // Check exact search.
1613   for (int i = 0; i < 2048; ++i) {
1614     ICING_ASSERT_OK_AND_ASSIGN(
1615         hits,
1616         GetHits(query_terms[i], /*term_start_index=*/0,
1617                 /*unnormalized_term_length=*/0, TermMatchType::EXACT_ONLY));
1618     if (document_id_old_to_new[i] == kInvalidDocumentId) {
1619       EXPECT_THAT(hits, IsEmpty());
1620     } else {
1621       EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
1622                             document_id_old_to_new[i],
1623                             std::vector<SectionId>{(SectionId)(i % 64)})));
1624     }
1625   }
1626 }
1627 
// Index::Create must report INTERNAL when the underlying filesystem cannot
// create directories.
TEST_F(IndexTest, IndexCreateIOFailure) {
  // Use a mock filesystem whose directory creation is stubbed to fail, so
  // the first attempted file operation will fail.
  NiceMock<IcingMockFilesystem> failing_icing_filesystem;
  ON_CALL(failing_icing_filesystem, CreateDirectoryRecursively)
      .WillByDefault(Return(false));

  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/1024 * 1024,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);
  EXPECT_THAT(
      Index::Create(index_options, &filesystem_, &failing_icing_filesystem),
      StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
1640 
// Overwrites part of the persisted lite hit buffer and expects re-creating
// the index to fail with DATA_LOSS.
TEST_F(IndexTest, IndexCreateCorruptionFailure) {
  // Add some content to the index
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Close the index.
  index_.reset();

  // Corrupt the index file.
  const std::string hit_buffer_path = index_dir_ + "/idx/lite.hb";
  ScopedFd hit_buffer_fd(
      icing_filesystem_.OpenForWrite(hit_buffer_path.c_str()));
  ASSERT_THAT(hit_buffer_fd.is_valid(), IsTrue());

  constexpr std::string_view kCorruptBytes = "ffffffffffffffffffffff";
  // The first page of the hit_buffer is taken up by the header. Overwrite the
  // first page of content.
  const int content_offset = GetBlockSize();
  const bool write_succeeded =
      icing_filesystem_.PWrite(hit_buffer_fd.get(), content_offset,
                               kCorruptBytes.data(), kCorruptBytes.length());
  ASSERT_THAT(write_succeeded, IsTrue());

  // Recreate the index.
  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/1024 * 1024,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);
  EXPECT_THAT(Index::Create(index_options, &filesystem_, &icing_filesystem_),
              StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
1673 
// Checks that the index checksum changes as content is added and merged,
// and that UpdateChecksum() always agrees with GetChecksum().
TEST_F(IndexTest, UpdateChecksum) {
  // Add some content to the index
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  const Crc32 crc_lite_only = index_->GetChecksum();
  EXPECT_THAT(index_->UpdateChecksum(), Eq(crc_lite_only));
  EXPECT_THAT(index_->GetChecksum(), Eq(crc_lite_only));

  // Merge content into the main index.
  ASSERT_THAT(index_->Merge(), IsOk());
  const Crc32 crc_main_only = index_->GetChecksum();
  EXPECT_THAT(crc_main_only, Not(Eq(crc_lite_only)));
  EXPECT_THAT(index_->UpdateChecksum(), Eq(crc_main_only));
  EXPECT_THAT(index_->GetChecksum(), Eq(crc_main_only));

  // Add some more content to the lite index
  editor = index_->Edit(kDocumentId1, kSectionId2, /*namespace_id=*/0);
  for (const char* term : {"baz", "bat"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  const Crc32 crc_both = index_->GetChecksum();
  // The combined checksum must differ from both earlier states.
  EXPECT_THAT(crc_both, Not(Eq(crc_lite_only)));
  EXPECT_THAT(crc_both, Not(Eq(crc_main_only)));
  EXPECT_THAT(index_->UpdateChecksum(), Eq(crc_both));
  EXPECT_THAT(index_->GetChecksum(), Eq(crc_both));
}
1704 
// Hits that were persisted to disk must still be searchable after the index
// is destroyed and created again from the same directory.
TEST_F(IndexTest, IndexPersistence) {
  // Add some content to the index
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  EXPECT_THAT(index_->PersistToDisk(), IsOk());

  // Close the index.
  index_.reset();

  // Recreate the index.
  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/1024 * 1024,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(index_options, &filesystem_, &icing_filesystem_));

  // Check that the hits are present: a prefix query for "f" should find the
  // "foo" hit in document 0.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1734 
// Same as the plain persistence check, but the hits are merged into the main
// index before being persisted and reloaded.
TEST_F(IndexTest, IndexPersistenceAfterMerge) {
  // Add some content to the index
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  for (const char* term : {"foo", "bar"}) {
    ASSERT_THAT(editor.BufferTerm(term, TermMatchType::PREFIX), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK(index_->Merge());
  EXPECT_THAT(index_->PersistToDisk(), IsOk());

  // Close the index.
  index_.reset();

  // Recreate the index.
  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/1024 * 1024,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);
  ICING_ASSERT_OK_AND_ASSIGN(
      index_, Index::Create(index_options, &filesystem_, &icing_filesystem_));

  // Check that the hits are present: a prefix query for "f" should find the
  // "foo" hit in document 0.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
1765 
// Index::Create must reject a merge size of UINT32_MAX with
// INVALID_ARGUMENT.
TEST_F(IndexTest, InvalidHitBufferSize) {
  constexpr uint32_t kOversizedMergeSize =
      std::numeric_limits<uint32_t>::max();
  Index::Options index_options(index_dir_,
                               /*index_merge_size=*/kOversizedMergeSize,
                               /*lite_index_sort_at_indexing=*/true,
                               /*lite_index_sort_size=*/1024 * 8);
  EXPECT_THAT(Index::Create(index_options, &filesystem_, &icing_filesystem_),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
1773 
// FindTermsByPrefix must return no terms when num_to_return is zero or
// negative, both before and after the hits are merged.
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // "foo" matches the indexed term "fool", so any empty result is due solely
  // to the non-positive num_to_return.
  const auto expect_no_terms = [&]() {
    for (const int num_to_return : {0, -1}) {
      EXPECT_THAT(index_->FindTermsByPrefix(
                      /*prefix=*/"foo", num_to_return, TermMatchType::PREFIX,
                      kRankByDocumentCount, &impl),
                  IsOkAndHolds(IsEmpty()));
    }
  };

  expect_no_terms();

  ICING_ASSERT_OK(index_->Merge());

  expect_no_terms();
}
1809 
// A prefix lookup must return only the terms that start with that prefix,
// both before and after the hits are merged.
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  for (const char* term : {"foo", "bar"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  const auto find_terms_starting_with_b = [&]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"b", /*num_to_return=*/10, TermMatchType::PREFIX,
        kRankByDocumentCount, &impl);
  };

  // "b" should only match "bar" but not "foo".
  EXPECT_THAT(
      find_terms_starting_with_b(),
      IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // "b" should only match "bar" but not "foo".
  EXPECT_THAT(
      find_terms_starting_with_b(),
      IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
}
1836 
// FindTermsByPrefix must cap the number of returned terms at num_to_return,
// both before and after the hits are merged.
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  for (const char* term : {"fo", "foo", "fool"}) {
    EXPECT_THAT(editor.BufferTerm(term, TermMatchType::EXACT_ONLY), IsOk());
  }
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  const auto find_two_terms = [&]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/2, TermMatchType::PREFIX,
        kRankByDocumentCount, &impl);
  };

  // We have 3 results but only 2 should be returned.
  EXPECT_THAT(find_two_terms(), IsOkAndHolds(SizeIs(2)));

  ICING_ASSERT_OK(index_->Merge());

  // We have 3 results but only 2 should be returned.
  EXPECT_THAT(find_two_terms(), IsOkAndHolds(SizeIs(2)));
}
1864 
// Terms indexed under three different namespace ids must all be returned by
// a prefix lookup, both before and after the hits are merged.
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  // Document 0 / namespace 0 holds "fo".
  Index::Editor first_edit = index_->Edit(kDocumentId0, kSectionId2,
                                          /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  EXPECT_THAT(first_edit.BufferTerm("fo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(first_edit.IndexAllBufferedTerms(), IsOk());

  // Document 1 / namespace 1 holds "foo".
  Index::Editor second_edit = index_->Edit(kDocumentId1, kSectionId2,
                                           /*namespace_id=*/1);
  EXPECT_THAT(second_edit.BufferTerm("foo", TermMatchType::EXACT_ONLY),
              IsOk());
  EXPECT_THAT(second_edit.IndexAllBufferedTerms(), IsOk());

  // Document 2 / namespace 2 holds "fool".
  Index::Editor third_edit = index_->Edit(kDocumentId2, kSectionId2,
                                          /*namespace_id=*/2);
  EXPECT_THAT(third_edit.BufferTerm("fool", TermMatchType::EXACT_ONLY),
              IsOk());
  EXPECT_THAT(third_edit.IndexAllBufferedTerms(), IsOk());

  const auto find_terms_starting_with_f = [&]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        kRankByDocumentCount, &impl);
  };

  // Should return "fo", "foo" and "fool" across all namespaces.
  EXPECT_THAT(find_terms_starting_with_f(),
              IsOkAndHolds(UnorderedElementsAre(
                  EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
                  EqualsTermMetadata("fool", 1))));

  ICING_ASSERT_OK(index_->Merge());

  // Should return "fo", "foo" and "fool" across all namespaces.
  EXPECT_THAT(find_terms_starting_with_f(),
              IsOkAndHolds(UnorderedElementsAre(
                  EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
                  EqualsTermMetadata("fool", 1))));
}
1904 
// The count reported for each term must equal the number of documents that
// contain it, with higher counts listed first, before and after a merge.
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  // Document 0 contains both "foo" and "fool".
  Index::Editor first_edit = index_->Edit(kDocumentId0, kSectionId2,
                                          /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  for (const char* term : {"foo", "fool"}) {
    EXPECT_THAT(first_edit.BufferTerm(term, TermMatchType::EXACT_ONLY),
                IsOk());
  }
  EXPECT_THAT(first_edit.IndexAllBufferedTerms(), IsOk());

  // Document 1 contains only "fool".
  Index::Editor second_edit = index_->Edit(kDocumentId1, kSectionId2,
                                           /*namespace_id=*/0);
  EXPECT_THAT(second_edit.BufferTerm("fool", TermMatchType::EXACT_ONLY),
              IsOk());
  EXPECT_THAT(second_edit.IndexAllBufferedTerms(), IsOk());

  const auto find_terms_starting_with_f = [&]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        kRankByDocumentCount, &impl);
  };

  // 'foo' has 1 hit, 'fool' has 2 hits.
  EXPECT_THAT(find_terms_starting_with_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                                       EqualsTermMetadata("foo", 1))));

  ICING_ASSERT_OK(index_->Merge());

  EXPECT_THAT(find_terms_starting_with_f(),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                                       EqualsTermMetadata("foo", 1))));
}
1937 
// Indexes the same term in 4000 documents and checks that the reported count
// covers all of them, both before and after the hits are merged.
TEST_F(IndexTest, FindTermByPrefixMultipleHitBatch) {
  constexpr int kNumDocuments = 4000;
  constexpr auto kRankByDocumentCount =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  AlwaysTrueSuggestionResultCheckerImpl impl;
  // Create multiple hit batches.
  for (int doc = 0; doc < kNumDocuments; ++doc) {
    Index::Editor editor = index_->Edit(doc, kSectionId2,
                                        /*namespace_id=*/0);
    EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
    EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  }

  const auto find_terms_starting_with_f = [&]() {
    return index_->FindTermsByPrefix(
        /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
        kRankByDocumentCount, &impl);
  };

  EXPECT_THAT(
      find_terms_starting_with_f(),
      IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", kNumDocuments))));

  ICING_ASSERT_OK(index_->Merge());

  EXPECT_THAT(
      find_terms_starting_with_f(),
      IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", kNumDocuments))));
}
1964 
TEST_F(IndexTest,FindTermByPrefixShouldReturnInOrder)1965 TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) {
1966   // Push 6 term-six, 5 term-five, 4 term-four, 3 term-three, 2 term-two and one
1967   // term-one into lite index.
1968   Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2,
1969                                      /*namespace_id=*/0);
1970   AlwaysTrueSuggestionResultCheckerImpl impl;
1971   EXPECT_THAT(edit1.BufferTerm("term-one", TermMatchType::EXACT_ONLY), IsOk());
1972   EXPECT_THAT(edit1.BufferTerm("term-two", TermMatchType::EXACT_ONLY), IsOk());
1973   EXPECT_THAT(edit1.BufferTerm("term-three", TermMatchType::EXACT_ONLY),
1974               IsOk());
1975   EXPECT_THAT(edit1.BufferTerm("term-four", TermMatchType::EXACT_ONLY), IsOk());
1976   EXPECT_THAT(edit1.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
1977   EXPECT_THAT(edit1.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
1978   EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
1979 
1980   Index::Editor edit2 = index_->Edit(kDocumentId2, kSectionId2,
1981                                      /*namespace_id=*/0);
1982   EXPECT_THAT(edit2.BufferTerm("term-two", TermMatchType::EXACT_ONLY), IsOk());
1983   EXPECT_THAT(edit2.BufferTerm("term-three", TermMatchType::EXACT_ONLY),
1984               IsOk());
1985   EXPECT_THAT(edit2.BufferTerm("term-four", TermMatchType::EXACT_ONLY), IsOk());
1986   EXPECT_THAT(edit2.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
1987   EXPECT_THAT(edit2.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
1988   EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
1989 
1990   Index::Editor edit3 = index_->Edit(kDocumentId3, kSectionId2,
1991                                      /*namespace_id=*/0);
1992   EXPECT_THAT(edit3.BufferTerm("term-three", TermMatchType::EXACT_ONLY),
1993               IsOk());
1994   EXPECT_THAT(edit3.BufferTerm("term-four", TermMatchType::EXACT_ONLY), IsOk());
1995   EXPECT_THAT(edit3.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
1996   EXPECT_THAT(edit3.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
1997   EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
1998 
1999   Index::Editor edit4 = index_->Edit(kDocumentId4, kSectionId2,
2000                                      /*namespace_id=*/0);
2001   EXPECT_THAT(edit4.BufferTerm("term-four", TermMatchType::EXACT_ONLY), IsOk());
2002   EXPECT_THAT(edit4.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
2003   EXPECT_THAT(edit4.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
2004   EXPECT_THAT(edit4.IndexAllBufferedTerms(), IsOk());
2005 
2006   Index::Editor edit5 = index_->Edit(kDocumentId5, kSectionId2,
2007                                      /*namespace_id=*/0);
2008   EXPECT_THAT(edit5.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
2009   EXPECT_THAT(edit5.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
2010   EXPECT_THAT(edit5.IndexAllBufferedTerms(), IsOk());
2011 
2012   Index::Editor edit6 = index_->Edit(kDocumentId6, kSectionId2,
2013                                      /*namespace_id=*/0);
2014   EXPECT_THAT(edit6.BufferTerm("term-six", TermMatchType::EXACT_ONLY), IsOk());
2015   EXPECT_THAT(edit6.IndexAllBufferedTerms(), IsOk());
2016 
2017   // verify the order in lite index is correct.
2018   EXPECT_THAT(
2019       index_->FindTermsByPrefix(
2020           /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
2021           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2022           &impl),
2023       IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
2024                                EqualsTermMetadata("term-five", 5),
2025                                EqualsTermMetadata("term-four", 4),
2026                                EqualsTermMetadata("term-three", 3),
2027                                EqualsTermMetadata("term-two", 2),
2028                                EqualsTermMetadata("term-one", 1))));
2029 
2030   ICING_ASSERT_OK(index_->Merge());
2031 
2032   EXPECT_THAT(
2033       index_->FindTermsByPrefix(
2034           /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
2035           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2036           &impl),
2037       IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6),
2038                                EqualsTermMetadata("term-five", 5),
2039                                EqualsTermMetadata("term-four", 4),
2040                                EqualsTermMetadata("term-three", 3),
2041                                EqualsTermMetadata("term-two", 2),
2042                                EqualsTermMetadata("term-one", 1))));
2043 
2044   // keep push terms to the lite index. We will add 2 document to term-five,
2045   // term-three and term-one. The output order should be 5-6-3-4-1-2.
2046   Index::Editor edit7 = index_->Edit(kDocumentId7, kSectionId2,
2047                                      /*namespace_id=*/0);
2048   EXPECT_THAT(edit7.BufferTerm("term-one", TermMatchType::EXACT_ONLY), IsOk());
2049   EXPECT_THAT(edit7.BufferTerm("term-three", TermMatchType::EXACT_ONLY),
2050               IsOk());
2051   EXPECT_THAT(edit7.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
2052   EXPECT_THAT(edit7.IndexAllBufferedTerms(), IsOk());
2053 
2054   Index::Editor edit8 = index_->Edit(kDocumentId8, kSectionId2,
2055                                      /*namespace_id=*/0);
2056   EXPECT_THAT(edit8.BufferTerm("term-one", TermMatchType::EXACT_ONLY), IsOk());
2057   EXPECT_THAT(edit8.BufferTerm("term-three", TermMatchType::EXACT_ONLY),
2058               IsOk());
2059   EXPECT_THAT(edit8.BufferTerm("term-five", TermMatchType::EXACT_ONLY), IsOk());
2060   EXPECT_THAT(edit8.IndexAllBufferedTerms(), IsOk());
2061 
2062   // verify the combination of lite index and main index is in correct order.
2063   EXPECT_THAT(
2064       index_->FindTermsByPrefix(
2065           /*prefix=*/"t", /*num_to_return=*/10, TermMatchType::PREFIX,
2066           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2067           &impl),
2068       IsOkAndHolds(ElementsAre(
2069           EqualsTermMetadata("term-five", 7), EqualsTermMetadata("term-six", 6),
2070           EqualsTermMetadata("term-three", 5),
2071           EqualsTermMetadata("term-four", 4), EqualsTermMetadata("term-one", 3),
2072           EqualsTermMetadata("term-two", 2))));
2073 
2074   // Get the first three terms.
2075   EXPECT_THAT(
2076       index_->FindTermsByPrefix(
2077           /*prefix=*/"t", /*num_to_return=*/3, TermMatchType::PREFIX,
2078           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2079           &impl),
2080       IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-five", 7),
2081                                EqualsTermMetadata("term-six", 6),
2082                                EqualsTermMetadata("term-three", 5))));
2083 }
2084 
TEST_F(IndexTest, FindTermByPrefix_InTermMatchTypePrefix_ShouldReturnInOrder) {
  // Three documents (ids 0, 2 and 3) each buffer one PREFIX term: "fo", "foo"
  // and "fool" respectively.
  Index::Editor fo_editor = index_->Edit(kDocumentId0, kSectionId2,
                                         /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  EXPECT_THAT(fo_editor.BufferTerm("fo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(fo_editor.IndexAllBufferedTerms(), IsOk());

  Index::Editor foo_editor = index_->Edit(kDocumentId2, kSectionId2,
                                          /*namespace_id=*/0);
  EXPECT_THAT(foo_editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(foo_editor.IndexAllBufferedTerms(), IsOk());

  Index::Editor fool_editor = index_->Edit(kDocumentId3, kSectionId2,
                                           /*namespace_id=*/0);
  EXPECT_THAT(fool_editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(fool_editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  const auto by_document_count =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;

  // With PREFIX matching, each term is also credited with the documents of
  // the longer terms it prefixes, so the expected counts are:
  //   "fo"   -> 3 (its own document plus those of "foo" and "fool")
  //   "foo"  -> 2 (its own document plus that of "fool")
  //   "fool" -> 1
  EXPECT_THAT(index_->FindTermsByPrefix(
                  /*prefix=*/"f",
                  /*num_to_return=*/10, TermMatchType::PREFIX,
                  by_document_count, &impl),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
                                       EqualsTermMetadata("foo", 2),
                                       EqualsTermMetadata("fool", 1))));

  // With EXACT_ONLY matching every term counts only its own document, so all
  // terms tie and no particular order is required.
  EXPECT_THAT(index_->FindTermsByPrefix(
                  /*prefix=*/"f", /*num_to_return=*/10,
                  TermMatchType::EXACT_ONLY, by_document_count, &impl),
              IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
                                                EqualsTermMetadata("foo", 1),
                                                EqualsTermMetadata("fool", 1))));
}
2126 
TEST_F(IndexTest,FindTermByPrefixShouldReturnHitCountForMain)2127 TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) {
2128   Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
2129                                     /*namespace_id=*/0);
2130   AlwaysTrueSuggestionResultCheckerImpl impl;
2131   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
2132   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2133   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2134 
2135   edit = index_->Edit(kDocumentId1, kSectionId2,
2136                       /*namespace_id=*/0);
2137   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2138   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2139   edit = index_->Edit(kDocumentId2, kSectionId2,
2140                       /*namespace_id=*/0);
2141   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2142   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2143   edit = index_->Edit(kDocumentId3, kSectionId2,
2144                       /*namespace_id=*/0);
2145   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2146   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2147   edit = index_->Edit(kDocumentId4, kSectionId2,
2148                       /*namespace_id=*/0);
2149   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2150   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2151   edit = index_->Edit(kDocumentId5, kSectionId2,
2152                       /*namespace_id=*/0);
2153   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2154   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2155   edit = index_->Edit(kDocumentId6, kSectionId2,
2156                       /*namespace_id=*/0);
2157   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2158   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2159   edit = index_->Edit(kDocumentId7, kSectionId2,
2160                       /*namespace_id=*/0);
2161   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2162   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2163 
2164   // 'foo' has 1 hit, 'fool' has 8 hits.
2165   EXPECT_THAT(
2166       index_->FindTermsByPrefix(
2167           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
2168           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2169           &impl),
2170       IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 8),
2171                                EqualsTermMetadata("foo", 1))));
2172 
2173   ICING_ASSERT_OK(index_->Merge());
2174 
2175   EXPECT_THAT(
2176       index_->FindTermsByPrefix(
2177           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
2178           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2179           &impl),
2180       IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
2181                                         EqualsTermMetadata("fool", 8))));
2182 }
2183 
TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) {
  // Document 0 ("foo" and "fool") is indexed and merged.
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // Document 1 ("fool" only) is indexed after the merge and is not merged.
  editor = index_->Edit(kDocumentId1, kSectionId2,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // "fool" must report the sum of its pre-merge and post-merge documents.
  const auto by_document_count =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;
  EXPECT_THAT(index_->FindTermsByPrefix(
                  /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
                  by_document_count, &impl),
              IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2),
                                       EqualsTermMetadata("foo", 1))));
}
2207 
TEST_F(IndexTest,FindTermRankComparison)2208 TEST_F(IndexTest, FindTermRankComparison) {
2209   Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
2210                                     /*namespace_id=*/0);
2211   AlwaysTrueSuggestionResultCheckerImpl impl;
2212   EXPECT_THAT(edit.BufferTerm("fo", TermMatchType::EXACT_ONLY), IsOk());
2213   EXPECT_THAT(edit.BufferTerm("fo", TermMatchType::EXACT_ONLY), IsOk());
2214   EXPECT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
2215   EXPECT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2216   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2217 
2218   Index::Editor edit2 = index_->Edit(kDocumentId2, kSectionId2,
2219                                      /*namespace_id=*/0);
2220   EXPECT_THAT(edit2.BufferTerm("fo", TermMatchType::PREFIX), IsOk());
2221   EXPECT_THAT(edit2.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
2222   EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
2223 
2224   EXPECT_THAT(
2225       index_->FindTermsByPrefix(
2226           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2227           SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY,
2228           &impl),
2229       IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
2230                                EqualsTermMetadata("foo", 2),
2231                                EqualsTermMetadata("fool", 1))));
2232   EXPECT_THAT(
2233       index_->FindTermsByPrefix(
2234           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2235           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2236           &impl),
2237       IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 2),
2238                                         EqualsTermMetadata("foo", 2),
2239                                         EqualsTermMetadata("fool", 1))));
2240   EXPECT_THAT(
2241       index_->FindTermsByPrefix(
2242           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2243           SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE, &impl),
2244       IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
2245                                         EqualsTermMetadata("foo", 1),
2246                                         EqualsTermMetadata("fool", 1))));
2247 
2248   ICING_ASSERT_OK(index_->Merge());
2249 
2250   EXPECT_THAT(
2251       index_->FindTermsByPrefix(
2252           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2253           SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY,
2254           &impl),
2255       IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3),
2256                                EqualsTermMetadata("foo", 2),
2257                                EqualsTermMetadata("fool", 1))));
2258   EXPECT_THAT(
2259       index_->FindTermsByPrefix(
2260           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2261           SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
2262           &impl),
2263       IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 2),
2264                                         EqualsTermMetadata("foo", 2),
2265                                         EqualsTermMetadata("fool", 1))));
2266   EXPECT_THAT(
2267       index_->FindTermsByPrefix(
2268           /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::EXACT_ONLY,
2269           SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE, &impl),
2270       IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
2271                                         EqualsTermMetadata("foo", 1),
2272                                         EqualsTermMetadata("fool", 1))));
2273 }
2274 
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) {
  Index::Editor editor = index_->Edit(kDocumentId0, kSectionId2,
                                      /*namespace_id=*/0);
  AlwaysTrueSuggestionResultCheckerImpl impl;

  // "foo" is indexed for document 0 and then merged.
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  ICING_ASSERT_OK(index_->Merge());

  // "fool" is indexed for document 1 after the merge.
  editor = index_->Edit(kDocumentId1, kSectionId2,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // The lookup must surface both the merged term and the unmerged term, each
  // with a single document.
  const auto by_document_count =
      SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT;
  EXPECT_THAT(index_->FindTermsByPrefix(
                  /*prefix=*/"f", /*num_to_return=*/10, TermMatchType::PREFIX,
                  by_document_count, &impl),
              IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
                                                EqualsTermMetadata("fool", 1))));
}
2299 
TEST_F(IndexTest, GetElementsSize) {
  // A freshly created index reports a size of zero.
  ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_size, index_->GetElementsSize());
  EXPECT_THAT(empty_size, Eq(0));

  // Indexing a single term makes the reported size positive.
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_before_merge,
                             index_->GetElementsSize());
  EXPECT_THAT(size_before_merge, Gt(0));

  // The size stays positive once the term has been merged.
  ASSERT_THAT(index_->Merge(), IsOk());
  ICING_ASSERT_OK_AND_ASSIGN(int64_t size_after_merge,
                             index_->GetElementsSize());
  EXPECT_THAT(size_after_merge, Gt(0));
}
2317 
TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
  // Indexed before the merge:
  //   doc0/section2: "foo" and "fool" (EXACT_ONLY)
  //   doc1/section3: "foot" (PREFIX)
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId1, kSectionId3,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK(index_->Merge());

  // Indexed after the merge:
  //   doc2/section2: "footer" (EXACT_ONLY)
  //   doc2/section3: "foo" (PREFIX)
  editor = index_->Edit(kDocumentId2, kSectionId2,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId2, kSectionId3,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // An exact lookup of "foo" must return only the documents that contain
  // "foo" itself, newest document first.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::EXACT_ONLY));
  const auto hits = GetHits(std::move(foo_itr));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2350 
TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
  // Indexed before the merge:
  //   doc0/section2: "foo" and "fool" (EXACT_ONLY)
  //   doc1/section3: "foot" (PREFIX)
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId1, kSectionId3,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  ICING_ASSERT_OK(index_->Merge());

  // Indexed after the merge:
  //   doc2/section2: "footer" (EXACT_ONLY)
  //   doc2/section3: "foo" (PREFIX)
  editor = index_->Edit(kDocumentId2, kSectionId2,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("footer", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId2, kSectionId3,
                        /*namespace_id=*/0);
  EXPECT_THAT(editor.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // A prefix lookup of "foo" must additionally return doc1, whose "foot" was
  // indexed with PREFIX. The terms indexed as EXACT_ONLY ("fool", "footer")
  // must not add any hits.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> foo_itr,
      index_->GetIterator("foo", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(foo_itr));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2384 
TEST_F(IndexTest,GetDebugInfo)2385 TEST_F(IndexTest, GetDebugInfo) {
2386   // Add two documents to the lite index, merge them into the main index and
2387   // then add another doc to the lite index.
2388   Index::Editor edit =
2389       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
2390   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
2391   ASSERT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2392   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2393   edit = index_->Edit(kDocumentId1, kSectionId3,
2394                       /*namespace_id=*/0);
2395   index_->set_last_added_document_id(kDocumentId1);
2396   ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
2397   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2398   ICING_ASSERT_OK(index_->Merge());
2399 
2400   edit = index_->Edit(kDocumentId2, kSectionId2,
2401                       /*namespace_id=*/0);
2402   index_->set_last_added_document_id(kDocumentId2);
2403   ASSERT_THAT(edit.BufferTerm("footer", TermMatchType::EXACT_ONLY), IsOk());
2404   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2405   edit = index_->Edit(kDocumentId2, kSectionId3,
2406                       /*namespace_id=*/0);
2407   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
2408   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2409 
2410   IndexDebugInfoProto out0 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
2411   ICING_LOG(DBG) << "main_index_info:\n" << out0.main_index_info();
2412   ICING_LOG(DBG) << "lite_index_info:\n" << out0.lite_index_info();
2413   EXPECT_THAT(out0.main_index_info(), Not(IsEmpty()));
2414   EXPECT_THAT(out0.lite_index_info(), Not(IsEmpty()));
2415 
2416   IndexDebugInfoProto out1 = index_->GetDebugInfo(DebugInfoVerbosity::DETAILED);
2417   ICING_LOG(DBG) << "main_index_info:\n" << out1.main_index_info();
2418   ICING_LOG(DBG) << "lite_index_info:\n" << out1.lite_index_info();
2419   EXPECT_THAT(out1.main_index_info(),
2420               SizeIs(Gt(out0.main_index_info().size())));
2421   EXPECT_THAT(out1.lite_index_info(),
2422               SizeIs(Gt(out0.lite_index_info().size())));
2423 
2424   // Add one more doc to the lite index. Debug strings should change.
2425   edit = index_->Edit(kDocumentId3, kSectionId2,
2426                       /*namespace_id=*/0);
2427   index_->set_last_added_document_id(kDocumentId3);
2428   ASSERT_THAT(edit.BufferTerm("far", TermMatchType::EXACT_ONLY), IsOk());
2429   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2430 
2431   IndexDebugInfoProto out2 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
2432   ICING_LOG(DBG) << "main_index_info:\n" << out2.main_index_info();
2433   ICING_LOG(DBG) << "lite_index_info:\n" << out2.lite_index_info();
2434   EXPECT_THAT(out2.main_index_info(), Not(IsEmpty()));
2435   EXPECT_THAT(out2.lite_index_info(), Not(IsEmpty()));
2436   EXPECT_THAT(out2.main_index_info(), StrEq(out0.main_index_info()));
2437   EXPECT_THAT(out2.lite_index_info(), StrNe(out0.lite_index_info()));
2438 
2439   // Merge into the man index. Debug strings should change again.
2440   ICING_ASSERT_OK(index_->Merge());
2441 
2442   IndexDebugInfoProto out3 = index_->GetDebugInfo(DebugInfoVerbosity::BASIC);
2443   EXPECT_TRUE(out3.has_index_storage_info());
2444   ICING_LOG(DBG) << "main_index_info:\n" << out3.main_index_info();
2445   ICING_LOG(DBG) << "lite_index_info:\n" << out3.lite_index_info();
2446   EXPECT_THAT(out3.main_index_info(), Not(IsEmpty()));
2447   EXPECT_THAT(out3.lite_index_info(), Not(IsEmpty()));
2448   EXPECT_THAT(out3.main_index_info(), StrNe(out2.main_index_info()));
2449   EXPECT_THAT(out3.lite_index_info(), StrNe(out2.lite_index_info()));
2450 }
2451 
TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
  // First batch: doc0 gets "foo" (EXACT_ONLY, section 2) and "fool" (PREFIX,
  // section 3); doc1 gets "foot" (PREFIX, section 3).
  Index::Editor editor =
      index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId0, kSectionId3,
                        /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("fool", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());
  editor = index_->Edit(kDocumentId1, kSectionId3,
                        /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // Posting lists expected in the main index after this merge:
  //   "fool" {(doc0,sec3)}
  //   "foot" {(doc1,sec3)}
  //   "foo"  {(doc1,sec3),(doc0,sec3),(doc0,sec2)}
  ICING_ASSERT_OK(index_->Merge());

  // Second batch: a single new document containing "far".
  editor = index_->Edit(kDocumentId2, kSectionId2,
                        /*namespace_id=*/0);
  ASSERT_THAT(editor.BufferTerm("far", TermMatchType::EXACT_ONLY), IsOk());
  EXPECT_THAT(editor.IndexAllBufferedTerms(), IsOk());

  // This merge should add a posting list for "far" and create a backfill
  // branch point for "f". The posting lists listed above are unaffected and
  // the new ones should be:
  //   "far" {(doc2,sec2)}
  //   "f"   {(doc1,sec3),(doc0,sec3)}
  // That is, more than one pre-existing hit has to be copied into the new
  // branch point.
  ICING_ASSERT_OK(index_->Merge());

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> f_itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  const auto hits = GetHits(std::move(f_itr));
  EXPECT_THAT(
      hits,
      ElementsAre(
          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3})));
}
2500 
TEST_F(IndexTest,BackfillingNewTermsSucceeds)2501 TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
2502   // Add two documents to the lite index, merge them into the main index and
2503   // then add another doc to the lite index.
2504   Index::Editor edit =
2505       index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
2506   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::EXACT_ONLY), IsOk());
2507   ASSERT_THAT(edit.BufferTerm("fool", TermMatchType::EXACT_ONLY), IsOk());
2508   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2509   edit = index_->Edit(kDocumentId1, kSectionId3,
2510                       /*namespace_id=*/0);
2511   ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
2512   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2513   // After this merge the index should have posting lists for
2514   // "fool" {(doc0,sec2)},
2515   // "foot" {(doc1,sec3)},
2516   // "foo"  {(doc1,sec3),(doc0,sec2)}
2517   ICING_ASSERT_OK(index_->Merge());
2518 
2519   edit = index_->Edit(kDocumentId2, kSectionId2,
2520                       /*namespace_id=*/0);
2521   ASSERT_THAT(edit.BufferTerm("footer", TermMatchType::EXACT_ONLY), IsOk());
2522   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2523   edit = index_->Edit(kDocumentId2, kSectionId3,
2524                       /*namespace_id=*/0);
2525   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
2526   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2527   // Add one more doc to the lite index. Debug strings should change.
2528   edit = index_->Edit(kDocumentId3, kSectionId2,
2529                       /*namespace_id=*/0);
2530   ASSERT_THAT(edit.BufferTerm("far", TermMatchType::EXACT_ONLY), IsOk());
2531   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2532 
2533   // After this merge the index should add posting lists for "far" and "footer"
2534   // and a backfill branch point for "f". The new posting lists should be
2535   // "fool"    {(doc0,sec2)},
2536   // "foot"    {(doc1,sec3)},
2537   // "foo"     {(doc2,sec3),(doc1,sec3),(doc0,sec2)}
2538   // "footer"  {(doc2,sec2)},
2539   // "far"     {(doc3,sec2)},
2540   // "f"       {(doc2,sec3),(doc1,sec3)}
2541   // Multiple pre-existing hits should be added to the new backfill branch
2542   // point.
2543   ICING_ASSERT_OK(index_->Merge());
2544 
2545   ICING_ASSERT_OK_AND_ASSIGN(
2546       std::unique_ptr<DocHitInfoIterator> itr,
2547       index_->GetIterator("f", /*term_start_index=*/0,
2548                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
2549                           TermMatchType::PREFIX));
2550   EXPECT_THAT(
2551       GetHits(std::move(itr)),
2552       ElementsAre(
2553           EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
2554           EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3})));
2555 }
2556 
TEST_F(IndexTest,TruncateToInvalidDocumentIdHasNoEffect)2557 TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
2558   ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
2559   EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
2560   ICING_ASSERT_OK_AND_ASSIGN(
2561       std::unique_ptr<DocHitInfoIterator> itr,
2562       index_->GetIterator("f", /*term_start_index=*/0,
2563                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
2564                           TermMatchType::PREFIX));
2565   EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
2566 
2567   // Add one document to the lite index
2568   Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
2569                                     /*namespace_id=*/0);
2570   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
2571   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2572   // Clipping to invalid should have no effect.
2573   ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
2574   ICING_ASSERT_OK_AND_ASSIGN(
2575       itr, index_->GetIterator("f", /*term_start_index=*/0,
2576                                /*unnormalized_term_length=*/0,
2577                                kSectionIdMaskAll, TermMatchType::PREFIX));
2578   EXPECT_THAT(GetHits(std::move(itr)),
2579               ElementsAre(EqualsDocHitInfo(
2580                   kDocumentId0, std::vector<SectionId>{kSectionId2})));
2581 
2582   // Clipping to invalid should still have no effect even if hits are in main.
2583   ICING_ASSERT_OK(index_->Merge());
2584   ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
2585   ICING_ASSERT_OK_AND_ASSIGN(
2586       itr, index_->GetIterator("f", /*term_start_index=*/0,
2587                                /*unnormalized_term_length=*/0,
2588                                kSectionIdMaskAll, TermMatchType::PREFIX));
2589   EXPECT_THAT(GetHits(std::move(itr)),
2590               ElementsAre(EqualsDocHitInfo(
2591                   kDocumentId0, std::vector<SectionId>{kSectionId2})));
2592 
2593   edit = index_->Edit(kDocumentId1, kSectionId3,
2594                       /*namespace_id=*/0);
2595   ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
2596   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2597 
2598   // Clipping to invalid should still have no effect even if both indices have
2599   // hits.
2600   ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
2601   ICING_ASSERT_OK_AND_ASSIGN(
2602       itr, index_->GetIterator("f", /*term_start_index=*/0,
2603                                /*unnormalized_term_length=*/0,
2604                                kSectionIdMaskAll, TermMatchType::PREFIX));
2605   EXPECT_THAT(
2606       GetHits(std::move(itr)),
2607       ElementsAre(
2608           EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
2609           EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
2610 }
2611 
TEST_F(IndexTest,TruncateToLastAddedDocumentIdHasNoEffect)2612 TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
2613   ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
2614   EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
2615   ICING_ASSERT_OK_AND_ASSIGN(
2616       std::unique_ptr<DocHitInfoIterator> itr,
2617       index_->GetIterator("f", /*term_start_index=*/0,
2618                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
2619                           TermMatchType::PREFIX));
2620   EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
2621 
2622   // Add one document to the lite index
2623   Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
2624                                     /*namespace_id=*/0);
2625   ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
2626   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2627   index_->set_last_added_document_id(kDocumentId0);
2628   ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
2629   // Clipping to invalid should have no effect.
2630   ICING_ASSERT_OK_AND_ASSIGN(
2631       itr, index_->GetIterator("f", /*term_start_index=*/0,
2632                                /*unnormalized_term_length=*/0,
2633                                kSectionIdMaskAll, TermMatchType::PREFIX));
2634   EXPECT_THAT(GetHits(std::move(itr)),
2635               ElementsAre(EqualsDocHitInfo(
2636                   kDocumentId0, std::vector<SectionId>{kSectionId2})));
2637 
2638   // Clipping to invalid should still have no effect even if hits are in main.
2639   ICING_ASSERT_OK(index_->Merge());
2640   ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
2641   ICING_ASSERT_OK_AND_ASSIGN(
2642       itr, index_->GetIterator("f", /*term_start_index=*/0,
2643                                /*unnormalized_term_length=*/0,
2644                                kSectionIdMaskAll, TermMatchType::PREFIX));
2645   EXPECT_THAT(GetHits(std::move(itr)),
2646               ElementsAre(EqualsDocHitInfo(
2647                   kDocumentId0, std::vector<SectionId>{kSectionId2})));
2648 
2649   edit = index_->Edit(kDocumentId1, kSectionId3,
2650                       /*namespace_id=*/0);
2651   ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
2652   EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
2653   index_->set_last_added_document_id(kDocumentId1);
2654 
2655   // Clipping to invalid should still have no effect even if both indices have
2656   // hits.
2657   ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
2658   ICING_ASSERT_OK_AND_ASSIGN(
2659       itr, index_->GetIterator("f", /*term_start_index=*/0,
2660                                /*unnormalized_term_length=*/0,
2661                                kSectionIdMaskAll, TermMatchType::PREFIX));
2662   EXPECT_THAT(
2663       GetHits(std::move(itr)),
2664       ElementsAre(
2665           EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
2666           EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
2667 }
2668 
// Verifies that truncating to a document id that was already merged into main
// drops the later document that was added after the merge, while the merged
// document remains queryable.
TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
  // Add one document to the lite index and merge it into main.
  Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
                                    /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId0);

  ICING_ASSERT_OK(index_->Merge());

  // Add another document to the lite index.
  edit = index_->Edit(kDocumentId1, kSectionId3,
                      /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId1);

  EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());

  // Clipping to document 0 should toss out the lite index, but keep the main.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(itr)),
              ElementsAre(EqualsDocHitInfo(
                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
2698 
// Verifies that truncating to document 0 when main already holds documents 0
// and 1 (and the lite index holds document 2) leaves no hits for the queried
// prefix.
TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
  // Add two documents to the lite index and merge them into main.
  Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
                                    /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId0);
  edit = index_->Edit(kDocumentId1, kSectionId2,
                      /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foul", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId1);

  ICING_ASSERT_OK(index_->Merge());

  // Add another document to the lite index.
  edit = index_->Edit(kDocumentId2, kSectionId3,
                      /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foot", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  index_->set_last_added_document_id(kDocumentId2);

  EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());

  // Clipping to document 0 should toss out both indices.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> itr,
      index_->GetIterator("f", /*term_start_index=*/0,
                          /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                          TermMatchType::PREFIX));
  EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
}
2731 
// Verifies the fields reported by Index::GetStorageInfo() after two documents
// have been indexed and merged: every size field is non-negative and the
// block count is exactly 2.
TEST_F(IndexTest, IndexStorageInfoProto) {
  // Add two documents to the lite index and merge them into main.
  {
    Index::Editor edit =
        index_->Edit(kDocumentId0, kSectionId2, /*namespace_id=*/0);
    ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
    EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
    edit = index_->Edit(kDocumentId1, kSectionId2,
                        /*namespace_id=*/0);
    ASSERT_THAT(edit.BufferTerm("foul", TermMatchType::PREFIX), IsOk());
    EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());

    ICING_ASSERT_OK(index_->Merge());
  }

  IndexStorageInfoProto storage_info = index_->GetStorageInfo();
  EXPECT_THAT(storage_info.index_size(), Ge(0));
  EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0));
  EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0));
  EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
  EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
  EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
  // There should be 1 block for the header and 1 block for three posting lists
  // ("fo", "foo", "foul").
  EXPECT_THAT(storage_info.num_blocks(), Eq(2));
  EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
}
2759 
// Verifies the lite-index hit buffer sizes reported by
// Index::PublishQueryStats() at three points: after adding two hits, after
// sorting the lite index, and after merging the lite index into main.
TEST_F(IndexTest, PublishQueryStats) {
  // Add two documents to the lite index without merging.
  Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
                                    /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foo", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
  edit = index_->Edit(kDocumentId1, kSectionId2,
                      /*namespace_id=*/0);
  ASSERT_THAT(edit.BufferTerm("foul", TermMatchType::PREFIX), IsOk());
  EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());

  // Verify query stats.
  QueryStatsProto query_stats1;
  index_->PublishQueryStats(&query_stats1);
  EXPECT_THAT(query_stats1.lite_index_hit_buffer_byte_size(),
              Eq(2 * sizeof(TermIdHitPair::Value)));
  EXPECT_THAT(query_stats1.lite_index_hit_buffer_unsorted_byte_size(),
              Ge(2 * sizeof(TermIdHitPair::Value)));

  // Sort lite index.
  index_->SortLiteIndex();
  QueryStatsProto query_stats2;
  index_->PublishQueryStats(&query_stats2);
  EXPECT_THAT(query_stats2.lite_index_hit_buffer_byte_size(),
              Eq(2 * sizeof(TermIdHitPair::Value)));
  EXPECT_THAT(query_stats2.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));

  // Merge lite index to main index.
  ICING_ASSERT_OK(index_->Merge());
  QueryStatsProto query_stats3;
  index_->PublishQueryStats(&query_stats3);
  EXPECT_THAT(query_stats3.lite_index_hit_buffer_byte_size(), Eq(0));
  EXPECT_THAT(query_stats3.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));
}
2794 
2795 }  // namespace
2796 
2797 }  // namespace lib
2798 }  // namespace icing
2799