xref: /aosp_15_r20/external/icing/icing/result/snippet-retriever_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2019 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker //      http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker 
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/result/snippet-retriever.h"
16*8b6cd535SAndroid Build Coastguard Worker 
17*8b6cd535SAndroid Build Coastguard Worker #include <cstdint>
18*8b6cd535SAndroid Build Coastguard Worker #include <limits>
19*8b6cd535SAndroid Build Coastguard Worker #include <memory>
20*8b6cd535SAndroid Build Coastguard Worker 
21*8b6cd535SAndroid Build Coastguard Worker #include "gmock/gmock.h"
22*8b6cd535SAndroid Build Coastguard Worker #include "gtest/gtest.h"
23*8b6cd535SAndroid Build Coastguard Worker #include "icing/document-builder.h"
24*8b6cd535SAndroid Build Coastguard Worker #include "icing/feature-flags.h"
25*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/mock-filesystem.h"
26*8b6cd535SAndroid Build Coastguard Worker #include "icing/portable/equals-proto.h"
27*8b6cd535SAndroid Build Coastguard Worker #include "icing/portable/platform.h"
28*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/document.pb.h"
29*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/schema.pb.h"
30*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/search.pb.h"
31*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/term.pb.h"
32*8b6cd535SAndroid Build Coastguard Worker #include "icing/query/query-terms.h"
33*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema-builder.h"
34*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/schema-store.h"
35*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/section-manager.h"
36*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-id.h"
37*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/key-mapper.h"
38*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/common-matchers.h"
39*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/fake-clock.h"
40*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/jni-test-helpers.h"
41*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/test-data.h"
42*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/test-feature-flags.h"
43*8b6cd535SAndroid Build Coastguard Worker #include "icing/testing/tmp-directory.h"
44*8b6cd535SAndroid Build Coastguard Worker #include "icing/tokenization/language-segmenter-factory.h"
45*8b6cd535SAndroid Build Coastguard Worker #include "icing/tokenization/language-segmenter.h"
46*8b6cd535SAndroid Build Coastguard Worker #include "icing/transform/map/map-normalizer.h"
47*8b6cd535SAndroid Build Coastguard Worker #include "icing/transform/normalizer-factory.h"
48*8b6cd535SAndroid Build Coastguard Worker #include "icing/transform/normalizer.h"
49*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/icu-data-file-helper.h"
50*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/snippet-helpers.h"
51*8b6cd535SAndroid Build Coastguard Worker #include "unicode/uloc.h"
52*8b6cd535SAndroid Build Coastguard Worker 
53*8b6cd535SAndroid Build Coastguard Worker namespace icing {
54*8b6cd535SAndroid Build Coastguard Worker namespace lib {
55*8b6cd535SAndroid Build Coastguard Worker 
56*8b6cd535SAndroid Build Coastguard Worker namespace {
57*8b6cd535SAndroid Build Coastguard Worker 
58*8b6cd535SAndroid Build Coastguard Worker using ::testing::ElementsAre;
59*8b6cd535SAndroid Build Coastguard Worker using ::testing::Eq;
60*8b6cd535SAndroid Build Coastguard Worker using ::testing::IsEmpty;
61*8b6cd535SAndroid Build Coastguard Worker using ::testing::SizeIs;
62*8b6cd535SAndroid Build Coastguard Worker 
// TODO (b/246964044): drop the ENABLE_URL_TOKENIZER guard once the
// url-tokenizer is ready for export to Android, and relocate this constant to
// schema-builder.h.
#ifdef ENABLE_URL_TOKENIZER
// Short alias for the URL tokenizer enum value.
constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL{
    StringIndexingConfig::TokenizerType::URL};
#endif  // ENABLE_URL_TOKENIZER
69*8b6cd535SAndroid Build Coastguard Worker 
GetPropertyPaths(const SnippetProto & snippet)70*8b6cd535SAndroid Build Coastguard Worker std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) {
71*8b6cd535SAndroid Build Coastguard Worker   std::vector<std::string_view> paths;
72*8b6cd535SAndroid Build Coastguard Worker   for (const SnippetProto::EntryProto& entry : snippet.entries()) {
73*8b6cd535SAndroid Build Coastguard Worker     paths.push_back(entry.property_name());
74*8b6cd535SAndroid Build Coastguard Worker   }
75*8b6cd535SAndroid Build Coastguard Worker   return paths;
76*8b6cd535SAndroid Build Coastguard Worker }
77*8b6cd535SAndroid Build Coastguard Worker 
78*8b6cd535SAndroid Build Coastguard Worker class SnippetRetrieverTest : public testing::Test {
79*8b6cd535SAndroid Build Coastguard Worker  protected:
SetUp()80*8b6cd535SAndroid Build Coastguard Worker   void SetUp() override {
81*8b6cd535SAndroid Build Coastguard Worker     feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
82*8b6cd535SAndroid Build Coastguard Worker     test_dir_ = GetTestTempDir() + "/icing";
83*8b6cd535SAndroid Build Coastguard Worker     filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
84*8b6cd535SAndroid Build Coastguard Worker 
85*8b6cd535SAndroid Build Coastguard Worker     if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
86*8b6cd535SAndroid Build Coastguard Worker       ICING_ASSERT_OK(
87*8b6cd535SAndroid Build Coastguard Worker           // File generated via icu_data_file rule in //icing/BUILD.
88*8b6cd535SAndroid Build Coastguard Worker           icu_data_file_helper::SetUpIcuDataFile(
89*8b6cd535SAndroid Build Coastguard Worker               GetTestFilePath("icing/icu.dat")));
90*8b6cd535SAndroid Build Coastguard Worker     }
91*8b6cd535SAndroid Build Coastguard Worker 
92*8b6cd535SAndroid Build Coastguard Worker     jni_cache_ = GetTestJniCache();
93*8b6cd535SAndroid Build Coastguard Worker     language_segmenter_factory::SegmenterOptions options(ULOC_US,
94*8b6cd535SAndroid Build Coastguard Worker                                                          jni_cache_.get());
95*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSERT_OK_AND_ASSIGN(
96*8b6cd535SAndroid Build Coastguard Worker         language_segmenter_,
97*8b6cd535SAndroid Build Coastguard Worker         language_segmenter_factory::Create(std::move(options)));
98*8b6cd535SAndroid Build Coastguard Worker 
99*8b6cd535SAndroid Build Coastguard Worker     // Setup the schema
100*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSERT_OK_AND_ASSIGN(
101*8b6cd535SAndroid Build Coastguard Worker         schema_store_, SchemaStore::Create(&filesystem_, test_dir_,
102*8b6cd535SAndroid Build Coastguard Worker                                            &fake_clock_, feature_flags_.get()));
103*8b6cd535SAndroid Build Coastguard Worker     SchemaProto schema =
104*8b6cd535SAndroid Build Coastguard Worker         SchemaBuilder()
105*8b6cd535SAndroid Build Coastguard Worker             .AddType(
106*8b6cd535SAndroid Build Coastguard Worker                 SchemaTypeConfigBuilder()
107*8b6cd535SAndroid Build Coastguard Worker                     .SetType("email")
108*8b6cd535SAndroid Build Coastguard Worker                     .AddProperty(PropertyConfigBuilder()
109*8b6cd535SAndroid Build Coastguard Worker                                      .SetName("subject")
110*8b6cd535SAndroid Build Coastguard Worker                                      .SetDataTypeString(TERM_MATCH_PREFIX,
111*8b6cd535SAndroid Build Coastguard Worker                                                         TOKENIZER_PLAIN)
112*8b6cd535SAndroid Build Coastguard Worker                                      .SetCardinality(CARDINALITY_OPTIONAL))
113*8b6cd535SAndroid Build Coastguard Worker                     .AddProperty(PropertyConfigBuilder()
114*8b6cd535SAndroid Build Coastguard Worker                                      .SetName("body")
115*8b6cd535SAndroid Build Coastguard Worker                                      .SetDataTypeString(TERM_MATCH_EXACT,
116*8b6cd535SAndroid Build Coastguard Worker                                                         TOKENIZER_PLAIN)
117*8b6cd535SAndroid Build Coastguard Worker                                      .SetCardinality(CARDINALITY_OPTIONAL)))
118*8b6cd535SAndroid Build Coastguard Worker             .Build();
119*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSERT_OK(schema_store_->SetSchema(
120*8b6cd535SAndroid Build Coastguard Worker         schema, /*ignore_errors_and_delete_documents=*/false,
121*8b6cd535SAndroid Build Coastguard Worker         /*allow_circular_schema_definitions=*/false));
122*8b6cd535SAndroid Build Coastguard Worker 
123*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
124*8b6cd535SAndroid Build Coastguard Worker                                                 /*max_term_byte_size=*/10000));
125*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSERT_OK_AND_ASSIGN(
126*8b6cd535SAndroid Build Coastguard Worker         snippet_retriever_,
127*8b6cd535SAndroid Build Coastguard Worker         SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
128*8b6cd535SAndroid Build Coastguard Worker                                  normalizer_.get()));
129*8b6cd535SAndroid Build Coastguard Worker 
130*8b6cd535SAndroid Build Coastguard Worker     // Set limits to max - effectively no limit. Enable matching and request a
131*8b6cd535SAndroid Build Coastguard Worker     // window of 64 bytes.
132*8b6cd535SAndroid Build Coastguard Worker     snippet_spec_.set_num_to_snippet(std::numeric_limits<int32_t>::max());
133*8b6cd535SAndroid Build Coastguard Worker     snippet_spec_.set_num_matches_per_property(
134*8b6cd535SAndroid Build Coastguard Worker         std::numeric_limits<int32_t>::max());
135*8b6cd535SAndroid Build Coastguard Worker     snippet_spec_.set_max_window_utf32_length(64);
136*8b6cd535SAndroid Build Coastguard Worker   }
137*8b6cd535SAndroid Build Coastguard Worker 
TearDown()138*8b6cd535SAndroid Build Coastguard Worker   void TearDown() override {
139*8b6cd535SAndroid Build Coastguard Worker     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
140*8b6cd535SAndroid Build Coastguard Worker   }
141*8b6cd535SAndroid Build Coastguard Worker 
142*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<FeatureFlags> feature_flags_;
143*8b6cd535SAndroid Build Coastguard Worker   Filesystem filesystem_;
144*8b6cd535SAndroid Build Coastguard Worker   FakeClock fake_clock_;
145*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<SchemaStore> schema_store_;
146*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<LanguageSegmenter> language_segmenter_;
147*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<SnippetRetriever> snippet_retriever_;
148*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<Normalizer> normalizer_;
149*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<const JniCache> jni_cache_;
150*8b6cd535SAndroid Build Coastguard Worker   ResultSpecProto::SnippetSpecProto snippet_spec_;
151*8b6cd535SAndroid Build Coastguard Worker   std::string test_dir_;
152*8b6cd535SAndroid Build Coastguard Worker };
153*8b6cd535SAndroid Build Coastguard Worker 
// SnippetRetriever::Create must reject a null schema store, a null language
// segmenter and a null normalizer, each with FAILED_PRECONDITION.
TEST_F(SnippetRetrieverTest, CreationWithNullPointerShouldFail) {
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;
  SchemaStore* const schema_store = schema_store_.get();
  LanguageSegmenter* const language_segmenter = language_segmenter_.get();
  Normalizer* const normalizer = normalizer_.get();

  // Missing schema store.
  EXPECT_THAT(SnippetRetriever::Create(/*schema_store=*/nullptr,
                                       language_segmenter, normalizer),
              StatusIs(expected_code));
  // Missing language segmenter.
  EXPECT_THAT(SnippetRetriever::Create(
                  schema_store, /*language_segmenter=*/nullptr, normalizer),
              StatusIs(expected_code));
  // Missing normalizer.
  EXPECT_THAT(SnippetRetriever::Create(schema_store, language_segmenter,
                                       /*normalizer=*/nullptr),
              StatusIs(expected_code));
}
168*8b6cd535SAndroid Build Coastguard Worker 
// When the requested window is shorter than the matched token itself, the
// match is still reported for "body" but its window is empty.
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Window starts at the beginning of "three" and ends in the middle of
  // "three". len=4, orig_window= "thre"
  snippet_spec_.set_max_window_utf32_length(4);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre(""));
}
193*8b6cd535SAndroid Build Coastguard Worker 
// A window exactly as long as an odd-length match ("three", 5 characters)
// yields a window containing just the matched token.
TEST_F(SnippetRetrieverTest,
       SnippetingWindowMaxWindowSizeEqualToMatch_OddLengthMatch) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Window starts at the beginning of "three" and ends at the exact end of
  // "three". len=5, orig_window= "three"
  snippet_spec_.set_max_window_utf32_length(5);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("three"));
}
219*8b6cd535SAndroid Build Coastguard Worker 
// A window exactly as long as an even-length match ("four", 4 characters)
// yields a window containing just the matched token.
TEST_F(SnippetRetrieverTest,
       SnippetingWindowMaxWindowSizeEqualToMatch_EvenLengthMatch) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};

  // Window starts at the beginning of "four" and ends at the exact end of
  // "four". len=4, orig_window= "four"
  snippet_spec_.set_max_window_utf32_length(4);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("four"));
}
245*8b6cd535SAndroid Build Coastguard Worker 
// The untrimmed window around "three" begins in the whitespace before "two";
// the expected result is the trimmed-and-shifted window "two three four".
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // String:      "one two three four.... five"
  //               ^   ^   ^     ^        ^   ^
  // UTF-8 idx:    0   4   8     14       23  27
  // UTF-32 idx:   0   4   8     14       23  27
  //
  // The window will be:
  //   1. untrimmed, no-shifting window will be (2,17).
  //   2. trimmed, no-shifting window [4,13) "two three"
  //   3. trimmed, shifted window [4,18) "two three four"
  snippet_spec_.set_max_window_utf32_length(14);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("two three four"));
}
278*8b6cd535SAndroid Build Coastguard Worker 
// The untrimmed window around "three" begins in the middle of the token "one";
// the expected result is the trimmed-and-shifted window "two three four..".
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // String:      "one two three four.... five"
  //               ^   ^   ^     ^        ^   ^
  // UTF-8 idx:    0   4   8     14       23  27
  // UTF-32 idx:   0   4   8     14       23  27
  //
  // The window will be:
  //   1. untrimmed, no-shifting window will be (1,18).
  //   2. trimmed, no-shifting window [4,18) "two three four"
  //   3. trimmed, shifted window [4,20) "two three four.."
  snippet_spec_.set_max_window_utf32_length(16);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("two three four.."));
}
311*8b6cd535SAndroid Build Coastguard Worker 
// A window that ends part-way through a run of punctuation keeps the
// punctuation characters that fit: "one two three four..".
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Window ends in the middle of all the punctuation and window starts at 0.
  // len=20, orig_window="one two three four.."
  snippet_spec_.set_max_window_utf32_length(20);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("one two three four.."));
}
337*8b6cd535SAndroid Build Coastguard Worker 
// A window whose last character is the multi-byte punctuation mark '¿' keeps
// that character; the partial leading token ("pside") is not part of the
// expected window.
TEST_F(SnippetRetrieverTest,
       SnippetingWindowMaxWindowEndsMultiBytePunctuation) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body",
                             "Is everything upside down in Australia¿ Crikey!")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};

  // Window starts in the middle of "upside" and ends on the multi-byte
  // punctuation character.
  // len=24, orig_window="pside down in Australia¿"
  snippet_spec_.set_max_window_utf32_length(24);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Fatal assertion: entries(0) is read below and must not be evaluated when
  // the snippet has no entries.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  const SnippetProto::EntryProto& entry = snippet.entries(0);
  EXPECT_THAT(entry.property_name(), Eq("body"));
  std::string_view content = GetString(&document, entry.property_name());
  EXPECT_THAT(GetWindows(content, entry), ElementsAre("down in Australia¿"));
}
365*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the max window reaches past the multi-byte
// punctuation character '¿' that follows the last word before the cut-off.
TEST_F(SnippetRetrieverTest,
       SnippetingWindowMaxWindowBeyondMultiBytePunctuation) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body",
                             "Is everything upside down in Australia¿ Crikey!")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};

  // The window extends past the multi-byte punctuation '¿' and ends on the
  // whitespace that follows it; that trailing whitespace is not part of the
  // expected window below.
  // len=26, orig_window="upside down in Australia¿ "
  snippet_spec_.set_max_window_utf32_length(26);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("upside down in Australia¿"));
}
393*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the untrimmed window would begin before the
// start of the property value.
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // String:      "one two three four.... five"
  //               ^   ^   ^     ^        ^   ^
  // UTF-8 idx:    0   4   8     14       23  27
  // UTF-32 idx:   0   4   8     14       23  27
  //
  // The window will be:
  //   1. untrimmed, no-shifting window will be (-2,21).
  //   2. trimmed, no-shifting window [0,21) "one two three four..."
  //   3. trimmed, shifted window [0,22) "one two three four...."
  snippet_spec_.set_max_window_utf32_length(22);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four...."));
}
426*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the max window ends on the whitespace that
// separates the punctuation run from the next token.
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Window ends before "five" but after all the punctuation
  // len=26, orig_window="one two three four.... "
  snippet_spec_.set_max_window_utf32_length(26);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four...."));
}
452*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the untrimmed window would end in the middle
// of the token "five".
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // String:      "one two three four.... five"
  //               ^   ^   ^     ^        ^   ^
  // UTF-8 idx:    0   4   8     14       23  27
  // UTF-32 idx:   0   4   8     14       23  27
  //
  // The window will be:
  //   1. untrimmed, no-shifting window will be (-7,26).
  //   2. trimmed, no-shifting window [0,26) "one two three four...."
  //   3. trimmed, shifted window [0,27) "one two three four.... five"
  snippet_spec_.set_max_window_utf32_length(32);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four.... five"));
}
485*8b6cd535SAndroid Build Coastguard Worker 
// Checks that the whole property value is returned as the window when the max
// window length is 34.
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Max window size equals the size of the value.
  // len=34, orig_window="one two three four.... five"
  // NOTE(review): the value itself is 27 characters long, so "equals"
  // presumably means the untrimmed window ends exactly at the end of the
  // value - confirm against the windowing logic.
  snippet_spec_.set_max_window_utf32_length(34);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four.... five"));
}
511*8b6cd535SAndroid Build Coastguard Worker 
// Checks that the whole property value is returned as the window when the max
// window length exceeds what the value can fill.
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};

  // Max window size exceeds the size of the value.
  // len=36, orig_window="one two three four.... five"
  snippet_spec_.set_max_window_utf32_length(36);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four.... five"));
}
537*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the match is close enough to the start of
// the text that the untrimmed window would begin before index 0.
TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStart) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five six")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"two"}}};

  // String:      "one two three four.... five six"
  //               ^   ^   ^     ^        ^    ^  ^
  // UTF-8 idx:    0   4   8     14       23  28  31
  // UTF-32 idx:   0   4   8     14       23  28  31
  //
  // The untrimmed window will extend past the start of the text.
  // The window will be:
  //   1. untrimmed, no-shifting window will be (-10,19).
  //   2. trimmed, no-shifting window [0,19) "one two three four."
  //   3. trimmed, shifted window [0,27) "one two three four.... five"
  snippet_spec_.set_max_window_utf32_length(28);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four.... five"));
}
571*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the match is close enough to the end of the
// text that the untrimmed window would run past the last character.
TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEnd) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four.... five six")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"five"}}};

  // String:      "one two three four.... five six"
  //               ^   ^   ^     ^        ^    ^  ^
  // UTF-8 idx:    0   4   8     14       23  28  31
  // UTF-32 idx:   0   4   8     14       23  28  31
  //
  // The untrimmed window will extend past the end of the text.
  // The window will be:
  //   1. untrimmed, no-shifting window will be (10,39).
  //   2. trimmed, no-shifting window [14,31) "four.... five six"
  //   3. trimmed, shifted window [4,31) "two three four.... five six"
  snippet_spec_.set_max_window_utf32_length(28);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("two three four.... five six"));
}
605*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the match is near the start of a text that
// is shorter than the max window length.
TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStartShortText) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four....")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"two"}}};

  // String:      "one two three four...."
  //               ^   ^   ^     ^       ^
  // UTF-8 idx:    0   4   8     14      22
  // UTF-32 idx:   0   4   8     14      22
  //
  // The untrimmed window will extend past the start of the text.
  // The window will be:
  //   1. untrimmed, no-shifting window will be (-10,19).
  //   2. trimmed, no-shifting window [0, 19) "one two three four."
  //   3. trimmed, shifted window [0, 22) "one two three four...."
  snippet_spec_.set_max_window_utf32_length(28);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four...."));
}
639*8b6cd535SAndroid Build Coastguard Worker 
// Checks the returned window when the match is near the end of a text that is
// shorter than the max window length.
TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEndShortText) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "counting")
          .AddStringProperty("body", "one two three four....")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};

  // String:      "one two three four...."
  //               ^   ^   ^     ^       ^
  // UTF-8 idx:    0   4   8     14      22
  // UTF-32 idx:   0   4   8     14      22
  //
  // The untrimmed window will extend past the end of the text.
  // The window will be:
  //   1. untrimmed, no-shifting window will be (1,30).
  //   2. trimmed, no-shifting window [4, 22) "two three four...."
  //   3. trimmed, shifted window [0, 22) "one two three four...."
  snippet_spec_.set_max_window_utf32_length(28);
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("one two three four...."));
}
673*8b6cd535SAndroid Build Coastguard Worker 
// Checks that a prefix query "f" produces a snippet for "subject" only, with
// the full token "foo" as the match and "f" as the submatch.
TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "subject foo")
          .AddStringProperty("body", "Only a fool would match this content.")
          .Build();
  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);

  // Check the snippets. 'f' should match prefix-enabled property 'subject', but
  // not exact-only property 'body'
  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("subject foo"));
  EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
  EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("f"));
}
698*8b6cd535SAndroid Build Coastguard Worker 
// Expects no snippet entries for an exact-match query "f": the document
// contains "foo" and "fool" but no standalone "f".
TEST_F(SnippetRetrieverTest, ExactSnippeting) {
  SectionRestrictQueryTermsMap terms{{"", {"f"}}};
  SectionIdMask mask = 0b00000011;

  DocumentBuilder builder;
  builder.SetKey("icing", "email/1");
  builder.SetSchema("email");
  builder.AddStringProperty("subject", "subject foo");
  builder.AddStringProperty("body", "Only a fool would match this content.");
  DocumentProto email = builder.Build();

  SnippetProto result = snippet_retriever_->RetrieveSnippet(
      terms, TERM_MATCH_EXACT, snippet_spec_, email, mask);

  // Nothing should have been snippeted.
  EXPECT_THAT(result.entries(), IsEmpty());
}
716*8b6cd535SAndroid Build Coastguard Worker 
// Checks that with a max window length of 0 the returned window is empty while
// the match and submatch are still reported.
TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "subject foo")
          .AddStringProperty("body", "Only a fool would match this content.")
          .Build();

  snippet_spec_.set_max_window_utf32_length(0);

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}};
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // Check the snippets
  // ASSERT (not EXPECT): the checks below index entries(0), which must not
  // run if the entry list does not have exactly one element.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
  EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
  EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
742*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingMultipleMatches)743*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
744*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
745*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
746*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
747*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
748*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", "subject foo")
749*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
750*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
751*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
752*8b6cd535SAndroid Build Coastguard Worker           .Build();
753*8b6cd535SAndroid Build Coastguard Worker   // String:      "Concerning the subject of foo, we need to begin considering "
754*8b6cd535SAndroid Build Coastguard Worker   //               ^          ^   ^       ^  ^    ^  ^    ^  ^     ^
755*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    0          11  15     23  26  31  34  39  42    48
756*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   0          11  15     23  26  31  34  39  42    48
757*8b6cd535SAndroid Build Coastguard Worker   //
758*8b6cd535SAndroid Build Coastguard Worker   // String ctd:  "our options regarding body bar."
759*8b6cd535SAndroid Build Coastguard Worker   //               ^   ^       ^         ^    ^   ^
760*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    60  64      72        82   87  91
761*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   60  64      72        82   87  91
762*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
763*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
764*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
765*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
766*8b6cd535SAndroid Build Coastguard Worker 
767*8b6cd535SAndroid Build Coastguard Worker   // Check the snippets
768*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(2));
769*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
770*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
771*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
772*8b6cd535SAndroid Build Coastguard Worker   // The first window will be:
773*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (-6,59).
774*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [0, 59) "Concerning... considering".
775*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 63) "Concerning... our"
776*8b6cd535SAndroid Build Coastguard Worker   // The second window will be:
777*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (54,91).
778*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [60, 91) "our... bar.".
779*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [31, 91) "we... bar."
780*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
781*8b6cd535SAndroid Build Coastguard Worker       GetWindows(content, snippet.entries(0)),
782*8b6cd535SAndroid Build Coastguard Worker       ElementsAre(
783*8b6cd535SAndroid Build Coastguard Worker           "Concerning the subject of foo, we need to begin considering our",
784*8b6cd535SAndroid Build Coastguard Worker           "we need to begin considering our options regarding body bar."));
785*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
786*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("foo", "bar"));
787*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
788*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("foo", "bar"));
789*8b6cd535SAndroid Build Coastguard Worker 
790*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
791*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
792*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)),
793*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject foo"));
794*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
795*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo"));
796*8b6cd535SAndroid Build Coastguard Worker }
797*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingMultipleMatchesSectionRestrict)798*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
799*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
800*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
801*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
802*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
803*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", "subject foo")
804*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
805*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
806*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
807*8b6cd535SAndroid Build Coastguard Worker           .Build();
808*8b6cd535SAndroid Build Coastguard Worker   // String:      "Concerning the subject of foo, we need to begin considering "
809*8b6cd535SAndroid Build Coastguard Worker   //               ^          ^   ^       ^  ^    ^  ^    ^  ^     ^
810*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    0          11  15     23  26  31  34  39  42    48
811*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   0          11  15     23  26  31  34  39  42    48
812*8b6cd535SAndroid Build Coastguard Worker   //
813*8b6cd535SAndroid Build Coastguard Worker   // String ctd:  "our options regarding body bar."
814*8b6cd535SAndroid Build Coastguard Worker   //               ^   ^       ^         ^    ^   ^
815*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    60  64      72        82   87  91
816*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   60  64      72        82   87  91
817*8b6cd535SAndroid Build Coastguard Worker   //
818*8b6cd535SAndroid Build Coastguard Worker   // Section 1 "subject" is not in the section_mask, so no snippet information
819*8b6cd535SAndroid Build Coastguard Worker   // from that section should be returned by the SnippetRetriever.
820*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
821*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
822*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
823*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
824*8b6cd535SAndroid Build Coastguard Worker 
825*8b6cd535SAndroid Build Coastguard Worker   // Check the snippets
826*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(1));
827*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
828*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
829*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
830*8b6cd535SAndroid Build Coastguard Worker   // The first window will be:
831*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (-6,59).
832*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [0, 59) "Concerning... considering".
833*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 63) "Concerning... our"
834*8b6cd535SAndroid Build Coastguard Worker   // The second window will be:
835*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (54,91).
836*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [60, 91) "our... bar.".
837*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [31, 91) "we... bar."
838*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
839*8b6cd535SAndroid Build Coastguard Worker       GetWindows(content, snippet.entries(0)),
840*8b6cd535SAndroid Build Coastguard Worker       ElementsAre(
841*8b6cd535SAndroid Build Coastguard Worker           "Concerning the subject of foo, we need to begin considering our",
842*8b6cd535SAndroid Build Coastguard Worker           "we need to begin considering our options regarding body bar."));
843*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
844*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("foo", "bar"));
845*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
846*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("foo", "bar"));
847*8b6cd535SAndroid Build Coastguard Worker }
848*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingMultipleMatchesSectionRestrictedTerm)849*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
850*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
851*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
852*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
853*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
854*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", "subject foo")
855*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
856*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
857*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
858*8b6cd535SAndroid Build Coastguard Worker           .Build();
859*8b6cd535SAndroid Build Coastguard Worker   // String:      "Concerning the subject of foo, we need to begin considering "
860*8b6cd535SAndroid Build Coastguard Worker   //               ^          ^   ^       ^  ^    ^  ^    ^  ^     ^
861*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    0          11  15     23  26  31  34  39  42    48
862*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   0          11  15     23  26  31  34  39  42    48
863*8b6cd535SAndroid Build Coastguard Worker   //
864*8b6cd535SAndroid Build Coastguard Worker   // String ctd:  "our options regarding body bar."
865*8b6cd535SAndroid Build Coastguard Worker   //               ^   ^       ^         ^    ^   ^
866*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    60  64      72        82   87  91
867*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   60  64      72        82   87  91
868*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
869*8b6cd535SAndroid Build Coastguard Worker   // "subject" should match in both sections, but "foo" is restricted to "body"
870*8b6cd535SAndroid Build Coastguard Worker   // so it should only match in the 'body' section and not the 'subject'
871*8b6cd535SAndroid Build Coastguard Worker   // section.
872*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"subject"}},
873*8b6cd535SAndroid Build Coastguard Worker                                            {"body", {"foo"}}};
874*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
875*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
876*8b6cd535SAndroid Build Coastguard Worker 
877*8b6cd535SAndroid Build Coastguard Worker   // Check the snippets
878*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(2));
879*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
880*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
881*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
882*8b6cd535SAndroid Build Coastguard Worker   // The first window will be:
883*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (-15,50).
884*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [0, 47) "Concerning... begin".
885*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 63) "Concerning... our"
886*8b6cd535SAndroid Build Coastguard Worker   // The second window will be:
887*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (-6,59).
888*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [0, 59) "Concerning... considering".
889*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 63) "Concerning... our"
890*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
891*8b6cd535SAndroid Build Coastguard Worker       GetWindows(content, snippet.entries(0)),
892*8b6cd535SAndroid Build Coastguard Worker       ElementsAre(
893*8b6cd535SAndroid Build Coastguard Worker           "Concerning the subject of foo, we need to begin considering our",
894*8b6cd535SAndroid Build Coastguard Worker           "Concerning the subject of foo, we need to begin considering our"));
895*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
896*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject", "foo"));
897*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
898*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject", "foo"));
899*8b6cd535SAndroid Build Coastguard Worker 
900*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
901*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
902*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)),
903*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject foo"));
904*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject"));
905*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)),
906*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject"));
907*8b6cd535SAndroid Build Coastguard Worker }
908*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingMultipleMatchesOneMatchPerProperty)909*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
910*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
911*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
912*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
913*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
914*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", "subject foo")
915*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
916*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
917*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
918*8b6cd535SAndroid Build Coastguard Worker           .Build();
919*8b6cd535SAndroid Build Coastguard Worker 
920*8b6cd535SAndroid Build Coastguard Worker   // String:      "Concerning the subject of foo, we need to begin considering "
921*8b6cd535SAndroid Build Coastguard Worker   //               ^          ^   ^       ^  ^    ^  ^    ^  ^     ^
922*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    0          11  15     23  26  31  34  39  42    48
923*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   0          11  15     23  26  31  34  39  42    48
924*8b6cd535SAndroid Build Coastguard Worker   //
925*8b6cd535SAndroid Build Coastguard Worker   // String ctd:  "our options regarding body bar."
926*8b6cd535SAndroid Build Coastguard Worker   //               ^   ^       ^         ^    ^   ^
927*8b6cd535SAndroid Build Coastguard Worker   // UTF-8 idx:    60  64      72        82   87  91
928*8b6cd535SAndroid Build Coastguard Worker   // UTF-32 idx:   60  64      72        82   87  91
929*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_num_matches_per_property(1);
930*8b6cd535SAndroid Build Coastguard Worker 
931*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
932*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
933*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
934*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
935*8b6cd535SAndroid Build Coastguard Worker 
936*8b6cd535SAndroid Build Coastguard Worker   // Check the snippets
937*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(2));
938*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
939*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
940*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
941*8b6cd535SAndroid Build Coastguard Worker   // The window will be:
942*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (-6,59).
943*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [0, 59) "Concerning... considering".
944*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 63) "Concerning... our"
945*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
946*8b6cd535SAndroid Build Coastguard Worker       GetWindows(content, snippet.entries(0)),
947*8b6cd535SAndroid Build Coastguard Worker       ElementsAre(
948*8b6cd535SAndroid Build Coastguard Worker           "Concerning the subject of foo, we need to begin considering our"));
949*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
950*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("foo"));
951*8b6cd535SAndroid Build Coastguard Worker 
952*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
953*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
954*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)),
955*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("subject foo"));
956*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
957*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("foo"));
958*8b6cd535SAndroid Build Coastguard Worker }
959*8b6cd535SAndroid Build Coastguard Worker 
// Verifies prefix matching across letter case: the lower-case query term "md"
// is expected to match the token "MDI" in the subject, with the full token
// reported as the match and only "MD" reported as the submatch.
TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "MDI team")
          .AddStringProperty("body", "Some members are in Zürich.")
          .Build();
  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"md"}}};
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);

  // The size check is fatal so that a wrong entry count stops the test here
  // instead of indexing past the end of entries() below.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("MDI team"));
  EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI"));
  EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("MD"));
}
981*8b6cd535SAndroid Build Coastguard Worker 
// Verifies exact matching across case and diacritics: the plain query term
// "zurich" is expected to match the token "Zürich" in the body, and both the
// match and the submatch are reported using the original document text.
TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) {
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "email/1")
          .SetSchema("email")
          .AddStringProperty("subject", "MDI team")
          .AddStringProperty("body", "Some members are in Zürich.")
          .Build();

  SectionIdMask section_mask = 0b00000011;
  SectionRestrictQueryTermsMap query_terms{{"", {"zurich"}}};
  SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
      query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);

  // The size check is fatal so that a wrong entry count stops the test here
  // instead of indexing past the end of entries() below.
  ASSERT_THAT(snippet.entries(), SizeIs(1));
  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
  std::string_view content =
      GetString(&document, snippet.entries(0).property_name());
  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
              ElementsAre("Some members are in Zürich."));
  EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich"));

  EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
              ElementsAre("Zürich"));
}
1007*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingTestOneLevel)1008*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {
1009*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1010*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1011*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1012*8b6cd535SAndroid Build Coastguard Worker                        .SetType("SingleLevelType")
1013*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1014*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("X")
1015*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1016*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1017*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1018*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1019*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Y")
1020*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1021*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1022*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1023*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1024*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Z")
1025*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1026*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1027*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1028*8b6cd535SAndroid Build Coastguard Worker           .Build();
1029*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1030*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1031*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1032*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1033*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1034*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1035*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1036*8b6cd535SAndroid Build Coastguard Worker 
1037*8b6cd535SAndroid Build Coastguard Worker   std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
1038*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document;
1039*8b6cd535SAndroid Build Coastguard Worker   document.set_schema("SingleLevelType");
1040*8b6cd535SAndroid Build Coastguard Worker   PropertyProto* prop = document.add_properties();
1041*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("X");
1042*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1043*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1044*8b6cd535SAndroid Build Coastguard Worker   }
1045*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1046*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Y");
1047*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1048*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1049*8b6cd535SAndroid Build Coastguard Worker   }
1050*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1051*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Z");
1052*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1053*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1054*8b6cd535SAndroid Build Coastguard Worker   }
1055*8b6cd535SAndroid Build Coastguard Worker 
1056*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000111;
1057*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
1058*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1059*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
1060*8b6cd535SAndroid Build Coastguard Worker 
1061*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(6));
1062*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("X[1]"));
1063*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1064*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1065*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
1066*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
1067*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
1068*8b6cd535SAndroid Build Coastguard Worker 
1069*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]"));
1070*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
1071*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
1072*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
1073*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
1074*8b6cd535SAndroid Build Coastguard Worker 
1075*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetPropertyPaths(snippet),
1076*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]"));
1077*8b6cd535SAndroid Build Coastguard Worker }
1078*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingTestMultiLevel)1079*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {
1080*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1081*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1082*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1083*8b6cd535SAndroid Build Coastguard Worker                        .SetType("SingleLevelType")
1084*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1085*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("X")
1086*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1087*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1088*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1089*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1090*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Y")
1091*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1092*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1093*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1094*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1095*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Z")
1096*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1097*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1098*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1099*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1100*8b6cd535SAndroid Build Coastguard Worker                        .SetType("MultiLevelType")
1101*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1102*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("A")
1103*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1104*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1105*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1106*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL))
1107*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1108*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("B")
1109*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1110*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1111*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1112*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL))
1113*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1114*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("C")
1115*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1116*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1117*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1118*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL)))
1119*8b6cd535SAndroid Build Coastguard Worker           .Build();
1120*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1121*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1122*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1123*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1124*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1125*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1126*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1127*8b6cd535SAndroid Build Coastguard Worker 
1128*8b6cd535SAndroid Build Coastguard Worker   std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
1129*8b6cd535SAndroid Build Coastguard Worker   DocumentProto subdocument;
1130*8b6cd535SAndroid Build Coastguard Worker   PropertyProto* prop = subdocument.add_properties();
1131*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("X");
1132*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1133*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1134*8b6cd535SAndroid Build Coastguard Worker   }
1135*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1136*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Y");
1137*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1138*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1139*8b6cd535SAndroid Build Coastguard Worker   }
1140*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1141*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Z");
1142*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1143*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1144*8b6cd535SAndroid Build Coastguard Worker   }
1145*8b6cd535SAndroid Build Coastguard Worker 
1146*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document;
1147*8b6cd535SAndroid Build Coastguard Worker   document.set_schema("MultiLevelType");
1148*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1149*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("A");
1150*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1151*8b6cd535SAndroid Build Coastguard Worker 
1152*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1153*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("B");
1154*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1155*8b6cd535SAndroid Build Coastguard Worker 
1156*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1157*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("C");
1158*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1159*8b6cd535SAndroid Build Coastguard Worker 
1160*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b111111111;
1161*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
1162*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1163*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
1164*8b6cd535SAndroid Build Coastguard Worker 
1165*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(18));
1166*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("A.X[1]"));
1167*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1168*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1169*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
1170*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
1171*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
1172*8b6cd535SAndroid Build Coastguard Worker 
1173*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]"));
1174*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
1175*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
1176*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
1177*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
1178*8b6cd535SAndroid Build Coastguard Worker 
1179*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
1180*8b6cd535SAndroid Build Coastguard Worker       GetPropertyPaths(snippet),
1181*8b6cd535SAndroid Build Coastguard Worker       ElementsAre("A.X[1]", "A.X[3]", "A.Y[1]", "A.Y[3]", "A.Z[1]", "A.Z[3]",
1182*8b6cd535SAndroid Build Coastguard Worker                   "B.X[1]", "B.X[3]", "B.Y[1]", "B.Y[3]", "B.Z[1]", "B.Z[3]",
1183*8b6cd535SAndroid Build Coastguard Worker                   "C.X[1]", "C.X[3]", "C.Y[1]", "C.Y[3]", "C.Z[1]", "C.Z[3]"));
1184*8b6cd535SAndroid Build Coastguard Worker }
1185*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingTestMultiLevelRepeated)1186*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {
1187*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1188*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1189*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1190*8b6cd535SAndroid Build Coastguard Worker                        .SetType("SingleLevelType")
1191*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1192*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("X")
1193*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1194*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1195*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1196*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1197*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Y")
1198*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1199*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1200*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1201*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1202*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Z")
1203*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1204*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1205*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1206*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1207*8b6cd535SAndroid Build Coastguard Worker                        .SetType("MultiLevelType")
1208*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1209*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("A")
1210*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1211*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1212*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1213*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1214*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1215*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("B")
1216*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1217*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1218*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1219*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1220*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1221*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("C")
1222*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1223*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1224*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1225*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1226*8b6cd535SAndroid Build Coastguard Worker           .Build();
1227*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1228*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1229*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1230*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1231*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1232*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1233*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1234*8b6cd535SAndroid Build Coastguard Worker 
1235*8b6cd535SAndroid Build Coastguard Worker   std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
1236*8b6cd535SAndroid Build Coastguard Worker   DocumentProto subdocument;
1237*8b6cd535SAndroid Build Coastguard Worker   PropertyProto* prop = subdocument.add_properties();
1238*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("X");
1239*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1240*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1241*8b6cd535SAndroid Build Coastguard Worker   }
1242*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1243*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Y");
1244*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1245*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1246*8b6cd535SAndroid Build Coastguard Worker   }
1247*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1248*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Z");
1249*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& s : string_values) {
1250*8b6cd535SAndroid Build Coastguard Worker     prop->add_string_values(s);
1251*8b6cd535SAndroid Build Coastguard Worker   }
1252*8b6cd535SAndroid Build Coastguard Worker 
1253*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document;
1254*8b6cd535SAndroid Build Coastguard Worker   document.set_schema("MultiLevelType");
1255*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1256*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("A");
1257*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1258*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1259*8b6cd535SAndroid Build Coastguard Worker 
1260*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1261*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("B");
1262*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1263*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1264*8b6cd535SAndroid Build Coastguard Worker 
1265*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1266*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("C");
1267*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1268*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1269*8b6cd535SAndroid Build Coastguard Worker 
1270*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b111111111;
1271*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
1272*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1273*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
1274*8b6cd535SAndroid Build Coastguard Worker 
1275*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(36));
1276*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X[1]"));
1277*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1278*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1279*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
1280*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
1281*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
1282*8b6cd535SAndroid Build Coastguard Worker 
1283*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]"));
1284*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
1285*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
1286*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
1287*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
1288*8b6cd535SAndroid Build Coastguard Worker 
1289*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetPropertyPaths(snippet),
1290*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]",
1291*8b6cd535SAndroid Build Coastguard Worker                           "A[0].Y[1]", "A[0].Y[3]", "A[1].Y[1]", "A[1].Y[3]",
1292*8b6cd535SAndroid Build Coastguard Worker                           "A[0].Z[1]", "A[0].Z[3]", "A[1].Z[1]", "A[1].Z[3]",
1293*8b6cd535SAndroid Build Coastguard Worker                           "B[0].X[1]", "B[0].X[3]", "B[1].X[1]", "B[1].X[3]",
1294*8b6cd535SAndroid Build Coastguard Worker                           "B[0].Y[1]", "B[0].Y[3]", "B[1].Y[1]", "B[1].Y[3]",
1295*8b6cd535SAndroid Build Coastguard Worker                           "B[0].Z[1]", "B[0].Z[3]", "B[1].Z[1]", "B[1].Z[3]",
1296*8b6cd535SAndroid Build Coastguard Worker                           "C[0].X[1]", "C[0].X[3]", "C[1].X[1]", "C[1].X[3]",
1297*8b6cd535SAndroid Build Coastguard Worker                           "C[0].Y[1]", "C[0].Y[3]", "C[1].Y[1]", "C[1].Y[3]",
1298*8b6cd535SAndroid Build Coastguard Worker                           "C[0].Z[1]", "C[0].Z[3]", "C[1].Z[1]", "C[1].Z[3]"));
1299*8b6cd535SAndroid Build Coastguard Worker }
1300*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippetingTestMultiLevelSingleValue)1301*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {
1302*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1303*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1304*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1305*8b6cd535SAndroid Build Coastguard Worker                        .SetType("SingleLevelType")
1306*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1307*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("X")
1308*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1309*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1310*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL))
1311*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1312*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Y")
1313*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1314*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1315*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL))
1316*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1317*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("Z")
1318*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1319*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_PLAIN)
1320*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_OPTIONAL)))
1321*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1322*8b6cd535SAndroid Build Coastguard Worker                        .SetType("MultiLevelType")
1323*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1324*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("A")
1325*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1326*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1327*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1328*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1329*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1330*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("B")
1331*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1332*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1333*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1334*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED))
1335*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1336*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("C")
1337*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeDocument(
1338*8b6cd535SAndroid Build Coastguard Worker                                             "SingleLevelType",
1339*8b6cd535SAndroid Build Coastguard Worker                                             /*index_nested_properties=*/true)
1340*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1341*8b6cd535SAndroid Build Coastguard Worker           .Build();
1342*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1343*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1344*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1345*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1346*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1347*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1348*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1349*8b6cd535SAndroid Build Coastguard Worker 
1350*8b6cd535SAndroid Build Coastguard Worker   DocumentProto subdocument;
1351*8b6cd535SAndroid Build Coastguard Worker   PropertyProto* prop = subdocument.add_properties();
1352*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("X");
1353*8b6cd535SAndroid Build Coastguard Worker   prop->add_string_values("polo");
1354*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1355*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Y");
1356*8b6cd535SAndroid Build Coastguard Worker   prop->add_string_values("marco");
1357*8b6cd535SAndroid Build Coastguard Worker   prop = subdocument.add_properties();
1358*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("Z");
1359*8b6cd535SAndroid Build Coastguard Worker   prop->add_string_values("polo");
1360*8b6cd535SAndroid Build Coastguard Worker 
1361*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document;
1362*8b6cd535SAndroid Build Coastguard Worker   document.set_schema("MultiLevelType");
1363*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1364*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("A");
1365*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1366*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1367*8b6cd535SAndroid Build Coastguard Worker 
1368*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1369*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("B");
1370*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1371*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1372*8b6cd535SAndroid Build Coastguard Worker 
1373*8b6cd535SAndroid Build Coastguard Worker   prop = document.add_properties();
1374*8b6cd535SAndroid Build Coastguard Worker   prop->set_name("C");
1375*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1376*8b6cd535SAndroid Build Coastguard Worker   *prop->add_document_values() = subdocument;
1377*8b6cd535SAndroid Build Coastguard Worker 
1378*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b111111111;
1379*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
1380*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1381*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
1382*8b6cd535SAndroid Build Coastguard Worker 
1383*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(), SizeIs(12));
1384*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X"));
1385*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1386*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1387*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
1388*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
1389*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("polo"));
1390*8b6cd535SAndroid Build Coastguard Worker 
1391*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X"));
1392*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(1).property_name());
1393*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
1394*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
1395*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(1)), ElementsAre("polo"));
1396*8b6cd535SAndroid Build Coastguard Worker 
1397*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(
1398*8b6cd535SAndroid Build Coastguard Worker       GetPropertyPaths(snippet),
1399*8b6cd535SAndroid Build Coastguard Worker       ElementsAre("A[0].X", "A[1].X", "A[0].Z", "A[1].Z", "B[0].X", "B[1].X",
1400*8b6cd535SAndroid Build Coastguard Worker                   "B[0].Z", "B[1].Z", "C[0].X", "C[1].X", "C[0].Z", "C[1].Z"));
1401*8b6cd535SAndroid Build Coastguard Worker }
1402*8b6cd535SAndroid Build Coastguard Worker 
// Verifies that a prefix query for a single CJK character ("走") reports the
// containing segment as the match and the query term as the submatch within
// "subject", and that the reported UTF-16 position/lengths are correct.
TEST_F(SnippetRetrieverTest,CJKSnippetMatchTest)1403*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, CJKSnippetMatchTest) {
1404*8b6cd535SAndroid Build Coastguard Worker   // String:     "我每天走路去上班。"
1405*8b6cd535SAndroid Build Coastguard Worker   //              ^ ^  ^   ^^
1406*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:    0 3  9  15 18
1407*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:   0 1  3   5 6
1408*8b6cd535SAndroid Build Coastguard Worker   // Breaks into segments: "我", "每天", "走路", "去", "上班"
1409*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kChinese = "我每天走路去上班。";
1410*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1411*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1412*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
1413*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
1414*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", kChinese)
1415*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
1416*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
1417*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
1418*8b6cd535SAndroid Build Coastguard Worker           .Build();
1419*8b6cd535SAndroid Build Coastguard Worker 
1420*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
1421*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
1422*8b6cd535SAndroid Build Coastguard Worker 
1423*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1424*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1425*8b6cd535SAndroid Build Coastguard Worker 
1426*8b6cd535SAndroid Build Coastguard Worker   // Ensure that one and only one property was matched and it was "subject"
1427*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1428*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1429*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(entry->property_name(), Eq("subject"));
1430*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1431*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1432*8b6cd535SAndroid Build Coastguard Worker 
1433*8b6cd535SAndroid Build Coastguard Worker   // Ensure that there is one and only one match within "subject"
1434*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1435*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1436*8b6cd535SAndroid Build Coastguard Worker 
1437*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the match is the whole segment "走路" and the submatch is
1437*8b6cd535SAndroid Build Coastguard Worker   // only the queried prefix "走".
1438*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, *entry), ElementsAre("走路"));
1439*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("走"));
1440*8b6cd535SAndroid Build Coastguard Worker 
1441*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the utf-16 values are also as expected: the match starts at
1441*8b6cd535SAndroid Build Coastguard Worker   // UTF-16 index 3 (see the index table above) and spans two code units.
1442*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
1443*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
1444*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(1));
1445*8b6cd535SAndroid Build Coastguard Worker }
1446*8b6cd535SAndroid Build Coastguard Worker 
// Verifies the snippet window computed around a CJK match when the maximum
// window length is 6 UTF-32 characters, using a segmenter created for the
// ULOC_SIMPLIFIED_CHINESE locale.
TEST_F(SnippetRetrieverTest,CJKSnippetWindowTest)1447*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) {
1448*8b6cd535SAndroid Build Coastguard Worker   language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE,
1449*8b6cd535SAndroid Build Coastguard Worker                                                        jni_cache_.get());
1450*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1451*8b6cd535SAndroid Build Coastguard Worker       language_segmenter_,
1452*8b6cd535SAndroid Build Coastguard Worker       language_segmenter_factory::Create(std::move(options)));
1453*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1454*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1455*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1456*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1457*8b6cd535SAndroid Build Coastguard Worker 
1458*8b6cd535SAndroid Build Coastguard Worker   // String:     "我每天走路去上班。"
1459*8b6cd535SAndroid Build Coastguard Worker   //              ^ ^  ^   ^^
1460*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:    0 3  9  15 18
1461*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:   0 1  3   5 6
1462*8b6cd535SAndroid Build Coastguard Worker   // UTF32 idx:   0 1  3   5 6
1463*8b6cd535SAndroid Build Coastguard Worker   // Breaks into segments: "我", "每天", "走路", "去", "上班"
1464*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kChinese = "我每天走路去上班。";
1465*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1466*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1467*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
1468*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
1469*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", kChinese)
1470*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
1471*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
1472*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
1473*8b6cd535SAndroid Build Coastguard Worker           .Build();
1474*8b6cd535SAndroid Build Coastguard Worker 
1475*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
1476*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
1477*8b6cd535SAndroid Build Coastguard Worker 
1478*8b6cd535SAndroid Build Coastguard Worker   // The window will be:
1479*8b6cd535SAndroid Build Coastguard Worker   //   1. untrimmed, no-shifting window will be (0,7).
1480*8b6cd535SAndroid Build Coastguard Worker   //   2. trimmed, no-shifting window [1, 6) "每天走路去".
1481*8b6cd535SAndroid Build Coastguard Worker   //   3. trimmed, shifted window [0, 6) "我每天走路去"
1482*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(6);
1483*8b6cd535SAndroid Build Coastguard Worker 
1484*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1485*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1486*8b6cd535SAndroid Build Coastguard Worker 
1487*8b6cd535SAndroid Build Coastguard Worker   // Ensure that one and only one property was matched and it was "subject"
1488*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1489*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1490*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(entry->property_name(), Eq("subject"));
1491*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1492*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1493*8b6cd535SAndroid Build Coastguard Worker 
1494*8b6cd535SAndroid Build Coastguard Worker   // Ensure that there is one and only one match within "subject"
1495*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1496*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1497*8b6cd535SAndroid Build Coastguard Worker 
1498*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the window is correct (the trimmed, shifted window above).
1499*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, *entry), ElementsAre("我每天走路去"));
1500*8b6cd535SAndroid Build Coastguard Worker 
1501*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the utf-16 values of the window are also as expected
1502*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_position(), Eq(0));
1503*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_length(), Eq(6));
1504*8b6cd535SAndroid Build Coastguard Worker }
1505*8b6cd535SAndroid Build Coastguard Worker 
// Verifies that match and submatch UTF-16 positions/lengths are reported in
// UTF-16 code units when the matched text consists of characters that each
// need two UTF-16 code units.
// NOTE(review): the multi-code-unit characters in this test appear as U+FFFD
// replacement characters in this copy of the file; confirm the literals and
// the index tables against the upstream source.
TEST_F(SnippetRetrieverTest,Utf16MultiCodeUnitSnippetMatchTest)1506*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitSnippetMatchTest) {
1507*8b6cd535SAndroid Build Coastguard Worker   // The following string has four-byte UTF-8 characters. Most importantly,
1508*8b6cd535SAndroid Build Coastguard Worker   // each of them is also two code units in UTF-16.
1509*8b6cd535SAndroid Build Coastguard Worker   // String:     "���� ���� ��"
1510*8b6cd535SAndroid Build Coastguard Worker   //              ^  ^  ^
1511*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:    0  9  18
1512*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:   0  5  10
1513*8b6cd535SAndroid Build Coastguard Worker   // Breaks into segments: "����", "����", "��"
1514*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kText = "���� ���� ��";
1515*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1516*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1517*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
1518*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
1519*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", kText)
1520*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
1521*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
1522*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
1523*8b6cd535SAndroid Build Coastguard Worker           .Build();
1524*8b6cd535SAndroid Build Coastguard Worker 
1525*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
1526*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"��"}}};
1527*8b6cd535SAndroid Build Coastguard Worker 
1528*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1529*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1530*8b6cd535SAndroid Build Coastguard Worker 
1531*8b6cd535SAndroid Build Coastguard Worker   // Ensure that one and only one property was matched and it was "subject"
1532*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1533*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1534*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(entry->property_name(), Eq("subject"));
1535*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1536*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1537*8b6cd535SAndroid Build Coastguard Worker 
1538*8b6cd535SAndroid Build Coastguard Worker   // Ensure that there is one and only one match within "subject"
1539*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1540*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1541*8b6cd535SAndroid Build Coastguard Worker 
1542*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the match is correct.
1543*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, *entry), ElementsAre("����"));
1544*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, *entry), ElementsAre("��"));
1545*8b6cd535SAndroid Build Coastguard Worker 
1546*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the utf-16 values are also as expected: position 5 is the
1546*8b6cd535SAndroid Build Coastguard Worker   // UTF-16 index of the second segment in the table above.
1547*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(5));
1548*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(4));
1549*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2));
1550*8b6cd535SAndroid Build Coastguard Worker }
1551*8b6cd535SAndroid Build Coastguard Worker 
// Verifies that the snippet window's UTF-16 position/length are reported in
// UTF-16 code units when the text consists of characters that each need two
// UTF-16 code units and the window is limited to 6 UTF-32 characters.
// NOTE(review): the multi-code-unit characters in this test appear as U+FFFD
// replacement characters in this copy of the file; confirm the literals and
// the index tables against the upstream source.
TEST_F(SnippetRetrieverTest,Utf16MultiCodeUnitWindowTest)1552*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) {
1553*8b6cd535SAndroid Build Coastguard Worker   // The following string has four-byte UTF-8 characters. Most importantly,
1554*8b6cd535SAndroid Build Coastguard Worker   // each of them is also two code units in UTF-16.
1555*8b6cd535SAndroid Build Coastguard Worker   // String:     "���� ���� ��"
1556*8b6cd535SAndroid Build Coastguard Worker   //              ^  ^  ^
1557*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:    0  9  18
1558*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:   0  5  10
1559*8b6cd535SAndroid Build Coastguard Worker   // UTF32 idx:   0  3  6
1560*8b6cd535SAndroid Build Coastguard Worker   // Breaks into segments: "����", "����", "��"
1561*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kText = "���� ���� ��";
1562*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1563*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1564*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "email/1")
1565*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("email")
1566*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("subject", kText)
1567*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("body",
1568*8b6cd535SAndroid Build Coastguard Worker                              "Concerning the subject of foo, we need to begin "
1569*8b6cd535SAndroid Build Coastguard Worker                              "considering our options regarding body bar.")
1570*8b6cd535SAndroid Build Coastguard Worker           .Build();
1571*8b6cd535SAndroid Build Coastguard Worker 
1572*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000011;
1573*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"��"}}};
1574*8b6cd535SAndroid Build Coastguard Worker 
1575*8b6cd535SAndroid Build Coastguard Worker   // Set a six character window. This will produce a window like this:
1576*8b6cd535SAndroid Build Coastguard Worker   // String:     "���� ���� ��"
1577*8b6cd535SAndroid Build Coastguard Worker   //                 ^   ^
1578*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:       9   22
1579*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:      5   12
1580*8b6cd535SAndroid Build Coastguard Worker   // UTF32 idx:      3   7
1581*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(6);
1582*8b6cd535SAndroid Build Coastguard Worker 
1583*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1584*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1585*8b6cd535SAndroid Build Coastguard Worker 
1586*8b6cd535SAndroid Build Coastguard Worker   // Ensure that one and only one property was matched and it was "subject"
1587*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1588*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1589*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(entry->property_name(), Eq("subject"));
1590*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1591*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1592*8b6cd535SAndroid Build Coastguard Worker 
1593*8b6cd535SAndroid Build Coastguard Worker   // Ensure that there is one and only one match within "subject"
1594*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1595*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1596*8b6cd535SAndroid Build Coastguard Worker 
1597*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the window is correct.
1598*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, *entry), ElementsAre("���� ��"));
1599*8b6cd535SAndroid Build Coastguard Worker 
1600*8b6cd535SAndroid Build Coastguard Worker   // Ensure that the utf-16 values of the window are also as expected: it
1600*8b6cd535SAndroid Build Coastguard Worker   // spans UTF-16 indices [5, 12) per the table above, i.e. length 7.
1601*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_position(), Eq(5));
1602*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_length(), Eq(7));
1603*8b6cd535SAndroid Build Coastguard Worker }
1604*8b6cd535SAndroid Build Coastguard Worker 
// Verifies snippeting of an ASCII string property configured with
// TOKENIZER_VERBATIM and queried with an exact term match: a single entry and
// match are returned whose window and submatch both cover all 13 characters.
TEST_F(SnippetRetrieverTest,SnippettingVerbatimAscii)1605*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) {
1606*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1607*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1608*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1609*8b6cd535SAndroid Build Coastguard Worker                        .SetType("verbatimType")
1610*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1611*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("verbatim")
1612*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_EXACT,
1613*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_VERBATIM)
1614*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1615*8b6cd535SAndroid Build Coastguard Worker           .Build();
1616*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1617*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1618*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1618*8b6cd535SAndroid Build Coastguard Worker   // Re-create the retriever after the schema has been replaced.
1619*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1620*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1621*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1622*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1623*8b6cd535SAndroid Build Coastguard Worker 
1624*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document = DocumentBuilder()
1625*8b6cd535SAndroid Build Coastguard Worker                                .SetKey("icing", "verbatim/1")
1626*8b6cd535SAndroid Build Coastguard Worker                                .SetSchema("verbatimType")
1627*8b6cd535SAndroid Build Coastguard Worker                                .AddStringProperty("verbatim", "Hello, world!")
1628*8b6cd535SAndroid Build Coastguard Worker                                .Build();
1629*8b6cd535SAndroid Build Coastguard Worker 
1630*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
1631*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"Hello, world!"}}};
1632*8b6cd535SAndroid Build Coastguard Worker 
1633*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(13);
1634*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1635*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_EXACT, snippet_spec_, document, section_mask);
1636*8b6cd535SAndroid Build Coastguard Worker 
1637*8b6cd535SAndroid Build Coastguard Worker   // There should only be one snippet entry and match, the verbatim token in its
1638*8b6cd535SAndroid Build Coastguard Worker   // entirety.
1639*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1640*8b6cd535SAndroid Build Coastguard Worker 
1641*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1642*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1643*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->property_name(), "verbatim");
1644*8b6cd535SAndroid Build Coastguard Worker 
1645*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1646*8b6cd535SAndroid Build Coastguard Worker   // We expect the window to begin at byte position 0, and to span the entire
1647*8b6cd535SAndroid Build Coastguard Worker   // token which contains 13 characters.
1648*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_byte_position(), Eq(0));
1649*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_length(), Eq(13));
1650*8b6cd535SAndroid Build Coastguard Worker 
1651*8b6cd535SAndroid Build Coastguard Worker   // We expect the exact match to begin at utf-16 position 0 of the verbatim
1652*8b6cd535SAndroid Build Coastguard Worker   // token and the submatch to span the length of our query term
1653*8b6cd535SAndroid Build Coastguard Worker   // "Hello, world!", which has utf-16 length of 13. The submatch length is
1654*8b6cd535SAndroid Build Coastguard Worker   // equal to the window length as the snippet is retrieved with an exact term
1654*8b6cd535SAndroid Build Coastguard Worker   // match.
1655*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0));
1656*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(13));
1657*8b6cd535SAndroid Build Coastguard Worker }
1658*8b6cd535SAndroid Build Coastguard Worker 
// Verifies snippeting of a CJK string property configured with
// TOKENIZER_VERBATIM and queried with a two-character prefix: a single entry
// and match are returned, the window covers all 9 characters and the submatch
// covers only the 2-character query term.
TEST_F(SnippetRetrieverTest,SnippettingVerbatimCJK)1659*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) {
1660*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1661*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1662*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1663*8b6cd535SAndroid Build Coastguard Worker                        .SetType("verbatimType")
1664*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1665*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("verbatim")
1666*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1667*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_VERBATIM)
1668*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1669*8b6cd535SAndroid Build Coastguard Worker           .Build();
1670*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1671*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1672*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1672*8b6cd535SAndroid Build Coastguard Worker   // Re-create the retriever after the schema has been replaced.
1673*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1674*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1675*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1676*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1677*8b6cd535SAndroid Build Coastguard Worker 
1678*8b6cd535SAndroid Build Coastguard Worker   // String:     "我每天走路去上班。"
1679*8b6cd535SAndroid Build Coastguard Worker   //              ^ ^  ^   ^^
1680*8b6cd535SAndroid Build Coastguard Worker   // UTF8 idx:    0 3  9  15 18
1681*8b6cd535SAndroid Build Coastguard Worker   // UTF16 idx:   0 1  3   5 6
1682*8b6cd535SAndroid Build Coastguard Worker   // UTF32 idx:   0 1  3   5 6
1683*8b6cd535SAndroid Build Coastguard Worker   // Breaks into segments: "我", "每天", "走路", "去", "上班"
1684*8b6cd535SAndroid Build Coastguard Worker   std::string chinese_string = "我每天走路去上班。";
1685*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document = DocumentBuilder()
1686*8b6cd535SAndroid Build Coastguard Worker                                .SetKey("icing", "verbatim/1")
1687*8b6cd535SAndroid Build Coastguard Worker                                .SetSchema("verbatimType")
1688*8b6cd535SAndroid Build Coastguard Worker                                .AddStringProperty("verbatim", chinese_string)
1689*8b6cd535SAndroid Build Coastguard Worker                                .Build();
1690*8b6cd535SAndroid Build Coastguard Worker 
1691*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
1692*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"我每"}}};
1693*8b6cd535SAndroid Build Coastguard Worker 
1694*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(9);
1695*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1696*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1697*8b6cd535SAndroid Build Coastguard Worker 
1698*8b6cd535SAndroid Build Coastguard Worker   // There should only be one snippet entry and match, the verbatim token in its
1699*8b6cd535SAndroid Build Coastguard Worker   // entirety.
1700*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1701*8b6cd535SAndroid Build Coastguard Worker 
1702*8b6cd535SAndroid Build Coastguard Worker   const SnippetProto::EntryProto* entry = &snippet.entries(0);
1703*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
1704*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(entry->property_name(), "verbatim");
1705*8b6cd535SAndroid Build Coastguard Worker 
1706*8b6cd535SAndroid Build Coastguard Worker   const SnippetMatchProto& match_proto = entry->snippet_matches(0);
1707*8b6cd535SAndroid Build Coastguard Worker   // We expect the window to begin at byte position 0, and to span the entire
1708*8b6cd535SAndroid Build Coastguard Worker   // token which has utf-16 length of 9.
1709*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_byte_position(), Eq(0));
1710*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.window_utf16_length(), Eq(9));
1711*8b6cd535SAndroid Build Coastguard Worker 
1712*8b6cd535SAndroid Build Coastguard Worker   // We expect the exact match to begin at utf-16 position 0 of the verbatim
1713*8b6cd535SAndroid Build Coastguard Worker   // token and the submatch to span the length of our query term "我每", which
1713*8b6cd535SAndroid Build Coastguard Worker   // has utf-16 length of 2.
1714*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0));
1715*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2));
1716*8b6cd535SAndroid Build Coastguard Worker }
1717*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippettingRfc822Ascii)1718*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippettingRfc822Ascii) {
1719*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1720*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1721*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1722*8b6cd535SAndroid Build Coastguard Worker                        .SetType("rfc822Type")
1723*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1724*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("rfc822")
1725*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1726*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_RFC822)
1727*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1728*8b6cd535SAndroid Build Coastguard Worker           .Build();
1729*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1730*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1731*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1732*8b6cd535SAndroid Build Coastguard Worker 
1733*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1734*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1735*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1736*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1737*8b6cd535SAndroid Build Coastguard Worker 
1738*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1739*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1740*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "rfc822/1")
1741*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("rfc822Type")
1742*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("rfc822",
1743*8b6cd535SAndroid Build Coastguard Worker                              "Alexander Sav <[email protected]>, Very Long "
1744*8b6cd535SAndroid Build Coastguard Worker                              "Name Example <[email protected]>")
1745*8b6cd535SAndroid Build Coastguard Worker           .Build();
1746*8b6cd535SAndroid Build Coastguard Worker 
1747*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
1748*8b6cd535SAndroid Build Coastguard Worker 
1749*8b6cd535SAndroid Build Coastguard Worker   // This should match both the first name token as well as the entire RFC822.
1750*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"alexand"}}};
1751*8b6cd535SAndroid Build Coastguard Worker 
1752*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(35);
1753*8b6cd535SAndroid Build Coastguard Worker 
1754*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1755*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1756*8b6cd535SAndroid Build Coastguard Worker 
1757*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1758*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
1759*8b6cd535SAndroid Build Coastguard Worker 
1760*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1761*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1762*8b6cd535SAndroid Build Coastguard Worker 
1763*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1764*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("Alexander Sav <[email protected]>,",
1765*8b6cd535SAndroid Build Coastguard Worker                           "Alexander Sav <[email protected]>,"));
1766*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1767*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("Alexander Sav <[email protected]>", "Alexander"));
1768*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1769*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("Alexand", "Alexand"));
1770*8b6cd535SAndroid Build Coastguard Worker 
1771*8b6cd535SAndroid Build Coastguard Worker   // "tom" should match the local component, local address, and address tokens.
1772*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"tom"}}};
1773*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(36);
1774*8b6cd535SAndroid Build Coastguard Worker 
1775*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1776*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1777*8b6cd535SAndroid Build Coastguard Worker 
1778*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1779*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
1780*8b6cd535SAndroid Build Coastguard Worker 
1781*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
1782*8b6cd535SAndroid Build Coastguard Worker 
1783*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/248362902) Stop returning duplicate matches.
1784*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1785*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("Alexander Sav <[email protected]>,",
1786*8b6cd535SAndroid Build Coastguard Worker                           "Alexander Sav <[email protected]>,",
1787*8b6cd535SAndroid Build Coastguard Worker                           "Alexander Sav <[email protected]>,"));
1788*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1789*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("tom.bar", "[email protected]", "tom"));
1790*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1791*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("tom", "tom", "tom"));
1792*8b6cd535SAndroid Build Coastguard Worker }
1793*8b6cd535SAndroid Build Coastguard Worker 
TEST_F(SnippetRetrieverTest,SnippettingRfc822CJK)1794*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
1795*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1796*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1797*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder()
1798*8b6cd535SAndroid Build Coastguard Worker                        .SetType("rfc822Type")
1799*8b6cd535SAndroid Build Coastguard Worker                        .AddProperty(PropertyConfigBuilder()
1800*8b6cd535SAndroid Build Coastguard Worker                                         .SetName("rfc822")
1801*8b6cd535SAndroid Build Coastguard Worker                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1802*8b6cd535SAndroid Build Coastguard Worker                                                            TOKENIZER_RFC822)
1803*8b6cd535SAndroid Build Coastguard Worker                                         .SetCardinality(CARDINALITY_REPEATED)))
1804*8b6cd535SAndroid Build Coastguard Worker           .Build();
1805*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1806*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true,
1807*8b6cd535SAndroid Build Coastguard Worker       /*allow_circular_schema_definitions=*/false));
1808*8b6cd535SAndroid Build Coastguard Worker 
1809*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1810*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1811*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1812*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1813*8b6cd535SAndroid Build Coastguard Worker 
1814*8b6cd535SAndroid Build Coastguard Worker   std::string chinese_string = "我, 每天@走路, 去@上班";
1815*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document = DocumentBuilder()
1816*8b6cd535SAndroid Build Coastguard Worker                                .SetKey("icing", "rfc822/1")
1817*8b6cd535SAndroid Build Coastguard Worker                                .SetSchema("rfc822Type")
1818*8b6cd535SAndroid Build Coastguard Worker                                .AddStringProperty("rfc822", chinese_string)
1819*8b6cd535SAndroid Build Coastguard Worker                                .Build();
1820*8b6cd535SAndroid Build Coastguard Worker 
1821*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
1822*8b6cd535SAndroid Build Coastguard Worker 
1823*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};
1824*8b6cd535SAndroid Build Coastguard Worker 
1825*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(8);
1826*8b6cd535SAndroid Build Coastguard Worker 
1827*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1828*8b6cd535SAndroid Build Coastguard Worker       query_terms, TERM_MATCH_PREFIX, snippet_spec_, document, section_mask);
1829*8b6cd535SAndroid Build Coastguard Worker 
1830*8b6cd535SAndroid Build Coastguard Worker   // There should only be one snippet entry and match, the local component token
1831*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1832*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "rfc822");
1833*8b6cd535SAndroid Build Coastguard Worker 
1834*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1835*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1836*8b6cd535SAndroid Build Coastguard Worker 
1837*8b6cd535SAndroid Build Coastguard Worker   // The local component, address, local address, and token will all match. The
1838*8b6cd535SAndroid Build Coastguard Worker   // windows for address and token are "" as the snippet window is too small.
1839*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/248362902) Stop returning duplicate matches.
1840*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1841*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("每天@走路,", "每天@走路,"));
1842*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1843*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("走路", "走路"));
1844*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1845*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("走", "走"));
1846*8b6cd535SAndroid Build Coastguard Worker }
1847*8b6cd535SAndroid Build Coastguard Worker 
1848*8b6cd535SAndroid Build Coastguard Worker #ifdef ENABLE_URL_TOKENIZER
TEST_F(SnippetRetrieverTest,SnippettingUrlAscii)1849*8b6cd535SAndroid Build Coastguard Worker TEST_F(SnippetRetrieverTest, SnippettingUrlAscii) {
1850*8b6cd535SAndroid Build Coastguard Worker   SchemaProto schema =
1851*8b6cd535SAndroid Build Coastguard Worker       SchemaBuilder()
1852*8b6cd535SAndroid Build Coastguard Worker           .AddType(SchemaTypeConfigBuilder().SetType("urlType").AddProperty(
1853*8b6cd535SAndroid Build Coastguard Worker               PropertyConfigBuilder()
1854*8b6cd535SAndroid Build Coastguard Worker                   .SetName("url")
1855*8b6cd535SAndroid Build Coastguard Worker                   .SetDataTypeString(MATCH_PREFIX, TOKENIZER_URL)
1856*8b6cd535SAndroid Build Coastguard Worker                   .SetCardinality(CARDINALITY_REPEATED)))
1857*8b6cd535SAndroid Build Coastguard Worker           .Build();
1858*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK(schema_store_->SetSchema(
1859*8b6cd535SAndroid Build Coastguard Worker       schema, /*ignore_errors_and_delete_documents=*/true));
1860*8b6cd535SAndroid Build Coastguard Worker 
1861*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSERT_OK_AND_ASSIGN(
1862*8b6cd535SAndroid Build Coastguard Worker       snippet_retriever_,
1863*8b6cd535SAndroid Build Coastguard Worker       SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
1864*8b6cd535SAndroid Build Coastguard Worker                                normalizer_.get()));
1865*8b6cd535SAndroid Build Coastguard Worker 
1866*8b6cd535SAndroid Build Coastguard Worker   DocumentProto document =
1867*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1868*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "url/1")
1869*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("urlType")
1870*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("url", "https://mail.google.com/calendar/google/")
1871*8b6cd535SAndroid Build Coastguard Worker           .Build();
1872*8b6cd535SAndroid Build Coastguard Worker 
1873*8b6cd535SAndroid Build Coastguard Worker   SectionIdMask section_mask = 0b00000001;
1874*8b6cd535SAndroid Build Coastguard Worker 
1875*8b6cd535SAndroid Build Coastguard Worker   // Query with single url split-token match
1876*8b6cd535SAndroid Build Coastguard Worker   SectionRestrictQueryTermsMap query_terms{{"", {"com"}}};
1877*8b6cd535SAndroid Build Coastguard Worker   // 40 is the length of the url.
1878*8b6cd535SAndroid Build Coastguard Worker   // Window that is the size of the url should return entire url.
1879*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(40);
1880*8b6cd535SAndroid Build Coastguard Worker 
1881*8b6cd535SAndroid Build Coastguard Worker   SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
1882*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1883*8b6cd535SAndroid Build Coastguard Worker 
1884*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1885*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
1886*8b6cd535SAndroid Build Coastguard Worker 
1887*8b6cd535SAndroid Build Coastguard Worker   std::string_view content =
1888*8b6cd535SAndroid Build Coastguard Worker       GetString(&document, snippet.entries(0).property_name());
1889*8b6cd535SAndroid Build Coastguard Worker 
1890*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1891*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://mail.google.com/calendar/google/"));
1892*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("com"));
1893*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)), ElementsAre("com"));
1894*8b6cd535SAndroid Build Coastguard Worker 
1895*8b6cd535SAndroid Build Coastguard Worker   // Query with single url suffix-token match
1896*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"mail.goo"}}};
1897*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(40);
1898*8b6cd535SAndroid Build Coastguard Worker 
1899*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1900*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1901*8b6cd535SAndroid Build Coastguard Worker 
1902*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1903*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
1904*8b6cd535SAndroid Build Coastguard Worker 
1905*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
1906*8b6cd535SAndroid Build Coastguard Worker 
1907*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1908*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://mail.google.com/calendar/google/"));
1909*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1910*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("mail.google.com/calendar/google/"));
1911*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1912*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("mail.goo"));
1913*8b6cd535SAndroid Build Coastguard Worker 
1914*8b6cd535SAndroid Build Coastguard Worker   // Query with multiple url split-token matches
1915*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"goog"}}};
1916*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(40);
1917*8b6cd535SAndroid Build Coastguard Worker 
1918*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1919*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1920*8b6cd535SAndroid Build Coastguard Worker 
1921*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1922*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
1923*8b6cd535SAndroid Build Coastguard Worker 
1924*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
1925*8b6cd535SAndroid Build Coastguard Worker 
1926*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1927*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://mail.google.com/calendar/google/",
1928*8b6cd535SAndroid Build Coastguard Worker                           "https://mail.google.com/calendar/google/"));
1929*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1930*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("google", "google"));
1931*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1932*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("goog", "goog"));
1933*8b6cd535SAndroid Build Coastguard Worker 
1934*8b6cd535SAndroid Build Coastguard Worker   // Query with both url split-token and suffix-token matches
1935*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"mail"}}};
1936*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(40);
1937*8b6cd535SAndroid Build Coastguard Worker 
1938*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1939*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1940*8b6cd535SAndroid Build Coastguard Worker 
1941*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1942*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
1943*8b6cd535SAndroid Build Coastguard Worker 
1944*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
1945*8b6cd535SAndroid Build Coastguard Worker 
1946*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1947*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://mail.google.com/calendar/google/",
1948*8b6cd535SAndroid Build Coastguard Worker                           "https://mail.google.com/calendar/google/"));
1949*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1950*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("mail", "mail.google.com/calendar/google/"));
1951*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1952*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("mail", "mail"));
1953*8b6cd535SAndroid Build Coastguard Worker 
1954*8b6cd535SAndroid Build Coastguard Worker   // Prefix query with both url split-token and suffix-token matches
1955*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"http"}}};
1956*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(40);
1957*8b6cd535SAndroid Build Coastguard Worker 
1958*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1959*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1960*8b6cd535SAndroid Build Coastguard Worker 
1961*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1962*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
1963*8b6cd535SAndroid Build Coastguard Worker 
1964*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
1965*8b6cd535SAndroid Build Coastguard Worker 
1966*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
1967*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://mail.google.com/calendar/google/",
1968*8b6cd535SAndroid Build Coastguard Worker                           "https://mail.google.com/calendar/google/"));
1969*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
1970*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https", "https://mail.google.com/calendar/google/"));
1971*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
1972*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("http", "http"));
1973*8b6cd535SAndroid Build Coastguard Worker 
1974*8b6cd535SAndroid Build Coastguard Worker   // Window that's smaller than the input size should not return any matches.
1975*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"google"}}};
1976*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(10);
1977*8b6cd535SAndroid Build Coastguard Worker 
1978*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1979*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1980*8b6cd535SAndroid Build Coastguard Worker 
1981*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(0));
1982*8b6cd535SAndroid Build Coastguard Worker 
1983*8b6cd535SAndroid Build Coastguard Worker   // Test case with more than two matches
1984*8b6cd535SAndroid Build Coastguard Worker   document =
1985*8b6cd535SAndroid Build Coastguard Worker       DocumentBuilder()
1986*8b6cd535SAndroid Build Coastguard Worker           .SetKey("icing", "url/1")
1987*8b6cd535SAndroid Build Coastguard Worker           .SetSchema("urlType")
1988*8b6cd535SAndroid Build Coastguard Worker           .AddStringProperty("url", "https://www.google.com/calendar/google/")
1989*8b6cd535SAndroid Build Coastguard Worker           .Build();
1990*8b6cd535SAndroid Build Coastguard Worker 
1991*8b6cd535SAndroid Build Coastguard Worker   // Prefix query with both url split-token and suffix-token matches
1992*8b6cd535SAndroid Build Coastguard Worker   query_terms = SectionRestrictQueryTermsMap{{"", {"google"}}};
1993*8b6cd535SAndroid Build Coastguard Worker   snippet_spec_.set_max_window_utf32_length(39);
1994*8b6cd535SAndroid Build Coastguard Worker 
1995*8b6cd535SAndroid Build Coastguard Worker   snippet = snippet_retriever_->RetrieveSnippet(
1996*8b6cd535SAndroid Build Coastguard Worker       query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
1997*8b6cd535SAndroid Build Coastguard Worker 
1998*8b6cd535SAndroid Build Coastguard Worker   ASSERT_THAT(snippet.entries(), SizeIs(1));
1999*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(snippet.entries(0).property_name(), "url");
2000*8b6cd535SAndroid Build Coastguard Worker 
2001*8b6cd535SAndroid Build Coastguard Worker   content = GetString(&document, snippet.entries(0).property_name());
2002*8b6cd535SAndroid Build Coastguard Worker 
2003*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetWindows(content, snippet.entries(0)),
2004*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("https://www.google.com/calendar/google/",
2005*8b6cd535SAndroid Build Coastguard Worker                           "https://www.google.com/calendar/google/",
2006*8b6cd535SAndroid Build Coastguard Worker                           "https://www.google.com/calendar/google/"));
2007*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetMatches(content, snippet.entries(0)),
2008*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("google", "google", "google.com/calendar/google/"));
2009*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(GetSubMatches(content, snippet.entries(0)),
2010*8b6cd535SAndroid Build Coastguard Worker               ElementsAre("google", "google", "google"));
2011*8b6cd535SAndroid Build Coastguard Worker }
2012*8b6cd535SAndroid Build Coastguard Worker #endif  // ENABLE_URL_TOKENIZER
2013*8b6cd535SAndroid Build Coastguard Worker 
2014*8b6cd535SAndroid Build Coastguard Worker }  // namespace
2015*8b6cd535SAndroid Build Coastguard Worker 
2016*8b6cd535SAndroid Build Coastguard Worker }  // namespace lib
2017*8b6cd535SAndroid Build Coastguard Worker }  // namespace icing
2018