xref: /aosp_15_r20/external/icing/icing/expand/expander-manager_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/expand/expander-manager.h"
16 
17 #include <array>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <thread>  // NOLINT
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "gmock/gmock.h"
26 #include "gtest/gtest.h"
27 #include "icing/expand/expander.h"
28 #include "icing/portable/platform.h"
29 #include "icing/testing/common-matchers.h"
30 #include "unicode/uloc.h"
31 
32 namespace icing {
33 namespace lib {
34 
35 namespace {
36 
37 using ::testing::ElementsAre;
38 
// Locale identifiers used by the tests in this file.
constexpr std::string_view kRussianLocale{"ru-RU"};
constexpr std::string_view kTamilLocale{"ta-IN"};
// Locale string the tests use to exercise the unsupported-locale paths.
constexpr std::string_view kUnsupportedLocale{"unsupported_locale"};
42 
// ExpanderManager::Create() is expected to report INVALID_ARGUMENT for the
// max_terms_per_expander values exercised here (-1 and 1).
TEST(ExpanderManagerTest, CreateWithInvalidMaxTermsShouldFail) {
  const auto expected_code = libtextclassifier3::StatusCode::INVALID_ARGUMENT;

  // Negative limit.
  EXPECT_THAT(ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/-1),
              StatusIs(expected_code));

  // Limit of one.
  EXPECT_THAT(ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/1),
              StatusIs(expected_code));
}
49 
// Create() succeeds for every locale string tried here; only the reported
// default locale differs between the cases.
TEST(ExpanderManagerTest, CreateWithAnyLocaleShouldSucceed) {
  // US English is reported back unchanged.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/2));
  EXPECT_THAT(manager->default_locale(), ULOC_US);

  // French is reported back unchanged.
  ICING_ASSERT_OK_AND_ASSIGN(
      manager,
      ExpanderManager::Create(ULOC_FRENCH, /*max_terms_per_expander=*/2));
  EXPECT_THAT(manager->default_locale(), ULOC_FRENCH);

  // Unsupported locale: the expected default depends on whether stemming is
  // enabled in this build.
  ICING_ASSERT_OK_AND_ASSIGN(
      manager, ExpanderManager::Create(std::string(kUnsupportedLocale),
                                       /*max_terms_per_expander=*/2));
  if (!IsStemmingEnabled()) {
    // Without stemming the given locale string is reported as-is.
    EXPECT_THAT(manager->default_locale(), std::string(kUnsupportedLocale));
  } else {
    // With stemming the default English locale is reported instead.
    EXPECT_THAT(manager->default_locale(),
                ExpanderManager::kDefaultEnglishLocale);
  }
}
72 
// With EXACT_ONLY matching, ProcessTerm() is expected to return just the
// original term, flagged as not stemmed.
TEST(ExpanderManagerTest, ProcessTerm_exactMatchReturnsOriginalTerm) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));

  const std::vector<ExpandedTerm> result =
      manager->ProcessTerm("running", TermMatchType::EXACT_ONLY, ULOC_US);

  // Processing a term leaves the default locale as it was.
  EXPECT_THAT(manager->default_locale(), ULOC_US);
  EXPECT_THAT(result,
              ElementsAre(ExpandedTerm("running",
                                       /*is_stemmed_term_in=*/false)));
}
84 
// With PREFIX matching, ProcessTerm() is expected to return just the original
// term, flagged as not stemmed.
TEST(ExpanderManagerTest, ProcessTerm_prefixMatchReturnsOriginalTerm) {
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));

  const std::vector<ExpandedTerm> result =
      manager->ProcessTerm("running", TermMatchType::PREFIX, ULOC_US);

  // Processing a term leaves the default locale as it was.
  EXPECT_THAT(manager->default_locale(), ULOC_US);
  EXPECT_THAT(result,
              ElementsAre(ExpandedTerm("running",
                                       /*is_stemmed_term_in=*/false)));
}
96 
// With STEMMING matching and the manager's own default locale, each term is
// expected to expand to the original term followed by its stem.
TEST(ExpanderManagerTest, ProcessTerm_stemmingMatchWithDefaultLocale) {
  if (!IsStemmingEnabled()) {
    GTEST_SKIP() << "Skipping test because stemming is not enabled.";
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));

  // Both lookups below use the same match type and locale.
  const auto stem_with_us_locale = [&manager](std::string_view term) {
    return manager->ProcessTerm(term, TermMatchType::STEMMING, ULOC_US);
  };

  // "running" -> "run"
  const std::vector<ExpandedTerm> running_terms =
      stem_with_us_locale("running");
  EXPECT_THAT(manager->default_locale(), ULOC_US);
  EXPECT_THAT(running_terms,
              ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
                          ExpandedTerm("run", /*is_stemmed_term_in=*/true)));

  // "tests" -> "test"
  const std::vector<ExpandedTerm> tests_terms = stem_with_us_locale("tests");
  EXPECT_THAT(manager->default_locale(), ULOC_US);
  EXPECT_THAT(tests_terms,
              ElementsAre(ExpandedTerm("tests", /*is_stemmed_term_in=*/false),
                          ExpandedTerm("test", /*is_stemmed_term_in=*/true)));
}
120 
// A manager created with a French default is asked to stem terms in other
// locales (US English, Spanish). The stems are expected to follow the
// requested locale, and the default locale is expected to stay French.
TEST(ExpanderManagerTest, ProcessTerm_stemmingMatchWithNonDefaultLocale) {
  if (!IsStemmingEnabled()) {
    GTEST_SKIP() << "Skipping test because stemming is not enabled.";
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_FRENCH, /*max_terms_per_expander=*/3));

  // English term with the US locale.
  const std::vector<ExpandedTerm> english_terms =
      manager->ProcessTerm("running", TermMatchType::STEMMING, ULOC_US);
  EXPECT_THAT(manager->default_locale(), ULOC_FRENCH);
  EXPECT_THAT(english_terms,
              ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
                          ExpandedTerm("run", /*is_stemmed_term_in=*/true)));

  // Spanish term with the "es_ES" locale.
  const std::vector<ExpandedTerm> spanish_terms =
      manager->ProcessTerm("torpedearon", TermMatchType::STEMMING, "es_ES");
  EXPECT_THAT(manager->default_locale(), ULOC_FRENCH);
  EXPECT_THAT(
      spanish_terms,
      ElementsAre(ExpandedTerm("torpedearon", /*is_stemmed_term_in=*/false),
                  ExpandedTerm("torped", /*is_stemmed_term_in=*/true)));
}
145 
// When ProcessTerm() is given an unsupported locale, the expectations below
// match stemming with the manager's default locale (French): the English word
// gets no stem, while the French word does.
TEST(ExpanderManagerTest,
     ProcessTerm_stemmingMatchWithUnsupportedLocaleUsesDefault) {
  if (!IsStemmingEnabled()) {
    GTEST_SKIP() << "Skipping test because stemming is not enabled.";
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(ULOC_FRENCH, /*max_terms_per_expander=*/3));

  const std::string requested_locale(kUnsupportedLocale);

  // English word: only the original term comes back.
  const std::vector<ExpandedTerm> english_terms = manager->ProcessTerm(
      "running", TermMatchType::STEMMING, requested_locale);
  EXPECT_THAT(manager->default_locale(), ULOC_FRENCH);
  EXPECT_THAT(english_terms,
              ElementsAre(ExpandedTerm("running",
                                       /*is_stemmed_term_in=*/false)));

  // French word: original term plus its stem.
  const std::vector<ExpandedTerm> french_terms = manager->ProcessTerm(
      "majestueuse", TermMatchType::STEMMING, requested_locale);
  EXPECT_THAT(manager->default_locale(), ULOC_FRENCH);
  EXPECT_THAT(
      french_terms,
      ElementsAre(ExpandedTerm("majestueuse", /*is_stemmed_term_in=*/false),
                  ExpandedTerm("majestu", /*is_stemmed_term_in=*/true)));
}
171 
// When both the manager's default locale and the requested locale are
// unsupported, the expectations below match English stemming, and the manager
// reports ExpanderManager::kDefaultEnglishLocale as its default locale.
TEST(ExpanderManagerTest,
     ProcessTerm_stemmingMatchWithUnsupportedDefaultLocaleUsesEnglish) {
  if (!IsStemmingEnabled()) {
    GTEST_SKIP() << "Skipping test because stemming is not enabled.";
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ExpanderManager> manager,
      ExpanderManager::Create(std::string(kUnsupportedLocale),
                              /*max_terms_per_expander=*/3));

  const std::string requested_locale(kUnsupportedLocale);

  // English word: original term plus its stem.
  const std::vector<ExpandedTerm> english_terms = manager->ProcessTerm(
      "running", TermMatchType::STEMMING, requested_locale);
  EXPECT_THAT(manager->default_locale(),
              ExpanderManager::kDefaultEnglishLocale);
  EXPECT_THAT(english_terms,
              ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
                          ExpandedTerm("run", /*is_stemmed_term_in=*/true)));

  // French word: the expected stem here ("majestueus") differs from the one
  // expected under a French default locale.
  const std::vector<ExpandedTerm> french_terms = manager->ProcessTerm(
      "majestueuse", TermMatchType::STEMMING, requested_locale);
  EXPECT_THAT(manager->default_locale(),
              ExpanderManager::kDefaultEnglishLocale);
  EXPECT_THAT(
      french_terms,
      ElementsAre(ExpandedTerm("majestueuse", /*is_stemmed_term_in=*/false),
                  ExpandedTerm("majestueus", /*is_stemmed_term_in=*/true)));
}
201 
TEST(ExpanderManagerTest,ThreadSafety)202 TEST(ExpanderManagerTest, ThreadSafety) {
203   ICING_ASSERT_OK_AND_ASSIGN(
204       std::unique_ptr<ExpanderManager> expander_manager,
205       ExpanderManager::Create(ULOC_US,
206                               /*max_terms_per_expander=*/1000));
207 
208   constexpr int kNumTerms = 10;
209   constexpr int kNumLocales = 5;
210   constexpr std::array<std::string_view, kNumLocales> kLocales = {
211       ULOC_US, ULOC_FRENCH, kRussianLocale, kTamilLocale, kUnsupportedLocale};
212   constexpr std::array<std::string_view, kNumTerms> kTerms = {
213       "running", "majestueuse", "валяется", "இக்கதையின்", "testing",
214       "test",    "running",     "говорить", "அக்கரையில்", "manager"};
215 
216   std::array<std::string_view, kNumTerms> kStems;
217   if (IsStemmingEnabled()) {
218     kStems = {"run",  "majestu", "валя",  "கதை", "test",
219               "test", "running", "говор", "கரை", "manag"};
220   } else {
221     // Stemming is not enabled, so the stemmed terms are the same as the
222     // original terms.
223     kStems = kTerms;
224   }
225 
226   // Create kNumThreads threads. Call ProcessTerm() from each thread in
227   // parallel using different locales. There should be no crashes.
228   constexpr int kNumThreads = 50;
229   std::vector<std::vector<ExpandedTerm>> expanded_terms(kNumThreads);
230   auto callable = [&](int thread_id) {
231     std::string locale = std::string(kLocales[thread_id % kNumLocales]);
232     expanded_terms[thread_id] = expander_manager->ProcessTerm(
233         kTerms[thread_id % kNumTerms], TermMatchType::STEMMING, locale);
234   };
235 
236   // Spawn threads to call ProcessTerm() in parallel.
237   std::vector<std::thread> thread_objs;
238   for (int i = 0; i < kNumThreads; ++i) {
239     thread_objs.emplace_back(callable, i);
240   }
241 
242   // Join threads and verify results
243   for (int i = 0; i < kNumThreads; ++i) {
244     thread_objs[i].join();
245 
246     int term_number = i % kNumTerms;
247     if (kTerms[term_number] == kStems[term_number]) {
248       // No stemmed term generated after expansion.
249       EXPECT_THAT(expanded_terms[i],
250                   ElementsAre(ExpandedTerm(std::string(kTerms[term_number]),
251                                            /*is_stemmed_term_in=*/false)));
252     } else {
253       EXPECT_THAT(expanded_terms[i],
254                   ElementsAre(ExpandedTerm(std::string(kTerms[term_number]),
255                                            /*is_stemmed_term_in=*/false),
256                               ExpandedTerm(std::string(kStems[term_number]),
257                                            /*is_stemmed_term_in=*/true)));
258     }
259   }
260 }
261 
262 }  // namespace
263 
264 }  // namespace lib
265 }  // namespace icing
266