1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/expand/expander-manager.h"
16
17 #include <array>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <thread> // NOLINT
22 #include <vector>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "gmock/gmock.h"
26 #include "gtest/gtest.h"
27 #include "icing/expand/expander.h"
28 #include "icing/portable/platform.h"
29 #include "icing/testing/common-matchers.h"
30 #include "unicode/uloc.h"
31
32 namespace icing {
33 namespace lib {
34
35 namespace {
36
37 using ::testing::ElementsAre;
38
39 constexpr std::string_view kRussianLocale = "ru-RU";
40 constexpr std::string_view kTamilLocale = "ta-IN";
41 constexpr std::string_view kUnsupportedLocale = "unsupported_locale";
42
TEST(ExpanderManagerTest,CreateWithInvalidMaxTermsShouldFail)43 TEST(ExpanderManagerTest, CreateWithInvalidMaxTermsShouldFail) {
44 EXPECT_THAT(ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/-1),
45 StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
46 EXPECT_THAT(ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/1),
47 StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
48 }
49
TEST(ExpanderManagerTest,CreateWithAnyLocaleShouldSucceed)50 TEST(ExpanderManagerTest, CreateWithAnyLocaleShouldSucceed) {
51 ICING_ASSERT_OK_AND_ASSIGN(
52 std::unique_ptr<ExpanderManager> expander_manager,
53 ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/2));
54 EXPECT_THAT(expander_manager->default_locale(), ULOC_US);
55
56 ICING_ASSERT_OK_AND_ASSIGN(
57 expander_manager, ExpanderManager::Create(ULOC_FRENCH,
58 /*max_terms_per_expander=*/2));
59 EXPECT_THAT(expander_manager->default_locale(), ULOC_FRENCH);
60
61 ICING_ASSERT_OK_AND_ASSIGN(
62 expander_manager, ExpanderManager::Create(std::string(kUnsupportedLocale),
63 /*max_terms_per_expander=*/2));
64 if (IsStemmingEnabled()) {
65 EXPECT_THAT(expander_manager->default_locale(),
66 ExpanderManager::kDefaultEnglishLocale);
67 } else {
68 EXPECT_THAT(expander_manager->default_locale(),
69 std::string(kUnsupportedLocale));
70 }
71 }
72
TEST(ExpanderManagerTest,ProcessTerm_exactMatchReturnsOriginalTerm)73 TEST(ExpanderManagerTest, ProcessTerm_exactMatchReturnsOriginalTerm) {
74 ICING_ASSERT_OK_AND_ASSIGN(
75 std::unique_ptr<ExpanderManager> expander_manager,
76 ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));
77
78 std::vector<ExpandedTerm> expanded_terms = expander_manager->ProcessTerm(
79 "running", TermMatchType::EXACT_ONLY, ULOC_US);
80 EXPECT_THAT(expander_manager->default_locale(), ULOC_US);
81 EXPECT_THAT(expanded_terms, ElementsAre(ExpandedTerm(
82 "running", /*is_stemmed_term_in=*/false)));
83 }
84
TEST(ExpanderManagerTest,ProcessTerm_prefixMatchReturnsOriginalTerm)85 TEST(ExpanderManagerTest, ProcessTerm_prefixMatchReturnsOriginalTerm) {
86 ICING_ASSERT_OK_AND_ASSIGN(
87 std::unique_ptr<ExpanderManager> expander_manager,
88 ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));
89
90 std::vector<ExpandedTerm> expanded_terms =
91 expander_manager->ProcessTerm("running", TermMatchType::PREFIX, ULOC_US);
92 EXPECT_THAT(expander_manager->default_locale(), ULOC_US);
93 EXPECT_THAT(expanded_terms, ElementsAre(ExpandedTerm(
94 "running", /*is_stemmed_term_in=*/false)));
95 }
96
TEST(ExpanderManagerTest,ProcessTerm_stemmingMatchWithDefaultLocale)97 TEST(ExpanderManagerTest, ProcessTerm_stemmingMatchWithDefaultLocale) {
98 if (!IsStemmingEnabled()) {
99 GTEST_SKIP() << "Skipping test because stemming is not enabled.";
100 }
101
102 ICING_ASSERT_OK_AND_ASSIGN(
103 std::unique_ptr<ExpanderManager> expander_manager,
104 ExpanderManager::Create(ULOC_US, /*max_terms_per_expander=*/3));
105
106 std::vector<ExpandedTerm> expanded_terms = expander_manager->ProcessTerm(
107 "running", TermMatchType::STEMMING, ULOC_US);
108 EXPECT_THAT(expander_manager->default_locale(), ULOC_US);
109 EXPECT_THAT(expanded_terms,
110 ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
111 ExpandedTerm("run", /*is_stemmed_term_in=*/true)));
112
113 expanded_terms =
114 expander_manager->ProcessTerm("tests", TermMatchType::STEMMING, ULOC_US);
115 EXPECT_THAT(expander_manager->default_locale(), ULOC_US);
116 EXPECT_THAT(expanded_terms,
117 ElementsAre(ExpandedTerm("tests", /*is_stemmed_term_in=*/false),
118 ExpandedTerm("test", /*is_stemmed_term_in=*/true)));
119 }
120
TEST(ExpanderManagerTest,ProcessTerm_stemmingMatchWithNonDefaultLocale)121 TEST(ExpanderManagerTest, ProcessTerm_stemmingMatchWithNonDefaultLocale) {
122 if (!IsStemmingEnabled()) {
123 GTEST_SKIP() << "Skipping test because stemming is not enabled.";
124 }
125
126 ICING_ASSERT_OK_AND_ASSIGN(
127 std::unique_ptr<ExpanderManager> expander_manager,
128 ExpanderManager::Create(ULOC_FRENCH, /*max_terms_per_expander=*/3));
129
130 std::vector<ExpandedTerm> expanded_terms = expander_manager->ProcessTerm(
131 "running", TermMatchType::STEMMING, ULOC_US);
132 EXPECT_THAT(expander_manager->default_locale(), ULOC_FRENCH);
133 EXPECT_THAT(expanded_terms,
134 ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
135 ExpandedTerm("run", /*is_stemmed_term_in=*/true)));
136
137 expanded_terms = expander_manager->ProcessTerm(
138 "torpedearon", TermMatchType::STEMMING, "es_ES");
139 EXPECT_THAT(expander_manager->default_locale(), ULOC_FRENCH);
140 EXPECT_THAT(
141 expanded_terms,
142 ElementsAre(ExpandedTerm("torpedearon", /*is_stemmed_term_in=*/false),
143 ExpandedTerm("torped", /*is_stemmed_term_in=*/true)));
144 }
145
TEST(ExpanderManagerTest,ProcessTerm_stemmingMatchWithUnsupportedLocaleUsesDefault)146 TEST(ExpanderManagerTest,
147 ProcessTerm_stemmingMatchWithUnsupportedLocaleUsesDefault) {
148 if (!IsStemmingEnabled()) {
149 GTEST_SKIP() << "Skipping test because stemming is not enabled.";
150 }
151
152 ICING_ASSERT_OK_AND_ASSIGN(
153 std::unique_ptr<ExpanderManager> expander_manager,
154 ExpanderManager::Create(ULOC_FRENCH, /*max_terms_per_expander=*/3));
155
156 std::string unsupported_locale_str = std::string(kUnsupportedLocale);
157 std::vector<ExpandedTerm> expanded_terms = expander_manager->ProcessTerm(
158 "running", TermMatchType::STEMMING, unsupported_locale_str);
159 EXPECT_THAT(expander_manager->default_locale(), ULOC_FRENCH);
160 EXPECT_THAT(expanded_terms, ElementsAre(ExpandedTerm(
161 "running", /*is_stemmed_term_in=*/false)));
162
163 expanded_terms = expander_manager->ProcessTerm(
164 "majestueuse", TermMatchType::STEMMING, unsupported_locale_str);
165 EXPECT_THAT(expander_manager->default_locale(), ULOC_FRENCH);
166 EXPECT_THAT(
167 expanded_terms,
168 ElementsAre(ExpandedTerm("majestueuse", /*is_stemmed_term_in=*/false),
169 ExpandedTerm("majestu", /*is_stemmed_term_in=*/true)));
170 }
171
TEST(ExpanderManagerTest,ProcessTerm_stemmingMatchWithUnsupportedDefaultLocaleUsesEnglish)172 TEST(ExpanderManagerTest,
173 ProcessTerm_stemmingMatchWithUnsupportedDefaultLocaleUsesEnglish) {
174 if (!IsStemmingEnabled()) {
175 GTEST_SKIP() << "Skipping test because stemming is not enabled.";
176 }
177
178 ICING_ASSERT_OK_AND_ASSIGN(
179 std::unique_ptr<ExpanderManager> expander_manager,
180 ExpanderManager::Create(std::string(kUnsupportedLocale),
181 /*max_terms_per_expander=*/3));
182
183 std::string unsupported_locale_str = std::string(kUnsupportedLocale);
184 std::vector<ExpandedTerm> expanded_terms = expander_manager->ProcessTerm(
185 "running", TermMatchType::STEMMING, unsupported_locale_str);
186 EXPECT_THAT(expander_manager->default_locale(),
187 ExpanderManager::kDefaultEnglishLocale);
188 EXPECT_THAT(expanded_terms,
189 ElementsAre(ExpandedTerm("running", /*is_stemmed_term_in=*/false),
190 ExpandedTerm("run", /*is_stemmed_term_in=*/true)));
191
192 expanded_terms = expander_manager->ProcessTerm(
193 "majestueuse", TermMatchType::STEMMING, unsupported_locale_str);
194 EXPECT_THAT(expander_manager->default_locale(),
195 ExpanderManager::kDefaultEnglishLocale);
196 EXPECT_THAT(
197 expanded_terms,
198 ElementsAre(ExpandedTerm("majestueuse", /*is_stemmed_term_in=*/false),
199 ExpandedTerm("majestueus", /*is_stemmed_term_in=*/true)));
200 }
201
TEST(ExpanderManagerTest,ThreadSafety)202 TEST(ExpanderManagerTest, ThreadSafety) {
203 ICING_ASSERT_OK_AND_ASSIGN(
204 std::unique_ptr<ExpanderManager> expander_manager,
205 ExpanderManager::Create(ULOC_US,
206 /*max_terms_per_expander=*/1000));
207
208 constexpr int kNumTerms = 10;
209 constexpr int kNumLocales = 5;
210 constexpr std::array<std::string_view, kNumLocales> kLocales = {
211 ULOC_US, ULOC_FRENCH, kRussianLocale, kTamilLocale, kUnsupportedLocale};
212 constexpr std::array<std::string_view, kNumTerms> kTerms = {
213 "running", "majestueuse", "валяется", "இக்கதையின்", "testing",
214 "test", "running", "говорить", "அக்கரையில்", "manager"};
215
216 std::array<std::string_view, kNumTerms> kStems;
217 if (IsStemmingEnabled()) {
218 kStems = {"run", "majestu", "валя", "கதை", "test",
219 "test", "running", "говор", "கரை", "manag"};
220 } else {
221 // Stemming is not enabled, so the stemmed terms are the same as the
222 // original terms.
223 kStems = kTerms;
224 }
225
226 // Create kNumThreads threads. Call ProcessTerm() from each thread in
227 // parallel using different locales. There should be no crashes.
228 constexpr int kNumThreads = 50;
229 std::vector<std::vector<ExpandedTerm>> expanded_terms(kNumThreads);
230 auto callable = [&](int thread_id) {
231 std::string locale = std::string(kLocales[thread_id % kNumLocales]);
232 expanded_terms[thread_id] = expander_manager->ProcessTerm(
233 kTerms[thread_id % kNumTerms], TermMatchType::STEMMING, locale);
234 };
235
236 // Spawn threads to call ProcessTerm() in parallel.
237 std::vector<std::thread> thread_objs;
238 for (int i = 0; i < kNumThreads; ++i) {
239 thread_objs.emplace_back(callable, i);
240 }
241
242 // Join threads and verify results
243 for (int i = 0; i < kNumThreads; ++i) {
244 thread_objs[i].join();
245
246 int term_number = i % kNumTerms;
247 if (kTerms[term_number] == kStems[term_number]) {
248 // No stemmed term generated after expansion.
249 EXPECT_THAT(expanded_terms[i],
250 ElementsAre(ExpandedTerm(std::string(kTerms[term_number]),
251 /*is_stemmed_term_in=*/false)));
252 } else {
253 EXPECT_THAT(expanded_terms[i],
254 ElementsAre(ExpandedTerm(std::string(kTerms[term_number]),
255 /*is_stemmed_term_in=*/false),
256 ExpandedTerm(std::string(kStems[term_number]),
257 /*is_stemmed_term_in=*/true)));
258 }
259 }
260 }
261
262 } // namespace
263
264 } // namespace lib
265 } // namespace icing
266