xref: /aosp_15_r20/external/icing/icing/util/character-iterator_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/util/character-iterator.h"
16 
17 #include "gmock/gmock.h"
18 #include "gtest/gtest.h"
19 #include "icing/testing/icu-i18n-test-utils.h"
20 
21 namespace icing {
22 namespace lib {
23 
24 using ::testing::Eq;
25 using ::testing::IsFalse;
26 using ::testing::IsTrue;
27 
// Walks a string mixing ASCII and two-byte UTF-8 characters forwards and then
// backwards by UTF-8 byte offset, checking the decoded character and all
// three indices at every stop.
TEST(CharacterIteratorTest, BasicUtf8) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // Expected iterator state at each stop. The forward and backward passes
  // visit the same positions, so each one is spelled out only once.
  const CharacterIterator at_start(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0);
  const CharacterIterator at_o_acute(kText, /*utf8_index=*/3,
                                     /*utf16_index=*/2, /*utf32_index=*/2);
  const CharacterIterator at_b(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                               /*utf32_index=*/15);
  const CharacterIterator at_question_mark(kText, /*utf8_index=*/28,
                                           /*utf16_index=*/25,
                                           /*utf32_index=*/25);
  // One past the final byte of kText.
  const CharacterIterator at_end(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                 /*utf32_index=*/26);

  CharacterIterator iterator(kText);
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));

  // ---- Forward pass ----

  // Byte 4 is the second byte of the two-byte 'ó'; the iterator is expected
  // to settle on the first byte of that character (index 3).
  EXPECT_THAT(iterator.AdvanceToUtf8(4), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator, Eq(at_o_acute));

  EXPECT_THAT(iterator.AdvanceToUtf8(18), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator, Eq(at_b));

  EXPECT_THAT(iterator.AdvanceToUtf8(28), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator, Eq(at_question_mark));

  // Stepping to the end of the text succeeds and the current char reads as 0.
  EXPECT_THAT(iterator.AdvanceToUtf8(29), IsTrue());
  EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
  EXPECT_THAT(iterator, Eq(at_end));

  // ---- Backward pass over the same stops ----

  EXPECT_THAT(iterator.RewindToUtf8(28), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator, Eq(at_question_mark));

  EXPECT_THAT(iterator.RewindToUtf8(18), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator, Eq(at_b));

  // Same mid-character target as in the forward pass: index 4 resolves to 3.
  EXPECT_THAT(iterator.RewindToUtf8(4), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator, Eq(at_o_acute));

  EXPECT_THAT(iterator.RewindToUtf8(0), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(iterator, Eq(at_start));
}
81 
// Walks the text forwards and then backwards by UTF-16 code-unit offset,
// checking the decoded character and all three indices at every stop.
//
// Every character in kText lies in the Basic Multilingual Plane, so the
// expected UTF-16 and UTF-32 indices coincide throughout.
TEST(CharacterIteratorTest, BasicUtf16) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";
  CharacterIterator iterator(kText);
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));

  // ---- Forward pass ----
  EXPECT_THAT(iterator.AdvanceToUtf16(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
                                   /*utf32_index=*/2)));

  EXPECT_THAT(iterator.AdvanceToUtf16(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                                   /*utf32_index=*/15)));

  EXPECT_THAT(iterator.AdvanceToUtf16(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
                                   /*utf32_index=*/25)));

  // Stepping to the end of the text succeeds and the current char reads as 0.
  EXPECT_THAT(iterator.AdvanceToUtf16(26), IsTrue());
  EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                   /*utf32_index=*/26)));

  // ---- Backward pass over the same stops ----
  EXPECT_THAT(iterator.RewindToUtf16(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
                                   /*utf32_index=*/25)));

  EXPECT_THAT(iterator.RewindToUtf16(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                                   /*utf32_index=*/15)));

  EXPECT_THAT(iterator.RewindToUtf16(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
                                   /*utf32_index=*/2)));

  // Rewind to the very start through the UTF-16 entry point, so that this
  // test exercises RewindToUtf16 all the way down to index 0 (the UTF-8
  // variant is covered by the BasicUtf8 test).
  EXPECT_THAT(iterator.RewindToUtf16(0), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0)));
}
135 
// Walks the text forwards and then backwards by UTF-32 (code point) offset,
// checking the decoded character and all three indices at every stop.
TEST(CharacterIteratorTest, BasicUtf32) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // Expected iterator state at each stop. The forward and backward passes
  // visit the same positions, so each one is spelled out only once.
  const CharacterIterator at_start(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0);
  const CharacterIterator at_o_acute(kText, /*utf8_index=*/3,
                                     /*utf16_index=*/2, /*utf32_index=*/2);
  const CharacterIterator at_b(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                               /*utf32_index=*/15);
  const CharacterIterator at_question_mark(kText, /*utf8_index=*/28,
                                           /*utf16_index=*/25,
                                           /*utf32_index=*/25);
  // One past the final character of kText.
  const CharacterIterator at_end(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                 /*utf32_index=*/26);

  CharacterIterator iterator(kText);
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));

  // ---- Forward pass ----

  EXPECT_THAT(iterator.AdvanceToUtf32(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator, Eq(at_o_acute));

  EXPECT_THAT(iterator.AdvanceToUtf32(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator, Eq(at_b));

  EXPECT_THAT(iterator.AdvanceToUtf32(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator, Eq(at_question_mark));

  // Stepping to the end of the text succeeds and the current char reads as 0.
  EXPECT_THAT(iterator.AdvanceToUtf32(26), IsTrue());
  EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
  EXPECT_THAT(iterator, Eq(at_end));

  // ---- Backward pass over the same stops ----

  EXPECT_THAT(iterator.RewindToUtf32(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator, Eq(at_question_mark));

  EXPECT_THAT(iterator.RewindToUtf32(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator, Eq(at_b));

  EXPECT_THAT(iterator.RewindToUtf32(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator, Eq(at_o_acute));

  EXPECT_THAT(iterator.RewindToUtf32(0), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(iterator, Eq(at_start));
}
189 
// Checks that moves which would have to cross an invalid UTF-8 byte report
// failure, and pins down where the iterator is left afterwards, for all three
// index flavours and in both directions.
TEST(CharacterIteratorTest, InvalidUtf) {
  // The octal escape "\255" is the single byte 0xAD, which is not a valid
  // UTF-8 sequence on its own. It sits at index 4; 'b' of "bar" is at index 6.
  constexpr std::string_view kText = "foo \255 bar";

  // Where a failed forward move is expected to stop: on the invalid byte.
  const CharacterIterator stuck_on_invalid(kText, /*utf8_index=*/4,
                                           /*utf16_index=*/4,
                                           /*utf32_index=*/4);
  // Where a failed backward move is expected to stop: on the ' ' at index 5,
  // i.e. the last character reached before the invalid byte when walking
  // right-to-left.
  const CharacterIterator stuck_after_invalid(kText, /*utf8_index=*/5,
                                              /*utf16_index=*/5,
                                              /*utf32_index=*/5);

  // ---- Advancing across the invalid byte ----

  // Try to advance to the 'b' in "bar". Each attempt must fail, leave the
  // iterator on the invalid byte, and make GetCurrentChar() report the
  // invalid-character sentinel.
  CharacterIterator forward(kText);
  EXPECT_THAT(forward.AdvanceToUtf8(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(stuck_on_invalid));

  EXPECT_THAT(forward.AdvanceToUtf16(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(stuck_on_invalid));

  EXPECT_THAT(forward.AdvanceToUtf32(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(stuck_on_invalid));

  // ---- Rewinding across the invalid byte ----

  // Start from an iterator placed directly on the 'b' in "bar".
  CharacterIterator backward(kText, /*utf8_index=*/6, /*utf16_index=*/6,
                             /*utf32_index=*/6);
  EXPECT_THAT(UCharToString(backward.GetCurrentChar()), Eq("b"));

  // Try to rewind to the last 'o' in "foo". Each attempt must fail and leave
  // the iterator on the ' ' at index 5.
  EXPECT_THAT(backward.RewindToUtf8(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(stuck_after_invalid));

  EXPECT_THAT(backward.RewindToUtf16(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(stuck_after_invalid));

  EXPECT_THAT(backward.RewindToUtf32(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(stuck_after_invalid));
}
233 
// Checks that MoveToUtf8/16/32 still succeed when the iterator was built with
// a negative value for the corresponding index, and that the resulting indices
// are the ones expected for the target character.
TEST(CharacterIteratorTest, MoveToUtfNegativeIndex) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // Negative UTF-8 index: moving to 0 must succeed. The character cache
  // should be reset so that the first character is reported, and every index
  // should read 0.
  CharacterIterator from_negative_utf8(kText, /*utf8_index=*/-1,
                                       /*utf16_index=*/0, /*utf32_index=*/0);
  EXPECT_THAT(from_negative_utf8.MoveToUtf8(0), IsTrue());
  EXPECT_THAT(UCharToString(from_negative_utf8.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(from_negative_utf8.utf8_index(), Eq(0));
  EXPECT_THAT(from_negative_utf8.utf16_index(), Eq(0));
  EXPECT_THAT(from_negative_utf8.utf32_index(), Eq(0));

  // Negative UTF-16 index: moving to code unit 1 must land on 'D', which
  // starts at byte 2 because the leading '¿' occupies two bytes.
  CharacterIterator from_negative_utf16(kText, /*utf8_index=*/0,
                                        /*utf16_index=*/-1, /*utf32_index=*/0);
  EXPECT_THAT(from_negative_utf16.MoveToUtf16(1), IsTrue());
  EXPECT_THAT(from_negative_utf16.GetCurrentChar(), Eq('D'));
  EXPECT_THAT(from_negative_utf16.utf8_index(), Eq(2));
  EXPECT_THAT(from_negative_utf16.utf16_index(), Eq(1));
  EXPECT_THAT(from_negative_utf16.utf32_index(), Eq(1));

  // Negative UTF-32 index: moving to code point 2 must land on 'ó' at byte 3.
  CharacterIterator from_negative_utf32(kText, /*utf8_index=*/0,
                                        /*utf16_index=*/0, /*utf32_index=*/-1);
  EXPECT_THAT(from_negative_utf32.MoveToUtf32(2), IsTrue());
  EXPECT_THAT(UCharToString(from_negative_utf32.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(from_negative_utf32.utf8_index(), Eq(3));
  EXPECT_THAT(from_negative_utf32.utf16_index(), Eq(2));
  EXPECT_THAT(from_negative_utf32.utf32_index(), Eq(2));
}
264 
265 }  // namespace lib
266 }  // namespace icing
267