1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/util/character-iterator.h"
16
17 #include "gmock/gmock.h"
18 #include "gtest/gtest.h"
19 #include "icing/testing/icu-i18n-test-utils.h"
20
21 namespace icing {
22 namespace lib {
23
24 using ::testing::Eq;
25 using ::testing::IsFalse;
26 using ::testing::IsTrue;
27
// Walks a string mixing ASCII and two-byte UTF-8 characters forwards and then
// backwards by UTF-8 byte offset, checking the current character and all three
// indices (UTF-8, UTF-16, UTF-32) at every stop.
TEST(CharacterIteratorTest, BasicUtf8) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // The positions this test expects to visit, listed in text order.
  const CharacterIterator at_start(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0);
  const CharacterIterator at_o_acute(kText, /*utf8_index=*/3,
                                     /*utf16_index=*/2, /*utf32_index=*/2);
  const CharacterIterator at_b(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                               /*utf32_index=*/15);
  const CharacterIterator at_question_mark(kText, /*utf8_index=*/28,
                                           /*utf16_index=*/25,
                                           /*utf32_index=*/25);
  const CharacterIterator at_end(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                 /*utf32_index=*/26);

  CharacterIterator cursor(kText);
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("¿"));

  // Forward pass. Byte 4 falls inside the two-byte "ó" (bytes 3-4); the test
  // expects the iterator to report the first byte of that character.
  EXPECT_THAT(cursor.AdvanceToUtf8(4), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(cursor, Eq(at_o_acute));

  EXPECT_THAT(cursor.AdvanceToUtf8(18), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(cursor, Eq(at_b));

  EXPECT_THAT(cursor.AdvanceToUtf8(28), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(cursor, Eq(at_question_mark));

  // One past the final byte: the current character is expected to be 0.
  EXPECT_THAT(cursor.AdvanceToUtf8(29), IsTrue());
  EXPECT_THAT(cursor.GetCurrentChar(), Eq(0));
  EXPECT_THAT(cursor, Eq(at_end));

  // Backward pass over the same stops, in reverse order.
  EXPECT_THAT(cursor.RewindToUtf8(28), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(cursor, Eq(at_question_mark));

  EXPECT_THAT(cursor.RewindToUtf8(18), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(cursor, Eq(at_b));

  // As in the forward pass, byte 4 is the second byte of "ó".
  EXPECT_THAT(cursor.RewindToUtf8(4), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(cursor, Eq(at_o_acute));

  EXPECT_THAT(cursor.RewindToUtf8(0), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(cursor, Eq(at_start));
}
81
// Walks a string mixing ASCII and two-byte UTF-8 characters forwards and then
// backwards by UTF-16 code-unit offset, checking the current character and all
// three indices (UTF-8, UTF-16, UTF-32) at every stop.
TEST(CharacterIteratorTest, BasicUtf16) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";
  CharacterIterator iterator(kText);
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));

  // Forward pass.
  EXPECT_THAT(iterator.AdvanceToUtf16(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
                                   /*utf32_index=*/2)));

  EXPECT_THAT(iterator.AdvanceToUtf16(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                                   /*utf32_index=*/15)));

  EXPECT_THAT(iterator.AdvanceToUtf16(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
                                   /*utf32_index=*/25)));

  // One past the final code unit: the current character is expected to be 0.
  EXPECT_THAT(iterator.AdvanceToUtf16(26), IsTrue());
  EXPECT_THAT(iterator.GetCurrentChar(), Eq(0));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                   /*utf32_index=*/26)));

  // Backward pass over the same stops, in reverse order.
  EXPECT_THAT(iterator.RewindToUtf16(25), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/28, /*utf16_index=*/25,
                                   /*utf32_index=*/25)));

  EXPECT_THAT(iterator.RewindToUtf16(15), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                                   /*utf32_index=*/15)));

  EXPECT_THAT(iterator.RewindToUtf16(2), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/3, /*utf16_index=*/2,
                                   /*utf32_index=*/2)));

  // Rewind to the start with the UTF-16 API so that this test covers
  // RewindToUtf16 all the way back to index 0.
  EXPECT_THAT(iterator.RewindToUtf16(0), IsTrue());
  EXPECT_THAT(UCharToString(iterator.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(iterator,
              Eq(CharacterIterator(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0)));
}
135
// Walks a string mixing ASCII and two-byte UTF-8 characters forwards and then
// backwards by UTF-32 code-point offset, checking the current character and
// all three indices (UTF-8, UTF-16, UTF-32) at every stop.
TEST(CharacterIteratorTest, BasicUtf32) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // The positions this test expects to visit, listed in text order.
  const CharacterIterator at_start(kText, /*utf8_index=*/0, /*utf16_index=*/0,
                                   /*utf32_index=*/0);
  const CharacterIterator at_o_acute(kText, /*utf8_index=*/3,
                                     /*utf16_index=*/2, /*utf32_index=*/2);
  const CharacterIterator at_b(kText, /*utf8_index=*/18, /*utf16_index=*/15,
                               /*utf32_index=*/15);
  const CharacterIterator at_question_mark(kText, /*utf8_index=*/28,
                                           /*utf16_index=*/25,
                                           /*utf32_index=*/25);
  const CharacterIterator at_end(kText, /*utf8_index=*/29, /*utf16_index=*/26,
                                 /*utf32_index=*/26);

  CharacterIterator cursor(kText);
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("¿"));

  // Forward pass.
  EXPECT_THAT(cursor.AdvanceToUtf32(2), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(cursor, Eq(at_o_acute));

  EXPECT_THAT(cursor.AdvanceToUtf32(15), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(cursor, Eq(at_b));

  EXPECT_THAT(cursor.AdvanceToUtf32(25), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(cursor, Eq(at_question_mark));

  // One past the final code point: the current character is expected to be 0.
  EXPECT_THAT(cursor.AdvanceToUtf32(26), IsTrue());
  EXPECT_THAT(cursor.GetCurrentChar(), Eq(0));
  EXPECT_THAT(cursor, Eq(at_end));

  // Backward pass over the same stops, in reverse order.
  EXPECT_THAT(cursor.RewindToUtf32(25), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("?"));
  EXPECT_THAT(cursor, Eq(at_question_mark));

  EXPECT_THAT(cursor.RewindToUtf32(15), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("b"));
  EXPECT_THAT(cursor, Eq(at_b));

  EXPECT_THAT(cursor.RewindToUtf32(2), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("ó"));
  EXPECT_THAT(cursor, Eq(at_o_acute));

  EXPECT_THAT(cursor.RewindToUtf32(0), IsTrue());
  EXPECT_THAT(UCharToString(cursor.GetCurrentChar()), Eq("¿"));
  EXPECT_THAT(cursor, Eq(at_start));
}
189
// Checks that advancing or rewinding across an invalid byte fails in every
// encoding and leaves the iterator parked next to that byte.
TEST(CharacterIteratorTest, InvalidUtf) {
  // "\255" is an invalid sequence. Byte layout of the text:
  //   'f'(0) 'o'(1) 'o'(2) ' '(3) '\255'(4) ' '(5) 'b'(6) 'a'(7) 'r'(8)
  constexpr std::string_view kText = "foo \255 bar";

  // Where a forward move is expected to stop: on the invalid byte itself.
  const CharacterIterator at_invalid_byte(kText, /*utf8_index=*/4,
                                          /*utf16_index=*/4,
                                          /*utf32_index=*/4);
  // Where a backward move is expected to stop: on the ' ' that follows the
  // invalid byte in the text.
  const CharacterIterator after_invalid_byte(kText, /*utf8_index=*/5,
                                             /*utf16_index=*/5,
                                             /*utf32_index=*/5);

  // Try to advance to the 'b' in 'bar'. Each attempt should fail and leave the
  // iterator pointed at the invalid sequence '\255', for which
  // GetCurrentChar() should return the invalid-character sentinel.
  CharacterIterator forward(kText);
  EXPECT_THAT(forward.AdvanceToUtf8(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(at_invalid_byte));

  EXPECT_THAT(forward.AdvanceToUtf16(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(at_invalid_byte));

  EXPECT_THAT(forward.AdvanceToUtf32(6), IsFalse());
  EXPECT_THAT(forward.GetCurrentChar(), Eq(i18n_utils::kInvalidUChar32));
  EXPECT_THAT(forward, Eq(at_invalid_byte));

  // Start a second iterator already pointing at the 'b' in 'bar'.
  CharacterIterator backward(kText, /*utf8_index=*/6, /*utf16_index=*/6,
                             /*utf32_index=*/6);
  EXPECT_THAT(UCharToString(backward.GetCurrentChar()), Eq("b"));

  // Try to rewind to the last 'o' in 'foo'. Each attempt should fail and leave
  // the iterator on the ' ' at index 5, without crossing '\255'.
  EXPECT_THAT(backward.RewindToUtf8(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(after_invalid_byte));

  EXPECT_THAT(backward.RewindToUtf16(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(after_invalid_byte));

  EXPECT_THAT(backward.RewindToUtf32(2), IsFalse());
  EXPECT_THAT(backward.GetCurrentChar(), Eq(' '));
  EXPECT_THAT(backward, Eq(after_invalid_byte));
}
233
// Checks that MoveToUtf8/MoveToUtf16/MoveToUtf32 succeed when the iterator was
// constructed with a negative index for the corresponding encoding, and that
// all three indices are consistent afterwards.
TEST(CharacterIteratorTest, MoveToUtfNegativeIndex) {
  constexpr std::string_view kText = "¿Dónde está la biblioteca?";

  // Negative UTF-8 index, then move to the very first character.
  {
    CharacterIterator from_negative_utf8(kText, /*utf8_index=*/-1,
                                         /*utf16_index=*/0,
                                         /*utf32_index=*/0);
    // The move should succeed even though the starting index is negative.
    EXPECT_THAT(from_negative_utf8.MoveToUtf8(0), IsTrue());
    // After resetting to index 0 the cached character should be refreshed to
    // the first character of the text.
    EXPECT_THAT(UCharToString(from_negative_utf8.GetCurrentChar()), Eq("¿"));
    EXPECT_THAT(from_negative_utf8.utf8_index(), Eq(0));
    EXPECT_THAT(from_negative_utf8.utf16_index(), Eq(0));
    EXPECT_THAT(from_negative_utf8.utf32_index(), Eq(0));
  }

  // Negative UTF-16 index, then move to the second character ('D').
  {
    CharacterIterator from_negative_utf16(kText, /*utf8_index=*/0,
                                          /*utf16_index=*/-1,
                                          /*utf32_index=*/0);
    EXPECT_THAT(from_negative_utf16.MoveToUtf16(1), IsTrue());
    EXPECT_THAT(from_negative_utf16.GetCurrentChar(), Eq('D'));
    EXPECT_THAT(from_negative_utf16.utf8_index(), Eq(2));
    EXPECT_THAT(from_negative_utf16.utf16_index(), Eq(1));
    EXPECT_THAT(from_negative_utf16.utf32_index(), Eq(1));
  }

  // Negative UTF-32 index, then move to the third character ("ó").
  {
    CharacterIterator from_negative_utf32(kText, /*utf8_index=*/0,
                                          /*utf16_index=*/0,
                                          /*utf32_index=*/-1);
    EXPECT_THAT(from_negative_utf32.MoveToUtf32(2), IsTrue());
    EXPECT_THAT(UCharToString(from_negative_utf32.GetCurrentChar()), Eq("ó"));
    EXPECT_THAT(from_negative_utf32.utf8_index(), Eq(3));
    EXPECT_THAT(from_negative_utf32.utf16_index(), Eq(2));
    EXPECT_THAT(from_negative_utf32.utf32_index(), Eq(2));
  }
}
264
265 } // namespace lib
266 } // namespace icing
267