xref: /aosp_15_r20/external/skia/modules/skunicode/tests/SkUnicodeTest.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 
2 /*
3  * Copyright 2023 Google LLC
4  *
5  * Use of this source code is governed by a BSD-style license that can be
6  * found in the LICENSE file.
7  */
8 #include "include/core/SkSpan.h"
9 #include "include/core/SkStream.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypeface.h"
12 #include "src/base/SkBitmaskEnum.h"
13 #include "tests/Test.h"
14 
15 #include "modules/skunicode/include/SkUnicode.h"
16 
17 #if defined(SK_UNICODE_ICU_IMPLEMENTATION)
18 #include "modules/skunicode/include/SkUnicode_icu.h"
19 #endif
20 #if defined(SK_UNICODE_LIBGRAPHEME_IMPLEMENTATION)
21 #include "modules/skunicode/include/SkUnicode_libgrapheme.h"
22 #endif
23 #if defined(SK_UNICODE_ICU4X_IMPLEMENTATION)
24 #include "modules/skunicode/include/SkUnicode_icu4x.h"
25 #endif
26 #if defined(SK_UNICODE_CLIENT_IMPLEMENTATION)
27 #include "modules/skunicode/include/SkUnicode_client.h"
28 #endif
29 
30 #include <vector>
31 
// DEF_TEST_ICU(name, reporter): when SK_UNICODE_ICU_IMPLEMENTATION is defined,
// emits a DEF_TEST called <name>ICU whose body forwards to
// name(reporter, SkUnicodes::ICU::Make()). Otherwise it expands to nothing.
32 #ifdef SK_UNICODE_ICU_IMPLEMENTATION
33 #define DEF_TEST_ICU(name, reporter) \
34     DEF_TEST(name##ICU, reporter) { name(reporter, SkUnicodes::ICU::Make()); }
35 #else
36 #define DEF_TEST_ICU(name, reporter)
37 #endif
38 
// DEF_TEST_ICU4X(name, reporter): when SK_UNICODE_ICU4X_IMPLEMENTATION is
// defined, emits a DEF_TEST called <name>ICU4X whose body forwards to
// name(reporter, SkUnicodes::ICU4X::Make()). Otherwise it expands to nothing.
39 #ifdef SK_UNICODE_ICU4X_IMPLEMENTATION
40 #define DEF_TEST_ICU4X(name, reporter) \
41     DEF_TEST(name##ICU4X, reporter) { name(reporter, SkUnicodes::ICU4X::Make()); }
42 #else
43 #define DEF_TEST_ICU4X(name, reporter)
44 #endif
45 
// DEF_TEST_LIBGRAPHEME(name, reporter): when
// SK_UNICODE_LIBGRAPHEME_IMPLEMENTATION is defined, emits a DEF_TEST called
// <name>LIBGRAPHEME whose body forwards to
// name(reporter, SkUnicodes::Libgrapheme::Make()). Otherwise it expands to
// nothing.
46 #ifdef SK_UNICODE_LIBGRAPHEME_IMPLEMENTATION
47 #define DEF_TEST_LIBGRAPHEME(name, reporter) \
48     DEF_TEST(name##LIBGRAPHEME, reporter) { name(reporter, SkUnicodes::Libgrapheme::Make()); }
49 #else
50 #define DEF_TEST_LIBGRAPHEME(name, reporter)
51 #endif
52 
// DEF_TEST_NOIMPL(name, reporter) always expands to nothing; it registers no
// test.
53 #define DEF_TEST_NOIMPL(name, reporter)
54 
// DEF_TEST_UNICODES(name, reporter) forward-declares a static function
// name(reporter, unicode), emits the ICU, ICU4X and libgrapheme test wrappers
// for it (each may be empty, depending on the build), and then opens the
// definition of that function. The brace-enclosed block written after the
// macro invocation becomes the function body.
55 #define DEF_TEST_UNICODES(name, reporter) \
56     static void name(skiatest::Reporter* reporter, sk_sp<SkUnicode> unicode); \
57     DEF_TEST_ICU(name, reporter) \
58     DEF_TEST_ICU4X(name, reporter) \
59     DEF_TEST_LIBGRAPHEME(name, reporter) \
60     DEF_TEST_NOIMPL(name, reporter) \
61     void name(skiatest::Reporter* reporter, sk_sp<SkUnicode> unicode)
62 
// DEF_TEST_ICU_UNICODES(name, reporter) forward-declares a static function
// name(reporter, unicode), emits only the ICU and ICU4X test wrappers for it
// (no libgrapheme wrapper), and then opens the definition of that function.
// The brace-enclosed block written after the macro invocation becomes the
// function body.
63 #define DEF_TEST_ICU_UNICODES(name, reporter) \
64     static void name(skiatest::Reporter* reporter, sk_sp<SkUnicode> unicode); \
65     DEF_TEST_ICU(name, reporter) \
66     DEF_TEST_ICU4X(name, reporter) \
67     DEF_TEST_NOIMPL(name, reporter) \
68     void name(skiatest::Reporter* reporter, sk_sp<SkUnicode> unicode)
69 
70 using namespace skia_private;
71 
72 #ifdef SK_UNICODE_CLIENT_IMPLEMENTATION
// Builds a client-data SkUnicode over the UTF-8 encoding of U+F2008 (a
// supplementary private-use code point) with empty word/grapheme/line-break
// tables, and checks that no resulting code-unit flag carries the
// part-of-whitespace-break bit.
UNIX_ONLY_TEST(SkUnicode_Client, reporter) {
    std::u16string text = u"\U000f2008";
    auto utf8 = SkUnicode::convertUtf16ToUtf8(text.data(), text.size());
    auto client = SkUnicodes::Client::Make
                  (SkSpan<char>(&utf8[0], utf8.size()), {}, {}, {});
    // Report a failure instead of dereferencing a null backend, matching the
    // guard used by the ICU-based tests in this file.
    if (!client) {
        REPORTER_ASSERT(reporter, client);
        return;
    }
    skia_private::TArray<SkUnicode::CodeUnitFlags, true> results;
    client->computeCodeUnitFlags(utf8.data(), utf8.size(), false, &results);

    for (auto flag : results) {
        REPORTER_ASSERT(reporter, !SkUnicode::hasPartOfWhiteSpaceBreakFlag(flag));
    }
}
85 #endif
86 
87 #if defined(SK_UNICODE_ICU_IMPLEMENTATION)
// Runs the compiled-in ICU backend over the UTF-8 encoding of U+F2008 and
// checks that none of the returned code-unit flags has the
// part-of-whitespace-break bit set. Fails (and stops) if ICU is unavailable.
UNIX_ONLY_TEST(SkUnicode_Compiled_Native, reporter) {
    auto icu = SkUnicodes::ICU::Make();
    if (!icu) {
        // Record the missing backend as a test failure.
        REPORTER_ASSERT(reporter, icu);
        return;
    }

    const std::u16string text(u"\U000f2008");
    auto utf8 = SkUnicode::convertUtf16ToUtf8(text.data(), text.size());

    skia_private::TArray<SkUnicode::CodeUnitFlags, true> results;
    icu->computeCodeUnitFlags(utf8.data(), utf8.size(), false, &results);

    for (int index = 0; index < results.size(); ++index) {
        const auto flag = results[index];
        REPORTER_ASSERT(reporter, !SkUnicode::hasPartOfWhiteSpaceBreakFlag(flag));
    }
}
102 #endif
103 
104 #if defined(SK_UNICODE_LIBGRAPHEME_IMPLEMENTATION)
// Asks the libgrapheme backend for word boundaries in an ASCII string of
// space-separated digit groups and compares them with the expected positions.
UNIX_ONLY_TEST(SkUnicode_GetUtf8Words, reporter) {
    auto libgrapheme = SkUnicodes::Libgrapheme::Make();
    // Report a failure instead of dereferencing a null backend.
    if (!libgrapheme) {
        REPORTER_ASSERT(reporter, libgrapheme);
        return;
    }
    SkString text("1 22 333 4444 55555 666666 7777777");
    std::vector<SkUnicode::Position> expected = { 0, 1, 2, 4, 5, 8, 9, 13, 14, 19, 20, 26, 27, 34 };
    std::vector<SkUnicode::Position> results;
    auto result = libgrapheme->getUtf8Words(text.data(), text.size(), "en", &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == expected.size());
    // REPORTER_ASSERT does not abort, so bound the loop by both vectors to
    // avoid reading past the end of `expected` after a size mismatch.
    for (size_t i = 0; i < results.size() && i < expected.size(); ++i) {
        REPORTER_ASSERT(reporter, results[i] == expected[i]);
    }
}
117 #endif
118 
119 #if defined(SK_UNICODE_ICU_IMPLEMENTATION)
// Asks the compiled-in ICU backend for sentence boundaries (null locale) and
// compares them with the expected positions. The trailing "3.1415926" checks
// that a period inside a number is not reported as a sentence end.
UNIX_ONLY_TEST(SkUnicode_Compiled_GetSentences, reporter) {
    auto icu = SkUnicodes::ICU::Make();
    if (!icu) {
        REPORTER_ASSERT(reporter, icu);
        return;
    }
    SkString text("Hello world! Hello world? Hello world... Not a sentence end: 3.1415926");
    std::vector<SkUnicode::Position> expected = {0, 13, 26, 41, 70};
    std::vector<SkUnicode::Position> results;
    auto result = icu->getSentences(text.data(), text.size(), nullptr, &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == expected.size());
    // REPORTER_ASSERT does not abort, so bound the loop by both vectors to
    // avoid reading past the end of `expected` after a size mismatch.
    for (size_t i = 0; i < results.size() && i < expected.size(); ++i) {
        REPORTER_ASSERT(reporter, results[i] == expected[i]);
    }
}
136 #endif
137 
hasWordFlag(SkUnicode::CodeUnitFlags flags)138 bool hasWordFlag(SkUnicode::CodeUnitFlags flags) {
139     return (flags & SkUnicode::kWordBreak) == SkUnicode::kWordBreak;
140 }
141 
142 // On Windows libgrapheme produces different results
// Pure ASCII text with an LTR base direction must yield exactly one bidi
// region that covers the whole string at level 0.
DEF_TEST_ICU_UNICODES(SkUnicode_GetBidiRegionsLTR, reporter) {
    if (!unicode) {
        return;
    }
    SkString text("1 22 333 4444 55555 666666 7777777");
    std::vector<SkUnicode::BidiRegion> results;
    auto result = unicode->getBidiRegions(text.data(),
                                          text.size(),
                                          SkUnicode::TextDirection::kLTR,
                                          &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == 1);
    // REPORTER_ASSERT does not abort; do not index an empty vector.
    if (results.empty()) {
        return;
    }
    REPORTER_ASSERT(reporter, results[0].start == 0 &&
                              results[0].end == text.size() &&
                              results[0].level == 0);
}
159 
// Pure Arabic text with an RTL base direction must yield exactly one bidi
// region that covers the whole string at level 1.
DEF_TEST_ICU_UNICODES(SkUnicode_GetBidiRegionsRTL, reporter) {
    if (!unicode) {
        return;
    }
    SkString text("الهيمنة على العالم عبارة قبيحة ، أفضل أن أسميها تحسين العالم.");
    std::vector<SkUnicode::BidiRegion> results;
    auto result = unicode->getBidiRegions(text.data(),
                                          text.size(),
                                          SkUnicode::TextDirection::kRTL,
                                          &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == 1);
    // REPORTER_ASSERT does not abort; do not index an empty vector.
    if (results.empty()) {
        return;
    }
    REPORTER_ASSERT(reporter, results[0].start == 0 &&
                              results[0].end == text.size() &&
                              results[0].level == 1);
}
176 
// ASCII digits and spaces laid out with an RTL base direction: the expected
// regions alternate between level 2 (digit runs) and level 1 (single spaces).
DEF_TEST_ICU_UNICODES(SkUnicode_GetBidiRegionsMix1, reporter) {
    if (!unicode) {
        return;
    }
    // Spaces become Arabic (RTL) but numbers remain English (LTR)
    SkString text("1 22 333 4444 55555 666666 7777777");
    std::vector<SkUnicode::BidiRegion> expected = {
        {0, 1, 2},
        {1, 2, 1},
        {2, 4, 2},
        {4, 5, 1},
        {5, 8, 2},
        {8, 9, 1},
        {9, 13, 2},
        {13, 14, 1},
        {14, 19, 2},
        {19, 20, 1},
        {20, 26, 2},
        {26, 27, 1},
        {27, 34, 2},
    };
    std::vector<SkUnicode::BidiRegion> results;
    auto result = unicode->getBidiRegions(text.data(),
                                          text.size(),
                                          SkUnicode::TextDirection::kRTL,
                                          &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == expected.size());
    // REPORTER_ASSERT does not abort, so bound the loop by both vectors to
    // avoid reading past the end of `expected` after a size mismatch.
    for (size_t i = 0; i < results.size() && i < expected.size(); ++i) {
        REPORTER_ASSERT(reporter, results[i].start == expected[i].start &&
                                  results[i].end == expected[i].end &&
                                  results[i].level == expected[i].level);
    }
}
211 
// Mixed Latin/Cyrillic/Arabic text with an LTR base direction: expects an
// LTR region (level 0), an RTL region (level 1) and a final LTR region
// (level 0), with boundaries given in UTF-8 code units.
DEF_TEST_ICU_UNICODES(SkUnicode_GetBidiRegionsMix2, reporter) {
    if (!unicode) {
        return;
    }
    // Few Russian/English words (ЛТР) in the mix
    SkString text("World ЛТР Domination هي عبارة قبيحة ، أفضل أن أسميها World ЛТР Optimization.");
    std::vector<SkUnicode::BidiRegion> expected = {
        { 0, 24, 0},
        { 24, 80, 1},
        { 80, 107, 0},
    };
    std::vector<SkUnicode::BidiRegion> results;
    auto result = unicode->getBidiRegions(text.data(),
                                          text.size(),
                                          SkUnicode::TextDirection::kLTR,
                                          &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == expected.size());
    // REPORTER_ASSERT does not abort, so bound the loop by both vectors to
    // avoid reading past the end of `expected` after a size mismatch.
    for (size_t i = 0; i < results.size() && i < expected.size(); ++i) {
        REPORTER_ASSERT(reporter, results[i].start == expected[i].start &&
                                  results[i].end == expected[i].end &&
                                  results[i].level == expected[i].level);
    }
}
236 
237 // Currently, libgrapheme uses different default rules and produces slightly
238 // different results; it does not matter for text shaping
// Upper-casing the ASCII alphabet must give the upper-case alphabet, and
// upper-casing text that is already upper-case must leave it unchanged.
DEF_TEST_ICU_UNICODES(SkUnicode_ToUpper, reporter) {
    if (unicode) {
        const SkString lower("abcdefghijklmnopqrstuvwxyz");
        const SkString upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");

        // Lower-case input is converted.
        const auto icu_result1 = unicode->toUpper(lower);
        REPORTER_ASSERT(reporter, icu_result1.equals(upper));

        // Upper-case input is returned as-is.
        const auto icu_result2 = unicode->toUpper(upper);
        REPORTER_ASSERT(reporter, icu_result2.equals(upper));
    }
}
250 
// Checks the flag computed for every code unit (plus the one-past-the-end
// slot) of a short ASCII string containing one newline and five spaces.
DEF_TEST_ICU_UNICODES(SkUnicode_ComputeCodeUnitFlags, reporter) {
    if (!unicode) {
        return;
    }
    // Whitespace sits at indices 1 ('\n'), 4, 8, 13, 19 and 26. A token starts
    // at 0 and right after each whitespace; 34 is the end-of-text slot.
    SkString text("1\n22 333 4444 55555 666666 7777777");
    TArray<SkUnicode::CodeUnitFlags> results;
    auto result = unicode->computeCodeUnitFlags(text.data(),
                                                text.size(),
                                                /*replaceTabs=*/true,
                                                &results);
    REPORTER_ASSERT(reporter, result);
    REPORTER_ASSERT(reporter, results.size() == SkToS16(text.size() + 1));
    for (auto i = 0; i < results.size(); ++i) {
        auto flags = results[i];
        // Every slot is expected to start a grapheme.
        auto expected = SkUnicode::CodeUnitFlags::kGraphemeStart;
        switch (i) {
            case 1:
                // The newline is a control character and also whitespace.
                expected |= SkUnicode::CodeUnitFlags::kControl;
                [[fallthrough]];
            case 4:
            case 8:
            case 13:
            case 19:
            case 26:
                expected |= SkUnicode::CodeUnitFlags::kPartOfWhiteSpaceBreak;
                expected |= SkUnicode::CodeUnitFlags::kPartOfIntraWordBreak;
                break;
            case 2:
                // First code unit after the newline.
                expected |= SkUnicode::CodeUnitFlags::kHardLineBreakBefore;
                [[fallthrough]];
            case 0:
            case 5:
            case 9:
            case 14:
            case 20:
            case 27:
            case 34:
                expected |= SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
                break;
            default:
                break;
        }
        REPORTER_ASSERT(reporter, flags == expected);
    }
}
285 
// Feeds a few small bidi-level sequences to reorderVisual() and compares the
// index array it fills in with the expected ordering, element by element.
DEF_TEST_UNICODES(SkUnicode_ReorderVisual, reporter) {
    if (!unicode) {
        return;
    }
    // Runs one case: `levels` is the input, `expected` the ordering that
    // reorderVisual() should write for it.
    auto checkCase = [&](const std::vector<SkUnicode::BidiLevel>& levels,
                         const std::vector<int32_t>& expected) {
        const size_t count = levels.size();
        std::vector<int32_t> logicalOrder(count);
        unicode->reorderVisual(levels.data(), count, logicalOrder.data());
        for (size_t i = 0; i != count; ++i) {
            REPORTER_ASSERT(reporter, expected[i] == logicalOrder[i]);
        }
    };
    checkCase({}, {});
    checkCase({0}, {0});
    checkCase({1}, {0});
    checkCase({0, 1, 0, 1}, {0, 1, 2, 3});
}
303 
SkUnicode_Emoji(SkUnicode * icu,skiatest::Reporter * reporter)304 [[maybe_unused]] static void SkUnicode_Emoji(SkUnicode* icu, skiatest::Reporter* reporter) {
305     std::u32string emojis(U"������������");
306     std::u32string not_emojis(U"満毎行昼本可");
307     for (auto e : emojis) {
308         REPORTER_ASSERT(reporter, icu->isEmoji(e));
309     }
310     for (auto n: not_emojis) {
311         REPORTER_ASSERT(reporter, !icu->isEmoji(n));
312     }
313 }
314 
315 #ifdef SK_UNICODE_ICU_IMPLEMENTATION
// Runs the shared emoji check against the compiled-in ICU backend; reports a
// failure if that backend cannot be created.
UNIX_ONLY_TEST(SkUnicode_Compiled_Emoji, reporter) {
    auto icu = SkUnicodes::ICU::Make();
    if (icu) {
        SkUnicode_Emoji(icu.get(), reporter);
    } else {
        // Missing backend: record it as a test failure.
        REPORTER_ASSERT(reporter, icu);
    }
}
324 #endif
325 
326 #ifdef SK_UNICODE_ICU4X_IMPLEMENTATION
// Runs the shared emoji check against the ICU4X backend; reports a failure
// if that backend cannot be created.
UNIX_ONLY_TEST(SkUnicode_ICU4X_Emoji, reporter) {
    auto icu = SkUnicodes::ICU4X::Make();
    if (icu) {
        SkUnicode_Emoji(icu.get(), reporter);
    } else {
        // Missing backend: record it as a test failure.
        REPORTER_ASSERT(reporter, icu);
    }
}
335 #endif
336 
SkUnicode_Ideographic(SkUnicode * icu,skiatest::Reporter * reporter)337 [[maybe_unused]] static void SkUnicode_Ideographic(SkUnicode* icu, skiatest::Reporter* reporter) {
338     std::u32string ideographic(U"満毎行昼本可");
339     std::u32string not_ideographic(U"������������");
340     for (auto i : ideographic) {
341         REPORTER_ASSERT(reporter, icu->isIdeographic(i));
342     }
343     for (auto n: not_ideographic) {
344         REPORTER_ASSERT(reporter, !icu->isIdeographic(n));
345     }
346 }
347 
348 #ifdef SK_UNICODE_ICU_IMPLEMENTATION
// Runs the shared ideographic check against the compiled-in ICU backend;
// reports a failure if that backend cannot be created.
UNIX_ONLY_TEST(SkUnicode_Compiled_Ideographic, reporter) {
    auto icu = SkUnicodes::ICU::Make();
    if (icu) {
        SkUnicode_Ideographic(icu.get(), reporter);
    } else {
        // Missing backend: record it as a test failure.
        REPORTER_ASSERT(reporter, icu);
    }
}
357 #endif
358 
359 #ifdef SK_UNICODE_ICU4X_IMPLEMENTATION
// Runs the shared ideographic check against the ICU4X backend; reports a
// failure if that backend cannot be created.
UNIX_ONLY_TEST(SkUnicode_ICU4X_Ideographic, reporter) {
    auto icu = SkUnicodes::ICU4X::Make();
    if (icu) {
        SkUnicode_Ideographic(icu.get(), reporter);
    } else {
        // Missing backend: record it as a test failure.
        REPORTER_ASSERT(reporter, icu);
    }
}
368 #endif
369