/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*834a2baaSAndroid Build Coastguard Worker
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "FeatureFlags.h"
#include "FileUtils.h"
#include "minikin/Hyphenator.h"
22*834a2baaSAndroid Build Coastguard Worker
// Number of elements in a true array (not a pointer). Only defined here when no other header has
// already provided it.
#ifndef NELEM
#define NELEM(x) ((sizeof(x) / sizeof((x)[0])))
#endif
26*834a2baaSAndroid Build Coastguard Worker
27*834a2baaSAndroid Build Coastguard Worker namespace minikin {
28*834a2baaSAndroid Build Coastguard Worker
// Hyphenation pattern files loaded by the pattern-based tests in this file.
const char* usHyph = "/system/usr/hyphen-data/hyph-en-us.hyb";
const char* ptHyph = "/system/usr/hyphen-data/hyph-pt.hyb";
const char* malayalamHyph = "/system/usr/hyphen-data/hyph-ml.hyb";

// UTF-16 code units used to assemble the test words.
constexpr uint16_t HYPHEN_MINUS = 0x002D;       // U+002D HYPHEN-MINUS
constexpr uint16_t SOFT_HYPHEN = 0x00AD;        // U+00AD SOFT HYPHEN
constexpr uint16_t MIDDLE_DOT = 0x00B7;         // U+00B7 MIDDLE DOT
constexpr uint16_t GREEK_LOWER_ALPHA = 0x03B1;  // U+03B1 GREEK SMALL LETTER ALPHA
constexpr uint16_t ARMENIAN_AYB = 0x0531;       // U+0531 ARMENIAN CAPITAL LETTER AYB
constexpr uint16_t HEBREW_ALEF = 0x05D0;        // U+05D0 HEBREW LETTER ALEF
constexpr uint16_t ARABIC_ALEF = 0x0627;        // U+0627 ARABIC LETTER ALEF
constexpr uint16_t ARABIC_BEH = 0x0628;         // U+0628 ARABIC LETTER BEH
constexpr uint16_t ARABIC_ZWARAKAY = 0x0659;    // U+0659 ARABIC ZWARAKAY
constexpr uint16_t MALAYALAM_KA = 0x0D15;       // U+0D15 MALAYALAM LETTER KA
constexpr uint16_t UCAS_E = 0x1401;             // U+1401 CANADIAN SYLLABICS E
constexpr uint16_t HYPHEN = 0x2010;             // U+2010 HYPHEN
constexpr uint16_t EN_DASH = 0x2013;            // U+2013 EN DASH
46*834a2baaSAndroid Build Coastguard Worker
47*834a2baaSAndroid Build Coastguard Worker typedef std::function<Hyphenator*(const uint8_t*, size_t, size_t, size_t, const std::string&)>
48*834a2baaSAndroid Build Coastguard Worker Generator;
49*834a2baaSAndroid Build Coastguard Worker
50*834a2baaSAndroid Build Coastguard Worker class HyphenatorTest : public testing::TestWithParam<Generator> {};
51*834a2baaSAndroid Build Coastguard Worker
52*834a2baaSAndroid Build Coastguard Worker INSTANTIATE_TEST_SUITE_P(HyphenatorInstantiation, HyphenatorTest,
53*834a2baaSAndroid Build Coastguard Worker testing::Values(HyphenatorCXX::loadBinary, Hyphenator::loadBinaryForRust),
__anon3b361fc70102(const testing::TestParamInfo<HyphenatorTest::ParamType>& info) 54*834a2baaSAndroid Build Coastguard Worker [](const testing::TestParamInfo<HyphenatorTest::ParamType>& info) {
55*834a2baaSAndroid Build Coastguard Worker switch (info.index) {
56*834a2baaSAndroid Build Coastguard Worker case 0:
57*834a2baaSAndroid Build Coastguard Worker return "CXX";
58*834a2baaSAndroid Build Coastguard Worker case 1:
59*834a2baaSAndroid Build Coastguard Worker return "Rust";
60*834a2baaSAndroid Build Coastguard Worker default:
61*834a2baaSAndroid Build Coastguard Worker return "Unknown";
62*834a2baaSAndroid Build Coastguard Worker }
63*834a2baaSAndroid Build Coastguard Worker });
64*834a2baaSAndroid Build Coastguard Worker
65*834a2baaSAndroid Build Coastguard Worker // Simple test for US English. This tests "table", which happens to be the in the exceptions list.
TEST_P(HyphenatorTest,usEnglishAutomaticHyphenation)66*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, usEnglishAutomaticHyphenation) {
67*834a2baaSAndroid Build Coastguard Worker std::vector<uint8_t> patternData = readWholeFile(usHyph);
68*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(patternData.data(), patternData.size(), 2, 3, "en");
69*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'};
70*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
71*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
72*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)5, result.size());
73*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
74*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
75*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
76*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
77*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
78*834a2baaSAndroid Build Coastguard Worker }
79*834a2baaSAndroid Build Coastguard Worker
80*834a2baaSAndroid Build Coastguard Worker // Catalan l·l should break as l-/l
TEST_P(HyphenatorTest,catalanMiddleDot)81*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, catalanMiddleDot) {
82*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "ca");
83*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'};
84*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
85*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
86*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)5, result.size());
87*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
88*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
89*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
90*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]);
91*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
92*834a2baaSAndroid Build Coastguard Worker }
93*834a2baaSAndroid Build Coastguard Worker
94*834a2baaSAndroid Build Coastguard Worker // Catalan l·l should not break if the word is too short.
TEST_P(HyphenatorTest,catalanMiddleDotShortWord)95*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, catalanMiddleDotShortWord) {
96*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "ca");
97*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'l', MIDDLE_DOT, 'l'};
98*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
99*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
100*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
101*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
102*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
103*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
104*834a2baaSAndroid Build Coastguard Worker }
105*834a2baaSAndroid Build Coastguard Worker
106*834a2baaSAndroid Build Coastguard Worker // If we break on a hyphen in Polish, the hyphen should be repeated on the next line.
TEST_P(HyphenatorTest,polishHyphen)107*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, polishHyphen) {
108*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "pl");
109*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', HYPHEN, 'y'};
110*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
111*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
112*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
113*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
114*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
115*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]);
116*834a2baaSAndroid Build Coastguard Worker }
117*834a2baaSAndroid Build Coastguard Worker
118*834a2baaSAndroid Build Coastguard Worker // If the language is Polish but the script is not Latin, don't use Polish rules for hyphenation.
TEST_P(HyphenatorTest,polishHyphenButNonLatinWord)119*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, polishHyphenButNonLatinWord) {
120*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "pl");
121*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA};
122*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
123*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
124*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
125*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
126*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
127*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
128*834a2baaSAndroid Build Coastguard Worker }
129*834a2baaSAndroid Build Coastguard Worker
130*834a2baaSAndroid Build Coastguard Worker // Polish en dash doesn't repeat on next line (as far as we know), but just provides a break
131*834a2baaSAndroid Build Coastguard Worker // opportunity.
TEST_P(HyphenatorTest,polishEnDash)132*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, polishEnDash) {
133*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "pl");
134*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', EN_DASH, 'y'};
135*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
136*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
137*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
138*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
139*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
140*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
141*834a2baaSAndroid Build Coastguard Worker }
142*834a2baaSAndroid Build Coastguard Worker
143*834a2baaSAndroid Build Coastguard Worker // If we break on a hyphen in Slovenian, the hyphen should be repeated on the next line. (Same as
144*834a2baaSAndroid Build Coastguard Worker // Polish.)
TEST_P(HyphenatorTest,slovenianHyphen)145*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, slovenianHyphen) {
146*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "sl");
147*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', HYPHEN, 'y'};
148*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
149*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
150*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
151*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
152*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
153*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]);
154*834a2baaSAndroid Build Coastguard Worker }
155*834a2baaSAndroid Build Coastguard Worker
156*834a2baaSAndroid Build Coastguard Worker // In Latin script text, soft hyphens should insert a visible hyphen if broken at.
TEST_P(HyphenatorTest,latinSoftHyphen)157*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, latinSoftHyphen) {
158*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
159*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'};
160*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
161*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
162*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
163*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
164*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
165*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
166*834a2baaSAndroid Build Coastguard Worker }
167*834a2baaSAndroid Build Coastguard Worker
168*834a2baaSAndroid Build Coastguard Worker // Soft hyphens at the beginning of a word are not useful in linebreaking.
TEST_P(HyphenatorTest,latinSoftHyphenStartingTheWord)169*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, latinSoftHyphenStartingTheWord) {
170*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
171*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {SOFT_HYPHEN, 'y'};
172*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
173*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
174*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)2, result.size());
175*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
176*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
177*834a2baaSAndroid Build Coastguard Worker }
178*834a2baaSAndroid Build Coastguard Worker
179*834a2baaSAndroid Build Coastguard Worker // In Malayalam script text, soft hyphens should not insert a visible hyphen if broken at.
TEST_P(HyphenatorTest,malayalamSoftHyphen)180*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, malayalamSoftHyphen) {
181*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
182*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA};
183*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
184*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
185*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
186*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
187*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
188*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
189*834a2baaSAndroid Build Coastguard Worker }
190*834a2baaSAndroid Build Coastguard Worker
191*834a2baaSAndroid Build Coastguard Worker // In automatically hyphenated Malayalam script text, we should not insert a visible hyphen.
TEST_P(HyphenatorTest,malayalamAutomaticHyphenation)192*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, malayalamAutomaticHyphenation) {
193*834a2baaSAndroid Build Coastguard Worker std::vector<uint8_t> patternData = readWholeFile(malayalamHyph);
194*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(patternData.data(), patternData.size(), 2, 2, "en");
195*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA};
196*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
197*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
198*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)5, result.size());
199*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
200*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
201*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
202*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]);
203*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
204*834a2baaSAndroid Build Coastguard Worker }
205*834a2baaSAndroid Build Coastguard Worker
206*834a2baaSAndroid Build Coastguard Worker // In Armenian script text, soft hyphens should insert an Armenian hyphen if broken at.
TEST_P(HyphenatorTest,aremenianSoftHyphen)207*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, aremenianSoftHyphen) {
208*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
209*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB};
210*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
211*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
212*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
213*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
214*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
215*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]);
216*834a2baaSAndroid Build Coastguard Worker }
217*834a2baaSAndroid Build Coastguard Worker
218*834a2baaSAndroid Build Coastguard Worker // In Hebrew script text, soft hyphens should insert a normal hyphen if broken at, for now.
219*834a2baaSAndroid Build Coastguard Worker // We may need to change this to maqaf later.
TEST_P(HyphenatorTest,hebrewSoftHyphen)220*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, hebrewSoftHyphen) {
221*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
222*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF};
223*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
224*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
225*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
226*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
227*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
228*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
229*834a2baaSAndroid Build Coastguard Worker }
230*834a2baaSAndroid Build Coastguard Worker
231*834a2baaSAndroid Build Coastguard Worker // Soft hyphen between two Arabic letters that join should keep the joining
232*834a2baaSAndroid Build Coastguard Worker // behavior when broken across lines.
TEST_P(HyphenatorTest,arabicSoftHyphenConnecting)233*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, arabicSoftHyphenConnecting) {
234*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
235*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARABIC_BEH, SOFT_HYPHEN, ARABIC_BEH};
236*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
237*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
238*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
239*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
240*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
241*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[2]);
242*834a2baaSAndroid Build Coastguard Worker }
243*834a2baaSAndroid Build Coastguard Worker
244*834a2baaSAndroid Build Coastguard Worker // Arabic letters may be joining on one side, but if it's the wrong side, we
245*834a2baaSAndroid Build Coastguard Worker // should use the normal hyphen.
TEST_P(HyphenatorTest,arabicSoftHyphenNonConnecting)246*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, arabicSoftHyphenNonConnecting) {
247*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
248*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARABIC_ALEF, SOFT_HYPHEN, ARABIC_BEH};
249*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
250*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
251*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
252*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
253*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
254*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
255*834a2baaSAndroid Build Coastguard Worker }
256*834a2baaSAndroid Build Coastguard Worker
257*834a2baaSAndroid Build Coastguard Worker // Skip transparent characters until you find a non-transparent one.
TEST_P(HyphenatorTest,arabicSoftHyphenSkipTransparents)258*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, arabicSoftHyphenSkipTransparents) {
259*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
260*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
261*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
262*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
263*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)5, result.size());
264*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
265*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
266*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
267*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[3]);
268*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
269*834a2baaSAndroid Build Coastguard Worker }
270*834a2baaSAndroid Build Coastguard Worker
271*834a2baaSAndroid Build Coastguard Worker // Skip transparent characters until you find a non-transparent one. If we get to one end without
272*834a2baaSAndroid Build Coastguard Worker // finding anything, we are still non-joining.
TEST_P(HyphenatorTest,arabicSoftHyphenTransparentsAtEnd)273*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, arabicSoftHyphenTransparentsAtEnd) {
274*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
275*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY};
276*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
277*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
278*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)4, result.size());
279*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
280*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
281*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
282*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[3]);
283*834a2baaSAndroid Build Coastguard Worker }
284*834a2baaSAndroid Build Coastguard Worker
285*834a2baaSAndroid Build Coastguard Worker // Skip transparent characters until you find a non-transparent one. If we get to one end without
286*834a2baaSAndroid Build Coastguard Worker // finding anything, we are still non-joining.
TEST_P(HyphenatorTest,arabicSoftHyphenTransparentsAtStart)287*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, arabicSoftHyphenTransparentsAtStart) {
288*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
289*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
290*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
291*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
292*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)4, result.size());
293*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
294*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
295*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
296*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
297*834a2baaSAndroid Build Coastguard Worker }
298*834a2baaSAndroid Build Coastguard Worker
299*834a2baaSAndroid Build Coastguard Worker // In Unified Canadian Aboriginal script (UCAS) text, soft hyphens should insert a UCAS hyphen.
TEST_P(HyphenatorTest,ucasSoftHyphen)300*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, ucasSoftHyphen) {
301*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
302*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {UCAS_E, SOFT_HYPHEN, UCAS_E};
303*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
304*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
305*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
306*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
307*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
308*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
309*834a2baaSAndroid Build Coastguard Worker }
310*834a2baaSAndroid Build Coastguard Worker
311*834a2baaSAndroid Build Coastguard Worker // Presently, soft hyphen looks at the character after it to determine hyphenation type. This is a
312*834a2baaSAndroid Build Coastguard Worker // little arbitrary, but let's test it anyway.
TEST_P(HyphenatorTest,mixedScriptSoftHyphen)313*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, mixedScriptSoftHyphen) {
314*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
315*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'a', SOFT_HYPHEN, UCAS_E};
316*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
317*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
318*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
319*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
320*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
321*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
322*834a2baaSAndroid Build Coastguard Worker }
323*834a2baaSAndroid Build Coastguard Worker
324*834a2baaSAndroid Build Coastguard Worker // Hard hyphens provide a breaking opportunity with nothing extra inserted.
TEST_P(HyphenatorTest,hardHyphen)325*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, hardHyphen) {
326*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
327*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', HYPHEN, 'y'};
328*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
329*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
330*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
331*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
332*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
333*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
334*834a2baaSAndroid Build Coastguard Worker }
335*834a2baaSAndroid Build Coastguard Worker
336*834a2baaSAndroid Build Coastguard Worker // Hyphen-minuses also provide a breaking opportunity with nothing extra inserted.
TEST_P(HyphenatorTest,hyphenMinus)337*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, hyphenMinus) {
338*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = GetParam()(nullptr, 0, 2, 2, "en");
339*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {'x', HYPHEN_MINUS, 'y'};
340*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
341*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
342*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)3, result.size());
343*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
344*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
345*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
346*834a2baaSAndroid Build Coastguard Worker }
347*834a2baaSAndroid Build Coastguard Worker
348*834a2baaSAndroid Build Coastguard Worker // If the word starts with a hard hyphen or hyphen-minus, it doesn't make sense to break
349*834a2baaSAndroid Build Coastguard Worker // it at that point.
TEST_P(HyphenatorTest,startingHyphenMinus)350*834a2baaSAndroid Build Coastguard Worker TEST_P(HyphenatorTest, startingHyphenMinus) {
351*834a2baaSAndroid Build Coastguard Worker Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 0, 2, 2, "en");
352*834a2baaSAndroid Build Coastguard Worker const uint16_t word[] = {HYPHEN_MINUS, 'y'};
353*834a2baaSAndroid Build Coastguard Worker std::vector<HyphenationType> result;
354*834a2baaSAndroid Build Coastguard Worker hyphenator->hyphenate(word, &result);
355*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ((size_t)2, result.size());
356*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
357*834a2baaSAndroid Build Coastguard Worker EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
358*834a2baaSAndroid Build Coastguard Worker }
359*834a2baaSAndroid Build Coastguard Worker
// Pattern-based hyphenation of a Portuguese word that already contains a hyphen-minus
// ("boas-vindas").
TEST_P(HyphenatorTest, hyphenationWithHyphen) {
    std::vector<uint8_t> patternData = readWholeFile(ptHyph);
    Hyphenator* hyphenator = GetParam()(patternData.data(), patternData.size(), 2, 3, "pt");
    const uint16_t word[] = {'b', 'o', 'a', 's', '-', 'v', 'i', 'n', 'd', 'a', 's'};
    // Exactly one entry per code unit of the 11-unit word. The previous version also asserted on
    // result[11], which is one past the end of an 11-element result.
    // NOTE(review): index 4 is the '-' itself, whereas the single-hyphen tests in this file
    // expect the break type on the character after the hyphen. Confirm this expectation against
    // the pattern-based code path.
    const std::vector<HyphenationType> expected = {
            HyphenationType::DONT_BREAK,
            HyphenationType::DONT_BREAK,
            HyphenationType::BREAK_AND_INSERT_HYPHEN,
            HyphenationType::DONT_BREAK,
            HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN,
            HyphenationType::DONT_BREAK,
            HyphenationType::DONT_BREAK,
            HyphenationType::DONT_BREAK,
            HyphenationType::BREAK_AND_INSERT_HYPHEN,
            HyphenationType::DONT_BREAK,
            HyphenationType::DONT_BREAK,
    };
    std::vector<HyphenationType> result;
    hyphenator->hyphenate(word, &result);
    // One whole-vector comparison covers both length and contents, so a result of the wrong
    // size is never indexed out of bounds.
    EXPECT_EQ(expected, result);
}
380*834a2baaSAndroid Build Coastguard Worker
381*834a2baaSAndroid Build Coastguard Worker } // namespace minikin
382