xref: /aosp_15_r20/frameworks/minikin/tests/unittest/GraphemeBreakTests.cpp (revision 834a2baab5fdfc28e9a428ee87c7ea8f6a06a53d)
1*834a2baaSAndroid Build Coastguard Worker /*
2*834a2baaSAndroid Build Coastguard Worker  * Copyright (C) 2015 The Android Open Source Project
3*834a2baaSAndroid Build Coastguard Worker  *
4*834a2baaSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*834a2baaSAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*834a2baaSAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*834a2baaSAndroid Build Coastguard Worker  *
8*834a2baaSAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*834a2baaSAndroid Build Coastguard Worker  *
10*834a2baaSAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*834a2baaSAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*834a2baaSAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*834a2baaSAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*834a2baaSAndroid Build Coastguard Worker  * limitations under the License.
15*834a2baaSAndroid Build Coastguard Worker  */
16*834a2baaSAndroid Build Coastguard Worker 
17*834a2baaSAndroid Build Coastguard Worker #include "minikin/GraphemeBreak.h"
18*834a2baaSAndroid Build Coastguard Worker 
19*834a2baaSAndroid Build Coastguard Worker #include <vector>
20*834a2baaSAndroid Build Coastguard Worker 
21*834a2baaSAndroid Build Coastguard Worker #include <gtest/gtest.h>
22*834a2baaSAndroid Build Coastguard Worker 
23*834a2baaSAndroid Build Coastguard Worker #include "UnicodeUtils.h"
24*834a2baaSAndroid Build Coastguard Worker 
25*834a2baaSAndroid Build Coastguard Worker namespace minikin {
26*834a2baaSAndroid Build Coastguard Worker 
IsBreak(const char * src)27*834a2baaSAndroid Build Coastguard Worker bool IsBreak(const char* src) {
28*834a2baaSAndroid Build Coastguard Worker     const size_t BUF_SIZE = 256;
29*834a2baaSAndroid Build Coastguard Worker     uint16_t buf[BUF_SIZE];
30*834a2baaSAndroid Build Coastguard Worker     size_t offset;
31*834a2baaSAndroid Build Coastguard Worker     size_t size;
32*834a2baaSAndroid Build Coastguard Worker     ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
33*834a2baaSAndroid Build Coastguard Worker     return GraphemeBreak::isGraphemeBreak(nullptr, buf, 0, size, offset);
34*834a2baaSAndroid Build Coastguard Worker }
35*834a2baaSAndroid Build Coastguard Worker 
IsBreakWithAdvances(const float * advances,const char * src)36*834a2baaSAndroid Build Coastguard Worker bool IsBreakWithAdvances(const float* advances, const char* src) {
37*834a2baaSAndroid Build Coastguard Worker     const size_t BUF_SIZE = 256;
38*834a2baaSAndroid Build Coastguard Worker     uint16_t buf[BUF_SIZE];
39*834a2baaSAndroid Build Coastguard Worker     size_t offset;
40*834a2baaSAndroid Build Coastguard Worker     size_t size;
41*834a2baaSAndroid Build Coastguard Worker     ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
42*834a2baaSAndroid Build Coastguard Worker     return GraphemeBreak::isGraphemeBreak(advances, buf, 0, size, offset);
43*834a2baaSAndroid Build Coastguard Worker }
44*834a2baaSAndroid Build Coastguard Worker 
// Surrogate handling: a well-formed pair is never split, while every position next to an
// unpaired surrogate is reported as a break.
TEST(GraphemeBreak, utf16) {
    // U+D83C U+DC31 is the UTF-16 encoding of the emoji U+1F431.
    EXPECT_FALSE(IsBreak("U+D83C | U+DC31"));

    // Ill-formed UTF-16.
    const char* const kIllFormed[] = {
            "U+D800 | U+D800",  // two leading surrogates
            "U+DC00 | U+DC00",  // two trailing surrogates
            "'a' | U+D800",     // lonely leading surrogate
            "U+DC00 | 'a'",     // lonely trailing surrogate
            "U+D800 | 'a'",     // leading surrogate followed by non-surrogate
            "'a' | U+DC00",     // non-surrogate followed by trailing surrogate
    };
    for (const char* text : kIllFormed) {
        EXPECT_TRUE(IsBreak(text)) << text;
    }
}
56*834a2baaSAndroid Build Coastguard Worker 
TEST(GraphemeBreak,rules)57*834a2baaSAndroid Build Coastguard Worker TEST(GraphemeBreak, rules) {
58*834a2baaSAndroid Build Coastguard Worker     // Rule GB1, sot ÷; Rule GB2, ÷ eot
59*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("| 'a'"));
60*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' |"));
61*834a2baaSAndroid Build Coastguard Worker 
62*834a2baaSAndroid Build Coastguard Worker     // Rule GB3, CR x LF
63*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+000D | U+000A"));  // CR x LF
64*834a2baaSAndroid Build Coastguard Worker 
65*834a2baaSAndroid Build Coastguard Worker     // Rule GB4, (Control | CR | LF) ÷
66*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+2028"));  // Line separator
67*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+000D"));  // LF
68*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+000A"));  // CR
69*834a2baaSAndroid Build Coastguard Worker 
70*834a2baaSAndroid Build Coastguard Worker     // Rule GB5, ÷ (Control | CR | LF)
71*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+2028 | 'a'"));  // Line separator
72*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+000D | 'a'"));  // LF
73*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+000A | 'a'"));  // CR
74*834a2baaSAndroid Build Coastguard Worker 
75*834a2baaSAndroid Build Coastguard Worker     // Rule GB6, L x ( L | V | LV | LVT )
76*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1100 | U+1100"));  // L x L
77*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1100 | U+1161"));  // L x V
78*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1100 | U+AC00"));  // L x LV
79*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1100 | U+AC01"));  // L x LVT
80*834a2baaSAndroid Build Coastguard Worker 
81*834a2baaSAndroid Build Coastguard Worker     // Rule GB7, ( LV | V ) x ( V | T )
82*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+AC00 | U+1161"));  // LV x V
83*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1161 | U+1161"));  // V x V
84*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+AC00 | U+11A8"));  // LV x T
85*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1161 | U+11A8"));  // V x T
86*834a2baaSAndroid Build Coastguard Worker 
87*834a2baaSAndroid Build Coastguard Worker     // Rule GB8, ( LVT | T ) x T
88*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+AC01 | U+11A8"));  // LVT x T
89*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+11A8 | U+11A8"));  // T x T
90*834a2baaSAndroid Build Coastguard Worker 
91*834a2baaSAndroid Build Coastguard Worker     // Other hangul pairs not counted above _are_ breaks (GB10)
92*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+AC00 | U+1100"));  // LV x L
93*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+AC01 | U+1100"));  // LVT x L
94*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+11A8 | U+1100"));  // T x L
95*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+11A8 | U+AC00"));  // T x LV
96*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+11A8 | U+AC01"));  // T x LVT
97*834a2baaSAndroid Build Coastguard Worker 
98*834a2baaSAndroid Build Coastguard Worker     // Rule GB12 and Rule GB13, Regional_Indicator x Regional_Indicator
99*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
100*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));   // Regional indicator pair (flag)
101*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
102*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
103*834a2baaSAndroid Build Coastguard Worker 
104*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA"));   // Regional indicator pair (flag)
105*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
106*834a2baaSAndroid Build Coastguard Worker     // Same case as the two above, knowing that the first two characters ligate, which is what
107*834a2baaSAndroid Build Coastguard Worker     // would typically happen.
108*834a2baaSAndroid Build Coastguard Worker     const float firstPairLigated[] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0};  // Two entries per codepoint
109*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
110*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
111*834a2baaSAndroid Build Coastguard Worker     // Repeat the tests, But now the font doesn't have a ligature for the first two characters,
112*834a2baaSAndroid Build Coastguard Worker     // while it does have a ligature for the last two. This could happen for fonts that do not
113*834a2baaSAndroid Build Coastguard Worker     // support some (potentially encoded later than they were developed) flags.
114*834a2baaSAndroid Build Coastguard Worker     const float secondPairLigated[] = {1.0, 0.0, 1.0, 0.0, 0.0, 0.0};
115*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
116*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
117*834a2baaSAndroid Build Coastguard Worker 
118*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA"));   // Regional indicator pair (flag)
119*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
120*834a2baaSAndroid Build Coastguard Worker 
121*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(
122*834a2baaSAndroid Build Coastguard Worker             IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
123*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(
124*834a2baaSAndroid Build Coastguard Worker             IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
125*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(
126*834a2baaSAndroid Build Coastguard Worker             IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
127*834a2baaSAndroid Build Coastguard Worker 
128*834a2baaSAndroid Build Coastguard Worker     // Rule GB9, x (Extend | ZWJ)
129*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+0301"));  // combining accent
130*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+200D"));  // ZWJ
131*834a2baaSAndroid Build Coastguard Worker     // Rule GB9a, x SpacingMark
132*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+0915 | U+093E"));  // KA, AA (spacing mark)
133*834a2baaSAndroid Build Coastguard Worker     // Rule GB9b, Prepend x
134*834a2baaSAndroid Build Coastguard Worker     // see tailoring test for prepend, as current ICU doesn't have any characters in the class
135*834a2baaSAndroid Build Coastguard Worker 
136*834a2baaSAndroid Build Coastguard Worker     // Rule GB999, Any ÷ Any
137*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | 'b'"));
138*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'f' | 'i'"));              // probable ligature
139*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+0644 | U+0627"));        // probable ligature, lam + alef
140*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+4E00 | U+4E00"));        // CJK ideographs
141*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
142*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'"));  // Regional indicator pair (flag)
143*834a2baaSAndroid Build Coastguard Worker 
144*834a2baaSAndroid Build Coastguard Worker     // Extended rule for emoji tag sequence.
145*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 'a'"));
146*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' U+1F3F4 | 'a'"));
147*834a2baaSAndroid Build Coastguard Worker 
148*834a2baaSAndroid Build Coastguard Worker     // Immediate tag_term after tag_base.
149*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E007F 'a'"));
150*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 | U+E007F"));
151*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' U+1F3F4 U+E007F | 'a'"));
152*834a2baaSAndroid Build Coastguard Worker 
153*834a2baaSAndroid Build Coastguard Worker     // Flag sequence
154*834a2baaSAndroid Build Coastguard Worker     // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag
155*834a2baaSAndroid Build Coastguard Worker     // of Scotland.
156*834a2baaSAndroid Build Coastguard Worker     // U+1F3F4 is WAVING BLACK FLAG. This can be a tag_base character.
157*834a2baaSAndroid Build Coastguard Worker     // U+E0067 is TAG LATIN SMALL LETTER G. This can be a part of tag_spec.
158*834a2baaSAndroid Build Coastguard Worker     // U+E0062 is TAG LATIN SMALL LETTER B. This can be a part of tag_spec.
159*834a2baaSAndroid Build Coastguard Worker     // U+E0073 is TAG LATIN SMALL LETTER S. This can be a part of tag_spec.
160*834a2baaSAndroid Build Coastguard Worker     // U+E0063 is TAG LATIN SMALL LETTER C. This can be a part of tag_spec.
161*834a2baaSAndroid Build Coastguard Worker     // U+E0074 is TAG LATIN SMALL LETTER T. This can be a part of tag_spec.
162*834a2baaSAndroid Build Coastguard Worker     // U+E007F is CANCEL TAG. This is a tag_term character.
163*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
164*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 | U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
165*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 | U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
166*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 | U+E0073 U+E0063 U+E0074 U+E007F"));
167*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 | U+E0063 U+E0074 U+E007F"));
168*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 | U+E0074 U+E007F"));
169*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 | U+E007F"));
170*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F | 'a'"));
171*834a2baaSAndroid Build Coastguard Worker }
172*834a2baaSAndroid Build Coastguard Worker 
TEST(GraphemeBreak,tailoring)173*834a2baaSAndroid Build Coastguard Worker TEST(GraphemeBreak, tailoring) {
174*834a2baaSAndroid Build Coastguard Worker     // control characters that we interpret as "extend"
175*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+00AD"));   // soft hyphen
176*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+200B"));   // zwsp
177*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+200E"));   // lrm
178*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+202A"));   // lre
179*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("'a' | U+E0041"));  // tag character
180*834a2baaSAndroid Build Coastguard Worker 
181*834a2baaSAndroid Build Coastguard Worker     // UTC-approved characters for the Prepend class
182*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+06DD | U+0661"));  // arabic subtending mark + digit one
183*834a2baaSAndroid Build Coastguard Worker 
184*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+0E01 | U+0E33"));  // Thai sara am
185*834a2baaSAndroid Build Coastguard Worker 
186*834a2baaSAndroid Build Coastguard Worker     // virama is not a grapheme break, but "pure killer" is
187*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
188*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
189*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
190*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01"));   // thai phinthu = pure killer
191*834a2baaSAndroid Build Coastguard Worker 
192*834a2baaSAndroid Build Coastguard Worker     // Repetition of above tests, but with a given advances array that implies everything
193*834a2baaSAndroid Build Coastguard Worker     // became just one cluster.
194*834a2baaSAndroid Build Coastguard Worker     const float conjoined[] = {1.0, 0.0, 0.0};
195*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
196*834a2baaSAndroid Build Coastguard Worker                                      "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
197*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
198*834a2baaSAndroid Build Coastguard Worker                                      "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
199*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
200*834a2baaSAndroid Build Coastguard Worker                                      "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
201*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(conjoined,
202*834a2baaSAndroid Build Coastguard Worker                                     "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
203*834a2baaSAndroid Build Coastguard Worker 
204*834a2baaSAndroid Build Coastguard Worker     // Repetition of above tests, but with a given advances array that the virama did not
205*834a2baaSAndroid Build Coastguard Worker     // form a cluster with the following consonant. The difference is that there is now
206*834a2baaSAndroid Build Coastguard Worker     // a grapheme break after the virama in ka+virama+ka.
207*834a2baaSAndroid Build Coastguard Worker     const float separate[] = {1.0, 0.0, 1.0};
208*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(separate,
209*834a2baaSAndroid Build Coastguard Worker                                      "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
210*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(separate,
211*834a2baaSAndroid Build Coastguard Worker                                     "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
212*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(separate,
213*834a2baaSAndroid Build Coastguard Worker                                      "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
214*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(separate,
215*834a2baaSAndroid Build Coastguard Worker                                     "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
216*834a2baaSAndroid Build Coastguard Worker 
217*834a2baaSAndroid Build Coastguard Worker     // suppress grapheme breaks in zwj emoji sequences
218*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468"));
219*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468"));
220*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468"));
221*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466"));
222*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466"));
223*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466"));
224*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466"));
225*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
226*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
227*834a2baaSAndroid Build Coastguard Worker 
228*834a2baaSAndroid Build Coastguard Worker     // Do not break before and after zwj with all kind of emoji characters.
229*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
230*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
231*834a2baaSAndroid Build Coastguard Worker 
232*834a2baaSAndroid Build Coastguard Worker     // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
233*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
234*834a2baaSAndroid Build Coastguard Worker }
235*834a2baaSAndroid Build Coastguard Worker 
TEST(GraphemeBreak,emojiModifiers)236*834a2baaSAndroid Build Coastguard Worker TEST(GraphemeBreak, emojiModifiers) {
237*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+261D | U+1F3FB"));   // white up pointing index + modifier
238*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+270C | U+1F3FB"));   // victory hand + modifier
239*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB"));  // boy + modifier
240*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC"));  // boy + modifier
241*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD"));  // boy + modifier
242*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE"));  // boy + modifier
243*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF"));  // boy + modifier
244*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF"));  // sign of the horns + modifier
245*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF"));  // selfie (Unicode 9) + modifier
246*834a2baaSAndroid Build Coastguard Worker     // Reptition of the tests above, with the knowledge that they are ligated.
247*834a2baaSAndroid Build Coastguard Worker     const float ligated1_2[] = {1.0, 0.0, 0.0};
248*834a2baaSAndroid Build Coastguard Worker     const float ligated2_2[] = {1.0, 0.0, 0.0, 0.0};
249*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+261D | U+1F3FB"));
250*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+270C | U+1F3FB"));
251*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FB"));
252*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FC"));
253*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FD"));
254*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FE"));
255*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FF"));
256*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F918 | U+1F3FF"));
257*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F933 | U+1F3FF"));
258*834a2baaSAndroid Build Coastguard Worker     // Reptition of the tests above, with the knowledge that they are not ligated.
259*834a2baaSAndroid Build Coastguard Worker     const float unligated1_2[] = {1.0, 1.0, 0.0};
260*834a2baaSAndroid Build Coastguard Worker     const float unligated2_2[] = {1.0, 0.0, 1.0, 0.0};
261*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+261D | U+1F3FB"));
262*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+270C | U+1F3FB"));
263*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FB"));
264*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FC"));
265*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FD"));
266*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FE"));
267*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FF"));
268*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F918 | U+1F3FF"));
269*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F933 | U+1F3FF"));
270*834a2baaSAndroid Build Coastguard Worker 
271*834a2baaSAndroid Build Coastguard Worker     // adding extend characters between emoji base and modifier doesn't affect grapheme cluster
272*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+270C U+FE0E | U+1F3FB"));  // victory hand + text style + modifier
273*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
274*834a2baaSAndroid Build Coastguard Worker     // Reptition of the two tests above, with the knowledge that they are ligated.
275*834a2baaSAndroid Build Coastguard Worker     const float ligated1_1_2[] = {1.0, 0.0, 0.0, 0.0};
276*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
277*834a2baaSAndroid Build Coastguard Worker     EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
278*834a2baaSAndroid Build Coastguard Worker     // Reptition of the first two tests, with the knowledge that they are not ligated.
279*834a2baaSAndroid Build Coastguard Worker     const float unligated1_1_2[] = {1.0, 0.0, 1.0, 0.0};
280*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
281*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
282*834a2baaSAndroid Build Coastguard Worker 
283*834a2baaSAndroid Build Coastguard Worker     // rat is not an emoji modifer
284*834a2baaSAndroid Build Coastguard Worker     EXPECT_TRUE(IsBreak("U+1F466 | U+1F400"));  // boy + rat
285*834a2baaSAndroid Build Coastguard Worker }
286*834a2baaSAndroid Build Coastguard Worker 
// WOMAN + ZWJ + <object> sequences. Without advances neither side of the ZWJ is a break. When
// the advances show that the font does not support the ligature, a break appears after the ZWJ
// while the position before the ZWJ still is not one.
TEST(GraphemeBreak, genderBalancedEmoji) {
    // One advance per UTF-16 code unit: WOMAN (2), ZWJ (1), then the trailing character.
    const float unligated2_1_2[] = {1.0f, 0.0f, 0.0f, 1.0f, 0.0f};
    const float unligated2_1_1[] = {1.0f, 0.0f, 0.0f, 1.0f};

    struct Sequence {
        const char* beforeZwj;
        const char* afterZwj;
        const float* unligatedAdvances;
    };
    const Sequence kSequences[] = {
            // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE.
            {"U+1F469 | U+200D U+1F4BC", "U+1F469 U+200D | U+1F4BC", unligated2_1_2},
            // U+2695 now has the emoji property, so it should be part of a ZWJ sequence.
            {"U+1F469 | U+200D U+2695", "U+1F469 U+200D | U+2695", unligated2_1_1},
    };
    for (const Sequence& seq : kSequences) {
        EXPECT_FALSE(IsBreak(seq.beforeZwj)) << seq.beforeZwj;
        EXPECT_FALSE(IsBreak(seq.afterZwj)) << seq.afterZwj;

        EXPECT_FALSE(IsBreakWithAdvances(seq.unligatedAdvances, seq.beforeZwj)) << seq.beforeZwj;
        EXPECT_TRUE(IsBreakWithAdvances(seq.unligatedAdvances, seq.afterZwj)) << seq.afterZwj;
    }
}
306*834a2baaSAndroid Build Coastguard Worker 
// Calls isGraphemeBreak directly with a non-zero third argument and a fourth argument smaller
// than the array (the helpers above pass 0 and the parsed size in those positions, so these
// are presumably the start index and length of the range under consideration).
TEST(GraphemeBreak, offsets) {
    uint16_t string[] = {0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301};

    struct Probe {
        size_t offset;
        bool isBreak;
    };
    const Probe kProbes[] = {
            {2, true},   // before U+0045, although U+06DD precedes it in the array
            {3, false},  // between U+0045 and the combining U+0301
            {4, true},   // between U+0301 and U+0049
            {5, true},   // before the final U+0301
    };
    for (const Probe& probe : kProbes) {
        EXPECT_EQ(probe.isBreak, GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, probe.offset))
                << "offset " << probe.offset;
    }
}
314*834a2baaSAndroid Build Coastguard Worker 
// Regression test: minikin used to look at the character before a ZWJ even when the ZWJ is the
// very first character of the text.
TEST(GraphemeBreak, startWithZWJ) {
    // The answer itself is irrelevant and deliberately discarded; the UB sanitizer will catch
    // it if minikin reads the character before the ZWJ.
    static_cast<void>(IsBreak("U+200D | U+1F5E8"));
}
319*834a2baaSAndroid Build Coastguard Worker 
320*834a2baaSAndroid Build Coastguard Worker }  // namespace minikin
321