// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 1999-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  scrptrun.cpp
 *
 *   created on: 10/17/2001
 *   created by: Eric R. Mader
 */
15*0e209d39SAndroid Build Coastguard Worker
16*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/uscript.h"
18*0e209d39SAndroid Build Coastguard Worker
19*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h"
20*0e209d39SAndroid Build Coastguard Worker #include "scrptrun.h"
21*0e209d39SAndroid Build Coastguard Worker
22*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
23*0e209d39SAndroid Build Coastguard Worker
24*0e209d39SAndroid Build Coastguard Worker const char ScriptRun::fgClassID=0;
25*0e209d39SAndroid Build Coastguard Worker
26*0e209d39SAndroid Build Coastguard Worker UChar32 ScriptRun::pairedChars[] = {
27*0e209d39SAndroid Build Coastguard Worker 0x0028, 0x0029, // ascii paired punctuation
28*0e209d39SAndroid Build Coastguard Worker 0x003c, 0x003e,
29*0e209d39SAndroid Build Coastguard Worker 0x005b, 0x005d,
30*0e209d39SAndroid Build Coastguard Worker 0x007b, 0x007d,
31*0e209d39SAndroid Build Coastguard Worker 0x00ab, 0x00bb, // guillemets
32*0e209d39SAndroid Build Coastguard Worker 0x2018, 0x2019, // general punctuation
33*0e209d39SAndroid Build Coastguard Worker 0x201c, 0x201d,
34*0e209d39SAndroid Build Coastguard Worker 0x2039, 0x203a,
35*0e209d39SAndroid Build Coastguard Worker 0x3008, 0x3009, // chinese paired punctuation
36*0e209d39SAndroid Build Coastguard Worker 0x300a, 0x300b,
37*0e209d39SAndroid Build Coastguard Worker 0x300c, 0x300d,
38*0e209d39SAndroid Build Coastguard Worker 0x300e, 0x300f,
39*0e209d39SAndroid Build Coastguard Worker 0x3010, 0x3011,
40*0e209d39SAndroid Build Coastguard Worker 0x3014, 0x3015,
41*0e209d39SAndroid Build Coastguard Worker 0x3016, 0x3017,
42*0e209d39SAndroid Build Coastguard Worker 0x3018, 0x3019,
43*0e209d39SAndroid Build Coastguard Worker 0x301a, 0x301b
44*0e209d39SAndroid Build Coastguard Worker };
45*0e209d39SAndroid Build Coastguard Worker
46*0e209d39SAndroid Build Coastguard Worker const int32_t ScriptRun::pairedCharCount = UPRV_LENGTHOF(pairedChars);
47*0e209d39SAndroid Build Coastguard Worker const int32_t ScriptRun::pairedCharPower = 1 << highBit(pairedCharCount);
48*0e209d39SAndroid Build Coastguard Worker const int32_t ScriptRun::pairedCharExtra = pairedCharCount - pairedCharPower;
49*0e209d39SAndroid Build Coastguard Worker
highBit(int32_t value)50*0e209d39SAndroid Build Coastguard Worker int8_t ScriptRun::highBit(int32_t value)
51*0e209d39SAndroid Build Coastguard Worker {
52*0e209d39SAndroid Build Coastguard Worker if (value <= 0) {
53*0e209d39SAndroid Build Coastguard Worker return -32;
54*0e209d39SAndroid Build Coastguard Worker }
55*0e209d39SAndroid Build Coastguard Worker
56*0e209d39SAndroid Build Coastguard Worker int8_t bit = 0;
57*0e209d39SAndroid Build Coastguard Worker
58*0e209d39SAndroid Build Coastguard Worker if (value >= 1 << 16) {
59*0e209d39SAndroid Build Coastguard Worker value >>= 16;
60*0e209d39SAndroid Build Coastguard Worker bit += 16;
61*0e209d39SAndroid Build Coastguard Worker }
62*0e209d39SAndroid Build Coastguard Worker
63*0e209d39SAndroid Build Coastguard Worker if (value >= 1 << 8) {
64*0e209d39SAndroid Build Coastguard Worker value >>= 8;
65*0e209d39SAndroid Build Coastguard Worker bit += 8;
66*0e209d39SAndroid Build Coastguard Worker }
67*0e209d39SAndroid Build Coastguard Worker
68*0e209d39SAndroid Build Coastguard Worker if (value >= 1 << 4) {
69*0e209d39SAndroid Build Coastguard Worker value >>= 4;
70*0e209d39SAndroid Build Coastguard Worker bit += 4;
71*0e209d39SAndroid Build Coastguard Worker }
72*0e209d39SAndroid Build Coastguard Worker
73*0e209d39SAndroid Build Coastguard Worker if (value >= 1 << 2) {
74*0e209d39SAndroid Build Coastguard Worker value >>= 2;
75*0e209d39SAndroid Build Coastguard Worker bit += 2;
76*0e209d39SAndroid Build Coastguard Worker }
77*0e209d39SAndroid Build Coastguard Worker
78*0e209d39SAndroid Build Coastguard Worker if (value >= 1 << 1) {
79*0e209d39SAndroid Build Coastguard Worker value >>= 1;
80*0e209d39SAndroid Build Coastguard Worker bit += 1;
81*0e209d39SAndroid Build Coastguard Worker }
82*0e209d39SAndroid Build Coastguard Worker
83*0e209d39SAndroid Build Coastguard Worker return bit;
84*0e209d39SAndroid Build Coastguard Worker }
85*0e209d39SAndroid Build Coastguard Worker
getPairIndex(UChar32 ch)86*0e209d39SAndroid Build Coastguard Worker int32_t ScriptRun::getPairIndex(UChar32 ch)
87*0e209d39SAndroid Build Coastguard Worker {
88*0e209d39SAndroid Build Coastguard Worker int32_t probe = pairedCharPower;
89*0e209d39SAndroid Build Coastguard Worker int32_t index = 0;
90*0e209d39SAndroid Build Coastguard Worker
91*0e209d39SAndroid Build Coastguard Worker if (ch >= pairedChars[pairedCharExtra]) {
92*0e209d39SAndroid Build Coastguard Worker index = pairedCharExtra;
93*0e209d39SAndroid Build Coastguard Worker }
94*0e209d39SAndroid Build Coastguard Worker
95*0e209d39SAndroid Build Coastguard Worker while (probe > (1 << 0)) {
96*0e209d39SAndroid Build Coastguard Worker probe >>= 1;
97*0e209d39SAndroid Build Coastguard Worker
98*0e209d39SAndroid Build Coastguard Worker if (ch >= pairedChars[index + probe]) {
99*0e209d39SAndroid Build Coastguard Worker index += probe;
100*0e209d39SAndroid Build Coastguard Worker }
101*0e209d39SAndroid Build Coastguard Worker }
102*0e209d39SAndroid Build Coastguard Worker
103*0e209d39SAndroid Build Coastguard Worker if (pairedChars[index] != ch) {
104*0e209d39SAndroid Build Coastguard Worker index = -1;
105*0e209d39SAndroid Build Coastguard Worker }
106*0e209d39SAndroid Build Coastguard Worker
107*0e209d39SAndroid Build Coastguard Worker return index;
108*0e209d39SAndroid Build Coastguard Worker }
109*0e209d39SAndroid Build Coastguard Worker
sameScript(int32_t scriptOne,int32_t scriptTwo)110*0e209d39SAndroid Build Coastguard Worker UBool ScriptRun::sameScript(int32_t scriptOne, int32_t scriptTwo)
111*0e209d39SAndroid Build Coastguard Worker {
112*0e209d39SAndroid Build Coastguard Worker return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
113*0e209d39SAndroid Build Coastguard Worker }
114*0e209d39SAndroid Build Coastguard Worker
next()115*0e209d39SAndroid Build Coastguard Worker UBool ScriptRun::next()
116*0e209d39SAndroid Build Coastguard Worker {
117*0e209d39SAndroid Build Coastguard Worker int32_t startSP = parenSP; // used to find the first new open character
118*0e209d39SAndroid Build Coastguard Worker UErrorCode error = U_ZERO_ERROR;
119*0e209d39SAndroid Build Coastguard Worker
120*0e209d39SAndroid Build Coastguard Worker // if we've fallen off the end of the text, we're done
121*0e209d39SAndroid Build Coastguard Worker if (scriptEnd >= charLimit) {
122*0e209d39SAndroid Build Coastguard Worker return false;
123*0e209d39SAndroid Build Coastguard Worker }
124*0e209d39SAndroid Build Coastguard Worker
125*0e209d39SAndroid Build Coastguard Worker scriptCode = USCRIPT_COMMON;
126*0e209d39SAndroid Build Coastguard Worker
127*0e209d39SAndroid Build Coastguard Worker for (scriptStart = scriptEnd; scriptEnd < charLimit; scriptEnd += 1) {
128*0e209d39SAndroid Build Coastguard Worker char16_t high = charArray[scriptEnd];
129*0e209d39SAndroid Build Coastguard Worker UChar32 ch = high;
130*0e209d39SAndroid Build Coastguard Worker
131*0e209d39SAndroid Build Coastguard Worker // if the character is a high surrogate and it's not the last one
132*0e209d39SAndroid Build Coastguard Worker // in the text, see if it's followed by a low surrogate
133*0e209d39SAndroid Build Coastguard Worker if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1)
134*0e209d39SAndroid Build Coastguard Worker {
135*0e209d39SAndroid Build Coastguard Worker char16_t low = charArray[scriptEnd + 1];
136*0e209d39SAndroid Build Coastguard Worker
137*0e209d39SAndroid Build Coastguard Worker // if it is followed by a low surrogate,
138*0e209d39SAndroid Build Coastguard Worker // consume it and form the full character
139*0e209d39SAndroid Build Coastguard Worker if (low >= 0xDC00 && low <= 0xDFFF) {
140*0e209d39SAndroid Build Coastguard Worker ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
141*0e209d39SAndroid Build Coastguard Worker scriptEnd += 1;
142*0e209d39SAndroid Build Coastguard Worker }
143*0e209d39SAndroid Build Coastguard Worker }
144*0e209d39SAndroid Build Coastguard Worker
145*0e209d39SAndroid Build Coastguard Worker UScriptCode sc = uscript_getScript(ch, &error);
146*0e209d39SAndroid Build Coastguard Worker int32_t pairIndex = getPairIndex(ch);
147*0e209d39SAndroid Build Coastguard Worker
148*0e209d39SAndroid Build Coastguard Worker // Paired character handling:
149*0e209d39SAndroid Build Coastguard Worker //
150*0e209d39SAndroid Build Coastguard Worker // if it's an open character, push it onto the stack.
151*0e209d39SAndroid Build Coastguard Worker // if it's a close character, find the matching open on the
152*0e209d39SAndroid Build Coastguard Worker // stack, and use that script code. Any non-matching open
153*0e209d39SAndroid Build Coastguard Worker // characters above it on the stack will be poped.
154*0e209d39SAndroid Build Coastguard Worker if (pairIndex >= 0) {
155*0e209d39SAndroid Build Coastguard Worker if ((pairIndex & 1) == 0) {
156*0e209d39SAndroid Build Coastguard Worker parenStack[++parenSP].pairIndex = pairIndex;
157*0e209d39SAndroid Build Coastguard Worker parenStack[parenSP].scriptCode = scriptCode;
158*0e209d39SAndroid Build Coastguard Worker } else if (parenSP >= 0) {
159*0e209d39SAndroid Build Coastguard Worker int32_t pi = pairIndex & ~1;
160*0e209d39SAndroid Build Coastguard Worker
161*0e209d39SAndroid Build Coastguard Worker while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) {
162*0e209d39SAndroid Build Coastguard Worker parenSP -= 1;
163*0e209d39SAndroid Build Coastguard Worker }
164*0e209d39SAndroid Build Coastguard Worker
165*0e209d39SAndroid Build Coastguard Worker if (parenSP < startSP) {
166*0e209d39SAndroid Build Coastguard Worker startSP = parenSP;
167*0e209d39SAndroid Build Coastguard Worker }
168*0e209d39SAndroid Build Coastguard Worker
169*0e209d39SAndroid Build Coastguard Worker if (parenSP >= 0) {
170*0e209d39SAndroid Build Coastguard Worker sc = parenStack[parenSP].scriptCode;
171*0e209d39SAndroid Build Coastguard Worker }
172*0e209d39SAndroid Build Coastguard Worker }
173*0e209d39SAndroid Build Coastguard Worker }
174*0e209d39SAndroid Build Coastguard Worker
175*0e209d39SAndroid Build Coastguard Worker if (sameScript(scriptCode, sc)) {
176*0e209d39SAndroid Build Coastguard Worker if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
177*0e209d39SAndroid Build Coastguard Worker scriptCode = sc;
178*0e209d39SAndroid Build Coastguard Worker
179*0e209d39SAndroid Build Coastguard Worker // now that we have a final script code, fix any open
180*0e209d39SAndroid Build Coastguard Worker // characters we pushed before we knew the script code.
181*0e209d39SAndroid Build Coastguard Worker while (startSP < parenSP) {
182*0e209d39SAndroid Build Coastguard Worker parenStack[++startSP].scriptCode = scriptCode;
183*0e209d39SAndroid Build Coastguard Worker }
184*0e209d39SAndroid Build Coastguard Worker }
185*0e209d39SAndroid Build Coastguard Worker
186*0e209d39SAndroid Build Coastguard Worker // if this character is a close paired character,
187*0e209d39SAndroid Build Coastguard Worker // pop it from the stack
188*0e209d39SAndroid Build Coastguard Worker if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
189*0e209d39SAndroid Build Coastguard Worker parenSP -= 1;
190*0e209d39SAndroid Build Coastguard Worker startSP -= 1;
191*0e209d39SAndroid Build Coastguard Worker }
192*0e209d39SAndroid Build Coastguard Worker } else {
193*0e209d39SAndroid Build Coastguard Worker // if the run broke on a surrogate pair,
194*0e209d39SAndroid Build Coastguard Worker // end it before the high surrogate
195*0e209d39SAndroid Build Coastguard Worker if (ch >= 0x10000) {
196*0e209d39SAndroid Build Coastguard Worker scriptEnd -= 1;
197*0e209d39SAndroid Build Coastguard Worker }
198*0e209d39SAndroid Build Coastguard Worker
199*0e209d39SAndroid Build Coastguard Worker break;
200*0e209d39SAndroid Build Coastguard Worker }
201*0e209d39SAndroid Build Coastguard Worker }
202*0e209d39SAndroid Build Coastguard Worker
203*0e209d39SAndroid Build Coastguard Worker return true;
204*0e209d39SAndroid Build Coastguard Worker }
205*0e209d39SAndroid Build Coastguard Worker
206*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
207