xref: /aosp_15_r20/external/libutf/rune.c (revision e72f39528b91793251d8cc21b78f3f8706ae7c47)
/*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
14*e72f3952SAlexander Dorokhine #include <stdarg.h>
15*e72f3952SAlexander Dorokhine #include <string.h>
16*e72f3952SAlexander Dorokhine #include "utf.h"
17*e72f3952SAlexander Dorokhine #include "utfdef.h"
18*e72f3952SAlexander Dorokhine 
19*e72f3952SAlexander Dorokhine enum
20*e72f3952SAlexander Dorokhine {
21*e72f3952SAlexander Dorokhine 	Bit1	= 7,
22*e72f3952SAlexander Dorokhine 	Bitx	= 6,
23*e72f3952SAlexander Dorokhine 	Bit2	= 5,
24*e72f3952SAlexander Dorokhine 	Bit3	= 4,
25*e72f3952SAlexander Dorokhine 	Bit4	= 3,
26*e72f3952SAlexander Dorokhine 	Bit5	= 2,
27*e72f3952SAlexander Dorokhine 
28*e72f3952SAlexander Dorokhine 	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
29*e72f3952SAlexander Dorokhine 	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
30*e72f3952SAlexander Dorokhine 	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
31*e72f3952SAlexander Dorokhine 	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
32*e72f3952SAlexander Dorokhine 	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
33*e72f3952SAlexander Dorokhine 	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
34*e72f3952SAlexander Dorokhine 
35*e72f3952SAlexander Dorokhine 	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
36*e72f3952SAlexander Dorokhine 	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
37*e72f3952SAlexander Dorokhine 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
38*e72f3952SAlexander Dorokhine 	Rune4	= (1<<(Bit4+3*Bitx))-1,
39*e72f3952SAlexander Dorokhine                                         /* 0001 1111 1111 1111 1111 1111 */
40*e72f3952SAlexander Dorokhine 
41*e72f3952SAlexander Dorokhine 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
42*e72f3952SAlexander Dorokhine 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
43*e72f3952SAlexander Dorokhine 
44*e72f3952SAlexander Dorokhine 	Bad	= Runeerror,
45*e72f3952SAlexander Dorokhine };
46*e72f3952SAlexander Dorokhine 
47*e72f3952SAlexander Dorokhine /*
48*e72f3952SAlexander Dorokhine  * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
49*e72f3952SAlexander Dorokhine  * This is a slower but "safe" version of the old chartorune
50*e72f3952SAlexander Dorokhine  * that works on strings that are not necessarily null-terminated.
51*e72f3952SAlexander Dorokhine  *
52*e72f3952SAlexander Dorokhine  * If you know for sure that your string is null-terminated,
53*e72f3952SAlexander Dorokhine  * chartorune will be a bit faster.
54*e72f3952SAlexander Dorokhine  *
55*e72f3952SAlexander Dorokhine  * It is guaranteed not to attempt to access "length"
56*e72f3952SAlexander Dorokhine  * past the incoming pointer.  This is to avoid
57*e72f3952SAlexander Dorokhine  * possible access violations.  If the string appears to be
58*e72f3952SAlexander Dorokhine  * well-formed but incomplete (i.e., to get the whole Rune
59*e72f3952SAlexander Dorokhine  * we'd need to read past str+length) then we'll set the Rune
60*e72f3952SAlexander Dorokhine  * to Bad and return 0.
61*e72f3952SAlexander Dorokhine  *
62*e72f3952SAlexander Dorokhine  * Note that if we have decoding problems for other
63*e72f3952SAlexander Dorokhine  * reasons, we return 1 instead of 0.
64*e72f3952SAlexander Dorokhine  */
65*e72f3952SAlexander Dorokhine int
charntorune(Rune * rune,const char * str,int length)66*e72f3952SAlexander Dorokhine charntorune(Rune *rune, const char *str, int length)
67*e72f3952SAlexander Dorokhine {
68*e72f3952SAlexander Dorokhine 	int c, c1, c2, c3;
69*e72f3952SAlexander Dorokhine 	long l;
70*e72f3952SAlexander Dorokhine 
71*e72f3952SAlexander Dorokhine 	/* When we're not allowed to read anything */
72*e72f3952SAlexander Dorokhine 	if(length <= 0) {
73*e72f3952SAlexander Dorokhine 		goto badlen;
74*e72f3952SAlexander Dorokhine 	}
75*e72f3952SAlexander Dorokhine 
76*e72f3952SAlexander Dorokhine 	/*
77*e72f3952SAlexander Dorokhine 	 * one character sequence (7-bit value)
78*e72f3952SAlexander Dorokhine 	 *	00000-0007F => T1
79*e72f3952SAlexander Dorokhine 	 */
80*e72f3952SAlexander Dorokhine 	c = *(uchar*)str;
81*e72f3952SAlexander Dorokhine 	if(c < Tx) {
82*e72f3952SAlexander Dorokhine 		*rune = c;
83*e72f3952SAlexander Dorokhine 		return 1;
84*e72f3952SAlexander Dorokhine 	}
85*e72f3952SAlexander Dorokhine 
86*e72f3952SAlexander Dorokhine 	// If we can't read more than one character we must stop
87*e72f3952SAlexander Dorokhine 	if(length <= 1) {
88*e72f3952SAlexander Dorokhine 		goto badlen;
89*e72f3952SAlexander Dorokhine 	}
90*e72f3952SAlexander Dorokhine 
91*e72f3952SAlexander Dorokhine 	/*
92*e72f3952SAlexander Dorokhine 	 * two character sequence (11-bit value)
93*e72f3952SAlexander Dorokhine 	 *	0080-07FF => T2 Tx
94*e72f3952SAlexander Dorokhine 	 */
95*e72f3952SAlexander Dorokhine 	c1 = *(uchar*)(str+1) ^ Tx;
96*e72f3952SAlexander Dorokhine 	if(c1 & Testx)
97*e72f3952SAlexander Dorokhine 		goto bad;
98*e72f3952SAlexander Dorokhine 	if(c < T3) {
99*e72f3952SAlexander Dorokhine 		if(c < T2)
100*e72f3952SAlexander Dorokhine 			goto bad;
101*e72f3952SAlexander Dorokhine 		l = ((c << Bitx) | c1) & Rune2;
102*e72f3952SAlexander Dorokhine 		if(l <= Rune1)
103*e72f3952SAlexander Dorokhine 			goto bad;
104*e72f3952SAlexander Dorokhine 		*rune = l;
105*e72f3952SAlexander Dorokhine 		return 2;
106*e72f3952SAlexander Dorokhine 	}
107*e72f3952SAlexander Dorokhine 
108*e72f3952SAlexander Dorokhine 	// If we can't read more than two characters we must stop
109*e72f3952SAlexander Dorokhine 	if(length <= 2) {
110*e72f3952SAlexander Dorokhine 		goto badlen;
111*e72f3952SAlexander Dorokhine 	}
112*e72f3952SAlexander Dorokhine 
113*e72f3952SAlexander Dorokhine 	/*
114*e72f3952SAlexander Dorokhine 	 * three character sequence (16-bit value)
115*e72f3952SAlexander Dorokhine 	 *	0800-FFFF => T3 Tx Tx
116*e72f3952SAlexander Dorokhine 	 */
117*e72f3952SAlexander Dorokhine 	c2 = *(uchar*)(str+2) ^ Tx;
118*e72f3952SAlexander Dorokhine 	if(c2 & Testx)
119*e72f3952SAlexander Dorokhine 		goto bad;
120*e72f3952SAlexander Dorokhine 	if(c < T4) {
121*e72f3952SAlexander Dorokhine 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
122*e72f3952SAlexander Dorokhine 		if(l <= Rune2)
123*e72f3952SAlexander Dorokhine 			goto bad;
124*e72f3952SAlexander Dorokhine 		*rune = l;
125*e72f3952SAlexander Dorokhine 		return 3;
126*e72f3952SAlexander Dorokhine 	}
127*e72f3952SAlexander Dorokhine 
128*e72f3952SAlexander Dorokhine 	if (length <= 3)
129*e72f3952SAlexander Dorokhine 		goto badlen;
130*e72f3952SAlexander Dorokhine 
131*e72f3952SAlexander Dorokhine 	/*
132*e72f3952SAlexander Dorokhine 	 * four character sequence (21-bit value)
133*e72f3952SAlexander Dorokhine 	 *	10000-1FFFFF => T4 Tx Tx Tx
134*e72f3952SAlexander Dorokhine 	 */
135*e72f3952SAlexander Dorokhine 	c3 = *(uchar*)(str+3) ^ Tx;
136*e72f3952SAlexander Dorokhine 	if (c3 & Testx)
137*e72f3952SAlexander Dorokhine 		goto bad;
138*e72f3952SAlexander Dorokhine 	if (c < T5) {
139*e72f3952SAlexander Dorokhine 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
140*e72f3952SAlexander Dorokhine 		if (l <= Rune3)
141*e72f3952SAlexander Dorokhine 			goto bad;
142*e72f3952SAlexander Dorokhine 		if (l > Runemax)
143*e72f3952SAlexander Dorokhine 			goto bad;
144*e72f3952SAlexander Dorokhine 		*rune = l;
145*e72f3952SAlexander Dorokhine 		return 4;
146*e72f3952SAlexander Dorokhine 	}
147*e72f3952SAlexander Dorokhine 
148*e72f3952SAlexander Dorokhine 	// Support for 5-byte or longer UTF-8 would go here, but
149*e72f3952SAlexander Dorokhine 	// since we don't have that, we'll just fall through to bad.
150*e72f3952SAlexander Dorokhine 
151*e72f3952SAlexander Dorokhine 	/*
152*e72f3952SAlexander Dorokhine 	 * bad decoding
153*e72f3952SAlexander Dorokhine 	 */
154*e72f3952SAlexander Dorokhine bad:
155*e72f3952SAlexander Dorokhine 	*rune = Bad;
156*e72f3952SAlexander Dorokhine 	return 1;
157*e72f3952SAlexander Dorokhine badlen:
158*e72f3952SAlexander Dorokhine 	*rune = Bad;
159*e72f3952SAlexander Dorokhine 	return 0;
160*e72f3952SAlexander Dorokhine 
161*e72f3952SAlexander Dorokhine }
162*e72f3952SAlexander Dorokhine 
163*e72f3952SAlexander Dorokhine 
164*e72f3952SAlexander Dorokhine /*
165*e72f3952SAlexander Dorokhine  * This is the older "unsafe" version, which works fine on
166*e72f3952SAlexander Dorokhine  * null-terminated strings.
167*e72f3952SAlexander Dorokhine  */
168*e72f3952SAlexander Dorokhine int
chartorune(Rune * rune,const char * str)169*e72f3952SAlexander Dorokhine chartorune(Rune *rune, const char *str)
170*e72f3952SAlexander Dorokhine {
171*e72f3952SAlexander Dorokhine 	int c, c1, c2, c3;
172*e72f3952SAlexander Dorokhine 	long l;
173*e72f3952SAlexander Dorokhine 
174*e72f3952SAlexander Dorokhine 	/*
175*e72f3952SAlexander Dorokhine 	 * one character sequence
176*e72f3952SAlexander Dorokhine 	 *	00000-0007F => T1
177*e72f3952SAlexander Dorokhine 	 */
178*e72f3952SAlexander Dorokhine 	c = *(uchar*)str;
179*e72f3952SAlexander Dorokhine 	if(c < Tx) {
180*e72f3952SAlexander Dorokhine 		*rune = c;
181*e72f3952SAlexander Dorokhine 		return 1;
182*e72f3952SAlexander Dorokhine 	}
183*e72f3952SAlexander Dorokhine 
184*e72f3952SAlexander Dorokhine 	/*
185*e72f3952SAlexander Dorokhine 	 * two character sequence
186*e72f3952SAlexander Dorokhine 	 *	0080-07FF => T2 Tx
187*e72f3952SAlexander Dorokhine 	 */
188*e72f3952SAlexander Dorokhine 	c1 = *(uchar*)(str+1) ^ Tx;
189*e72f3952SAlexander Dorokhine 	if(c1 & Testx)
190*e72f3952SAlexander Dorokhine 		goto bad;
191*e72f3952SAlexander Dorokhine 	if(c < T3) {
192*e72f3952SAlexander Dorokhine 		if(c < T2)
193*e72f3952SAlexander Dorokhine 			goto bad;
194*e72f3952SAlexander Dorokhine 		l = ((c << Bitx) | c1) & Rune2;
195*e72f3952SAlexander Dorokhine 		if(l <= Rune1)
196*e72f3952SAlexander Dorokhine 			goto bad;
197*e72f3952SAlexander Dorokhine 		*rune = l;
198*e72f3952SAlexander Dorokhine 		return 2;
199*e72f3952SAlexander Dorokhine 	}
200*e72f3952SAlexander Dorokhine 
201*e72f3952SAlexander Dorokhine 	/*
202*e72f3952SAlexander Dorokhine 	 * three character sequence
203*e72f3952SAlexander Dorokhine 	 *	0800-FFFF => T3 Tx Tx
204*e72f3952SAlexander Dorokhine 	 */
205*e72f3952SAlexander Dorokhine 	c2 = *(uchar*)(str+2) ^ Tx;
206*e72f3952SAlexander Dorokhine 	if(c2 & Testx)
207*e72f3952SAlexander Dorokhine 		goto bad;
208*e72f3952SAlexander Dorokhine 	if(c < T4) {
209*e72f3952SAlexander Dorokhine 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
210*e72f3952SAlexander Dorokhine 		if(l <= Rune2)
211*e72f3952SAlexander Dorokhine 			goto bad;
212*e72f3952SAlexander Dorokhine 		*rune = l;
213*e72f3952SAlexander Dorokhine 		return 3;
214*e72f3952SAlexander Dorokhine 	}
215*e72f3952SAlexander Dorokhine 
216*e72f3952SAlexander Dorokhine 	/*
217*e72f3952SAlexander Dorokhine 	 * four character sequence (21-bit value)
218*e72f3952SAlexander Dorokhine 	 *	10000-1FFFFF => T4 Tx Tx Tx
219*e72f3952SAlexander Dorokhine 	 */
220*e72f3952SAlexander Dorokhine 	c3 = *(uchar*)(str+3) ^ Tx;
221*e72f3952SAlexander Dorokhine 	if (c3 & Testx)
222*e72f3952SAlexander Dorokhine 		goto bad;
223*e72f3952SAlexander Dorokhine 	if (c < T5) {
224*e72f3952SAlexander Dorokhine 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
225*e72f3952SAlexander Dorokhine 		if (l <= Rune3)
226*e72f3952SAlexander Dorokhine 			goto bad;
227*e72f3952SAlexander Dorokhine 		if (l > Runemax)
228*e72f3952SAlexander Dorokhine 			goto bad;
229*e72f3952SAlexander Dorokhine 		*rune = l;
230*e72f3952SAlexander Dorokhine 		return 4;
231*e72f3952SAlexander Dorokhine 	}
232*e72f3952SAlexander Dorokhine 
233*e72f3952SAlexander Dorokhine 	/*
234*e72f3952SAlexander Dorokhine 	 * Support for 5-byte or longer UTF-8 would go here, but
235*e72f3952SAlexander Dorokhine 	 * since we don't have that, we'll just fall through to bad.
236*e72f3952SAlexander Dorokhine 	 */
237*e72f3952SAlexander Dorokhine 
238*e72f3952SAlexander Dorokhine 	/*
239*e72f3952SAlexander Dorokhine 	 * bad decoding
240*e72f3952SAlexander Dorokhine 	 */
241*e72f3952SAlexander Dorokhine bad:
242*e72f3952SAlexander Dorokhine 	*rune = Bad;
243*e72f3952SAlexander Dorokhine 	return 1;
244*e72f3952SAlexander Dorokhine }
245*e72f3952SAlexander Dorokhine 
246*e72f3952SAlexander Dorokhine int
isvalidcharntorune(const char * str,int length,Rune * rune,int * consumed)247*e72f3952SAlexander Dorokhine isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
248*e72f3952SAlexander Dorokhine 	*consumed = charntorune(rune, str, length);
249*e72f3952SAlexander Dorokhine 	return *rune != Runeerror || *consumed == 3;
250*e72f3952SAlexander Dorokhine }
251*e72f3952SAlexander Dorokhine 
252*e72f3952SAlexander Dorokhine int
runetochar(char * str,const Rune * rune)253*e72f3952SAlexander Dorokhine runetochar(char *str, const Rune *rune)
254*e72f3952SAlexander Dorokhine {
255*e72f3952SAlexander Dorokhine 	/* Runes are signed, so convert to unsigned for range check. */
256*e72f3952SAlexander Dorokhine 	unsigned long c;
257*e72f3952SAlexander Dorokhine 
258*e72f3952SAlexander Dorokhine 	/*
259*e72f3952SAlexander Dorokhine 	 * one character sequence
260*e72f3952SAlexander Dorokhine 	 *	00000-0007F => 00-7F
261*e72f3952SAlexander Dorokhine 	 */
262*e72f3952SAlexander Dorokhine 	c = *rune;
263*e72f3952SAlexander Dorokhine 	if(c <= Rune1) {
264*e72f3952SAlexander Dorokhine 		str[0] = c;
265*e72f3952SAlexander Dorokhine 		return 1;
266*e72f3952SAlexander Dorokhine 	}
267*e72f3952SAlexander Dorokhine 
268*e72f3952SAlexander Dorokhine 	/*
269*e72f3952SAlexander Dorokhine 	 * two character sequence
270*e72f3952SAlexander Dorokhine 	 *	0080-07FF => T2 Tx
271*e72f3952SAlexander Dorokhine 	 */
272*e72f3952SAlexander Dorokhine 	if(c <= Rune2) {
273*e72f3952SAlexander Dorokhine 		str[0] = T2 | (c >> 1*Bitx);
274*e72f3952SAlexander Dorokhine 		str[1] = Tx | (c & Maskx);
275*e72f3952SAlexander Dorokhine 		return 2;
276*e72f3952SAlexander Dorokhine 	}
277*e72f3952SAlexander Dorokhine 
278*e72f3952SAlexander Dorokhine 	/*
279*e72f3952SAlexander Dorokhine 	 * If the Rune is out of range, convert it to the error rune.
280*e72f3952SAlexander Dorokhine 	 * Do this test here because the error rune encodes to three bytes.
281*e72f3952SAlexander Dorokhine 	 * Doing it earlier would duplicate work, since an out of range
282*e72f3952SAlexander Dorokhine 	 * Rune wouldn't have fit in one or two bytes.
283*e72f3952SAlexander Dorokhine 	 */
284*e72f3952SAlexander Dorokhine 	if (c > Runemax)
285*e72f3952SAlexander Dorokhine 		c = Runeerror;
286*e72f3952SAlexander Dorokhine 
287*e72f3952SAlexander Dorokhine 	/*
288*e72f3952SAlexander Dorokhine 	 * three character sequence
289*e72f3952SAlexander Dorokhine 	 *	0800-FFFF => T3 Tx Tx
290*e72f3952SAlexander Dorokhine 	 */
291*e72f3952SAlexander Dorokhine 	if (c <= Rune3) {
292*e72f3952SAlexander Dorokhine 		str[0] = T3 |  (c >> 2*Bitx);
293*e72f3952SAlexander Dorokhine 		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
294*e72f3952SAlexander Dorokhine 		str[2] = Tx |  (c & Maskx);
295*e72f3952SAlexander Dorokhine 		return 3;
296*e72f3952SAlexander Dorokhine 	}
297*e72f3952SAlexander Dorokhine 
298*e72f3952SAlexander Dorokhine 	/*
299*e72f3952SAlexander Dorokhine 	 * four character sequence (21-bit value)
300*e72f3952SAlexander Dorokhine 	 *     10000-1FFFFF => T4 Tx Tx Tx
301*e72f3952SAlexander Dorokhine 	 */
302*e72f3952SAlexander Dorokhine 	str[0] = T4 | (c >> 3*Bitx);
303*e72f3952SAlexander Dorokhine 	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
304*e72f3952SAlexander Dorokhine 	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
305*e72f3952SAlexander Dorokhine 	str[3] = Tx | (c & Maskx);
306*e72f3952SAlexander Dorokhine 	return 4;
307*e72f3952SAlexander Dorokhine }
308*e72f3952SAlexander Dorokhine 
309*e72f3952SAlexander Dorokhine int
runelen(Rune rune)310*e72f3952SAlexander Dorokhine runelen(Rune rune)
311*e72f3952SAlexander Dorokhine {
312*e72f3952SAlexander Dorokhine 	char str[10];
313*e72f3952SAlexander Dorokhine 
314*e72f3952SAlexander Dorokhine 	return runetochar(str, &rune);
315*e72f3952SAlexander Dorokhine }
316*e72f3952SAlexander Dorokhine 
317*e72f3952SAlexander Dorokhine int
runenlen(const Rune * r,int nrune)318*e72f3952SAlexander Dorokhine runenlen(const Rune *r, int nrune)
319*e72f3952SAlexander Dorokhine {
320*e72f3952SAlexander Dorokhine 	int nb;
321*e72f3952SAlexander Dorokhine 	ulong c;	/* Rune is signed, so use unsigned for range check. */
322*e72f3952SAlexander Dorokhine 
323*e72f3952SAlexander Dorokhine 	nb = 0;
324*e72f3952SAlexander Dorokhine 	while(nrune--) {
325*e72f3952SAlexander Dorokhine 		c = *r++;
326*e72f3952SAlexander Dorokhine 		if (c <= Rune1)
327*e72f3952SAlexander Dorokhine 			nb++;
328*e72f3952SAlexander Dorokhine 		else if (c <= Rune2)
329*e72f3952SAlexander Dorokhine 			nb += 2;
330*e72f3952SAlexander Dorokhine 		else if (c <= Rune3)
331*e72f3952SAlexander Dorokhine 			nb += 3;
332*e72f3952SAlexander Dorokhine 		else if (c <= Runemax)
333*e72f3952SAlexander Dorokhine 			nb += 4;
334*e72f3952SAlexander Dorokhine 		else
335*e72f3952SAlexander Dorokhine 			nb += 3;	/* Runeerror = 0xFFFD, see runetochar */
336*e72f3952SAlexander Dorokhine 	}
337*e72f3952SAlexander Dorokhine 	return nb;
338*e72f3952SAlexander Dorokhine }
339*e72f3952SAlexander Dorokhine 
340*e72f3952SAlexander Dorokhine int
fullrune(const char * str,int n)341*e72f3952SAlexander Dorokhine fullrune(const char *str, int n)
342*e72f3952SAlexander Dorokhine {
343*e72f3952SAlexander Dorokhine 	if (n > 0) {
344*e72f3952SAlexander Dorokhine 		int c = *(uchar*)str;
345*e72f3952SAlexander Dorokhine 		if (c < Tx)
346*e72f3952SAlexander Dorokhine 			return 1;
347*e72f3952SAlexander Dorokhine 		if (n > 1) {
348*e72f3952SAlexander Dorokhine 			if (c < T3)
349*e72f3952SAlexander Dorokhine 				return 1;
350*e72f3952SAlexander Dorokhine 			if (n > 2) {
351*e72f3952SAlexander Dorokhine 				if (c < T4 || n > 3)
352*e72f3952SAlexander Dorokhine 					return 1;
353*e72f3952SAlexander Dorokhine 			}
354*e72f3952SAlexander Dorokhine 		}
355*e72f3952SAlexander Dorokhine 	}
356*e72f3952SAlexander Dorokhine 	return 0;
357*e72f3952SAlexander Dorokhine }
358