xref: /aosp_15_r20/external/grpc-grpc/third_party/utf8_range/naive.c (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1*cc02d7e2SAndroid Build Coastguard Worker #include <stdio.h>
2*cc02d7e2SAndroid Build Coastguard Worker 
/*
 * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
 *
 * Table 3-7. Well-Formed UTF-8 Byte Sequences
 *
 * +--------------------+------------+-------------+------------+-------------+
 * | Code Points        | First Byte | Second Byte | Third Byte | Fourth Byte |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+0000..U+007F     | 00..7F     |             |            |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+0080..U+07FF     | C2..DF     | 80..BF      |            |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+0800..U+0FFF     | E0         | A0..BF      | 80..BF     |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+1000..U+CFFF     | E1..EC     | 80..BF      | 80..BF     |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+D000..U+D7FF     | ED         | 80..9F      | 80..BF     |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+E000..U+FFFF     | EE..EF     | 80..BF      | 80..BF     |             |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+10000..U+3FFFF   | F0         | 90..BF      | 80..BF     | 80..BF      |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+40000..U+FFFFF   | F1..F3     | 80..BF      | 80..BF     | 80..BF      |
 * +--------------------+------------+-------------+------------+-------------+
 * | U+100000..U+10FFFF | F4         | 80..8F      | 80..BF     | 80..BF      |
 * +--------------------+------------+-------------+------------+-------------+
 */
30*cc02d7e2SAndroid Build Coastguard Worker 
/* Nonzero when b is a UTF-8 continuation byte, i.e. in the range 80..BF. */
static int utf8_naive_is_continuation(unsigned char b)
{
    /*
     * Mask test instead of a signed-char comparison: converting values
     * above 0x7F to signed char is implementation-defined in C.
     */
    return (b & 0xC0) == 0x80;
}

/*
 * Check that the first len bytes of data are well-formed UTF-8, following
 * the byte ranges of Unicode Table 3-7 (overlong forms, surrogates
 * U+D800..U+DFFF and code points above U+10FFFF are rejected).
 *
 * data: bytes to validate; only data[0..len-1] is read.
 * len:  number of bytes; a value <= 0 is treated as empty input.
 *
 * Returns 0 on success, otherwise the 1-based index of the first byte of
 * the first ill-formed or truncated sequence.
 */
int utf8_naive(const unsigned char *data, int len)
{
    int err_pos = 1;

    /*
     * "len > 0" rather than "len != 0": a negative length would otherwise
     * never reach zero and the loop would read past the buffer.
     */
    while (len > 0) {
        const unsigned char byte1 = data[0];
        int bytes;

        if (byte1 <= 0x7F) {
            /* 00..7F */
            bytes = 1;
        } else if (byte1 >= 0xC2 && byte1 <= 0xDF) {
            /* C2..DF, 80..BF */
            if (len < 2 || !utf8_naive_is_continuation(data[1])) {
                return err_pos;
            }
            bytes = 2;
        } else if (byte1 >= 0xE0 && byte1 <= 0xEF) {
            /* E0..EF, 80..BF, 80..BF with two restricted second bytes */
            if (len < 3 ||
                    !utf8_naive_is_continuation(data[1]) ||
                    !utf8_naive_is_continuation(data[2])) {
                return err_pos;
            }
            /* E0 requires A0..BF: anything lower is an overlong encoding */
            if (byte1 == 0xE0 && data[1] < 0xA0) {
                return err_pos;
            }
            /* ED requires 80..9F: anything higher encodes a surrogate */
            if (byte1 == 0xED && data[1] > 0x9F) {
                return err_pos;
            }
            bytes = 3;
        } else if (byte1 >= 0xF0 && byte1 <= 0xF4) {
            /* F0..F4, 80..BF, 80..BF, 80..BF with two restricted second bytes */
            if (len < 4 ||
                    !utf8_naive_is_continuation(data[1]) ||
                    !utf8_naive_is_continuation(data[2]) ||
                    !utf8_naive_is_continuation(data[3])) {
                return err_pos;
            }
            /* F0 requires 90..BF: anything lower is an overlong encoding */
            if (byte1 == 0xF0 && data[1] < 0x90) {
                return err_pos;
            }
            /* F4 requires 80..8F: anything higher exceeds U+10FFFF */
            if (byte1 == 0xF4 && data[1] > 0x8F) {
                return err_pos;
            }
            bytes = 4;
        } else {
            /* 80..C1 and F5..FF can never start a well-formed sequence */
            return err_pos;
        }

        len -= bytes;
        err_pos += bytes;
        data += bytes;
    }

    return 0;
}
93