xref: /aosp_15_r20/external/boringssl/src/crypto/bytestring/unicode.c (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
/* Copyright (c) 2018, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14*8fb009dcSAndroid Build Coastguard Worker 
15*8fb009dcSAndroid Build Coastguard Worker #include <openssl/bytestring.h>
16*8fb009dcSAndroid Build Coastguard Worker 
17*8fb009dcSAndroid Build Coastguard Worker #include "internal.h"
18*8fb009dcSAndroid Build Coastguard Worker 
19*8fb009dcSAndroid Build Coastguard Worker 
// is_valid_code_point returns one if |v| is a code point accepted by the
// functions in this file and zero otherwise. Values beyond the Unicode code
// space, noncharacters, and surrogate code points are all rejected.
static int is_valid_code_point(uint32_t v) {
  // References in the following are to Unicode 15.0.0.
  if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
      v > 0x10ffff ||
      // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
      // as noncharacters (3.4 D14). See also 23.7. As our APIs are intended for
      // "open interchange", such as ASN.1, we reject them.
      //
      // Masking with 0xfffe ignores the lowest bit and everything above bit
      // 15, so this one comparison matches both 0x...fffe and 0x...ffff in
      // every plane.
      (v & 0xfffe) == 0xfffe ||
      (v >= 0xfdd0 && v <= 0xfdef) ||
      // Surrogate code points are invalid (3.2 C1).
      (v >= 0xd800 && v <= 0xdfff)) {
    return 0;
  }
  return 1;
}
35*8fb009dcSAndroid Build Coastguard Worker 
// BOTTOM_BITS returns a byte with the bottom |n| bits set. It is used here
// with |n| between zero and eight. The whole expansion is parenthesized so the
// cast cannot bind to neighboring tokens at the point of use.
#define BOTTOM_BITS(n) ((uint8_t)((1u << (n)) - 1))

// TOP_BITS returns a byte with the top |n| bits set. It is the complement,
// truncated to eight bits, of the bottom |8 - n| bits.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
41*8fb009dcSAndroid Build Coastguard Worker 
// CBS_get_utf8 decodes one UTF-8 sequence from |cbs| and stores the resulting
// code point in |*out|. It returns one on success and zero on error. Decoding
// fails if a byte cannot be obtained with |CBS_get_u8|, the leading byte does
// not start a one- to four-byte sequence, a continuation byte is not of the
// form 10xxxxxx, the sequence is overlong, or the decoded value is rejected by
// |is_valid_code_point|. |*out| is written only on success. Nothing in this
// function undoes |CBS_get_u8| calls made before a failure is detected.
int CBS_get_utf8(CBS *cbs, uint32_t *out) {
  uint8_t c;
  if (!CBS_get_u8(cbs, &c)) {
    return 0;
  }
  // 0xxxxxxx: a single byte encodes itself.
  if (c <= 0x7f) {
    *out = c;
    return 1;
  }
  // |v| accumulates the code point, |len| is the number of continuation bytes
  // that must follow, and |lower_bound| is the smallest value that requires a
  // sequence of this length.
  uint32_t v, lower_bound;
  size_t len;
  if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
    // 110xxxxx: two-byte sequence carrying five payload bits in the lead.
    v = c & BOTTOM_BITS(5);
    len = 1;
    lower_bound = 0x80;
  } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
    // 1110xxxx: three-byte sequence carrying four payload bits in the lead.
    v = c & BOTTOM_BITS(4);
    len = 2;
    lower_bound = 0x800;
  } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
    // 11110xxx: four-byte sequence carrying three payload bits in the lead.
    v = c & BOTTOM_BITS(3);
    len = 3;
    lower_bound = 0x10000;
  } else {
    // Either a stray continuation byte (10xxxxxx) or a lead of the form
    // 11111xxx, neither of which can begin a sequence.
    return 0;
  }
  for (size_t i = 0; i < len; i++) {
    // Each continuation byte must be 10xxxxxx and contributes six bits.
    if (!CBS_get_u8(cbs, &c) ||
        (c & TOP_BITS(2)) != TOP_BITS(1)) {
      return 0;
    }
    v <<= 6;
    v |= c & BOTTOM_BITS(6);
  }
  // Reject invalid code points and overlong encodings, i.e. values that would
  // have fit in a shorter sequence.
  if (!is_valid_code_point(v) ||
      v < lower_bound) {
    return 0;
  }
  *out = v;
  return 1;
}
83*8fb009dcSAndroid Build Coastguard Worker 
// CBS_get_latin1 obtains a single byte from |cbs| with |CBS_get_u8| and stores
// its value, unchanged, in |*out|. It returns one on success and zero if
// |CBS_get_u8| fails, in which case |*out| is left untouched. No byte value is
// rejected.
int CBS_get_latin1(CBS *cbs, uint32_t *out) {
  uint8_t c;
  if (!CBS_get_u8(cbs, &c)) {
    return 0;
  }
  *out = c;
  return 1;
}
92*8fb009dcSAndroid Build Coastguard Worker 
// CBS_get_ucs2_be obtains one 16-bit unit from |cbs| with |CBS_get_u16| and
// stores it in |*out| as a code point. It returns one on success and zero if
// |CBS_get_u16| fails or the value is rejected by |is_valid_code_point|.
// |*out| is written only on success.
// NOTE(review): the big-endian byte order implied by the name is provided by
// |CBS_get_u16|, which is defined elsewhere; confirm there.
int CBS_get_ucs2_be(CBS *cbs, uint32_t *out) {
  // Note UCS-2 (used by BMPString) does not support surrogates. Surrogate
  // values are rejected by |is_valid_code_point| rather than being paired.
  uint16_t c;
  if (!CBS_get_u16(cbs, &c) ||
      !is_valid_code_point(c)) {
    return 0;
  }
  *out = c;
  return 1;
}
103*8fb009dcSAndroid Build Coastguard Worker 
// CBS_get_utf32_be obtains one 32-bit value from |cbs| with |CBS_get_u32| and
// returns one if that succeeds and the value is accepted by
// |is_valid_code_point|, and zero otherwise. |out| is handed directly to
// |CBS_get_u32|, so |*out| may hold a rejected value when this function
// returns zero.
// NOTE(review): the big-endian byte order implied by the name is provided by
// |CBS_get_u32|, which is defined elsewhere; confirm there.
int CBS_get_utf32_be(CBS *cbs, uint32_t *out) {
  return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
}
107*8fb009dcSAndroid Build Coastguard Worker 
// CBB_get_utf8_len returns the length, in bytes, of the UTF-8 encoding of |u|:
// one for values up to 0x7f, two up to 0x7ff, three up to 0xffff, and four for
// anything larger. It performs no validation, so surrogates and values above
// 0x10ffff still yield a length.
size_t CBB_get_utf8_len(uint32_t u) {
  if (u <= 0x7f) {
    return 1;
  }
  if (u <= 0x7ff) {
    return 2;
  }
  if (u <= 0xffff) {
    return 3;
  }
  return 4;
}
120*8fb009dcSAndroid Build Coastguard Worker 
// CBB_add_utf8 appends the UTF-8 encoding of |u| to |cbb|, one byte at a time
// with |CBB_add_u8|. It returns zero if |u| is rejected by
// |is_valid_code_point| or if a |CBB_add_u8| call reports failure. Bytes added
// before a failing |CBB_add_u8| call are not removed by this function.
int CBB_add_utf8(CBB *cbb, uint32_t u) {
  if (!is_valid_code_point(u)) {
    return 0;
  }
  // Each byte below is at most eight bits wide by construction: the lead
  // byte's payload is bounded by the range check that selects the branch, and
  // every continuation byte is masked to six bits. The casts make the
  // narrowing from |uint32_t| explicit.
  if (u <= 0x7f) {
    // 0xxxxxxx
    return CBB_add_u8(cbb, (uint8_t)u);
  }
  if (u <= 0x7ff) {
    // 110xxxxx 10xxxxxx
    return CBB_add_u8(cbb, (uint8_t)(TOP_BITS(2) | (u >> 6))) &&
           CBB_add_u8(cbb, (uint8_t)(TOP_BITS(1) | (u & BOTTOM_BITS(6))));
  }
  if (u <= 0xffff) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    return CBB_add_u8(cbb, (uint8_t)(TOP_BITS(3) | (u >> 12))) &&
           CBB_add_u8(cbb,
                      (uint8_t)(TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6)))) &&
           CBB_add_u8(cbb, (uint8_t)(TOP_BITS(1) | (u & BOTTOM_BITS(6))));
  }
  if (u <= 0x10ffff) {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return CBB_add_u8(cbb, (uint8_t)(TOP_BITS(4) | (u >> 18))) &&
           CBB_add_u8(cbb,
                      (uint8_t)(TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6)))) &&
           CBB_add_u8(cbb,
                      (uint8_t)(TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6)))) &&
           CBB_add_u8(cbb, (uint8_t)(TOP_BITS(1) | (u & BOTTOM_BITS(6))));
  }
  // Not reachable while |is_valid_code_point| rejects values above 0x10ffff;
  // kept so the function still fails closed if that check ever changes.
  return 0;
}
145*8fb009dcSAndroid Build Coastguard Worker 
// CBB_add_latin1 appends |u| to |cbb| as a single byte using |CBB_add_u8|. It
// returns one on success and zero if |u| exceeds 0xff, in which case |cbb| is
// not touched, or if |CBB_add_u8| reports failure.
int CBB_add_latin1(CBB *cbb, uint32_t u) {
  return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
}
149*8fb009dcSAndroid Build Coastguard Worker 
// CBB_add_ucs2_be appends |u| to |cbb| as one 16-bit unit using |CBB_add_u16|.
// It returns one on success and zero if |u| exceeds 0xffff, is rejected by
// |is_valid_code_point|, or |CBB_add_u16| reports failure. The range and
// validity checks run first, so |cbb| is not touched for a rejected |u|.
// NOTE(review): the big-endian byte order implied by the name is provided by
// |CBB_add_u16|, which is defined elsewhere; confirm there.
int CBB_add_ucs2_be(CBB *cbb, uint32_t u) {
  return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
}
153*8fb009dcSAndroid Build Coastguard Worker 
// CBB_add_utf32_be appends |u| to |cbb| as one 32-bit value using
// |CBB_add_u32|. It returns one on success and zero if |u| is rejected by
// |is_valid_code_point|, in which case |cbb| is not touched, or if
// |CBB_add_u32| reports failure.
// NOTE(review): the big-endian byte order implied by the name is provided by
// |CBB_add_u32|, which is defined elsewhere; confirm there.
int CBB_add_utf32_be(CBB *cbb, uint32_t u) {
  return is_valid_code_point(u) && CBB_add_u32(cbb, u);
}
157