xref: /aosp_15_r20/external/cronet/third_party/re2/src/re2/unicode_groups.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_UNICODE_GROUPS_H_
6 #define RE2_UNICODE_GROUPS_H_
7 
8 // Unicode character groups.
9 
// The codes get split into ranges of 16-bit codes
// and ranges of 32-bit codes.  It would be simpler
// to use only 32-bit ranges, but these tables are large
// enough to warrant extra care.
//
// Using just 32-bit ranges gives 27 kB of data.
// Splitting out 16-bit ranges reduces that to 18 kB of data.
// Adding an extra table of 16-bit singletons would reduce
// it further to 16.5 kB of data but make the data harder to use;
// we don't bother.
20 
21 #include <stdint.h>
22 
23 #include "util/utf.h"
24 
25 namespace re2 {
26 
// A range of codes whose two endpoints each fit in 16 bits.
// NOTE(review): hi is presumably an inclusive upper bound -- confirm
// against the code that generates and consumes the tables.
struct URange16
{
  uint16_t lo;  // low end of the range
  uint16_t hi;  // high end of the range
};
32 
33 struct URange32
34 {
35   Rune lo;
36   Rune hi;
37 };
38 
39 struct UGroup
40 {
41   const char *name;
42   int sign;  // +1 for [abc], -1 for [^abc]
43   const URange16 *r16;
44   int nr16;
45   const URange32 *r32;
46   int nr32;
47 };
48 
49 // Named by property or script name (e.g., "Nd", "N", "Han").
50 // Negated groups are not included.
51 extern const UGroup unicode_groups[];
52 extern const int num_unicode_groups;
53 
54 // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]").
55 // Negated groups are included.
56 extern const UGroup posix_groups[];
57 extern const int num_posix_groups;
58 
59 // Named by Perl name (e.g., "\\d", "\\D").
60 // Negated groups are included.
61 extern const UGroup perl_groups[];
62 extern const int num_perl_groups;
63 
64 }  // namespace re2
65 
66 #endif  // RE2_UNICODE_GROUPS_H_
67