xref: /aosp_15_r20/external/pcre/src/pcre2_maketables.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2020 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
own as part of the PCRE2 library. It is also included in the compilation of
pcre2_dftables.c as a freestanding program, in which case the macro
PCRE2_DFTABLES is defined. */
47*22dc650dSSadaf Ebrahimi 
48*22dc650dSSadaf Ebrahimi #ifndef PCRE2_DFTABLES    /* Compiling the library */
49*22dc650dSSadaf Ebrahimi #  ifdef HAVE_CONFIG_H
50*22dc650dSSadaf Ebrahimi #  include "config.h"
51*22dc650dSSadaf Ebrahimi #  endif
52*22dc650dSSadaf Ebrahimi #  include "pcre2_internal.h"
53*22dc650dSSadaf Ebrahimi #endif
54*22dc650dSSadaf Ebrahimi 
55*22dc650dSSadaf Ebrahimi /*************************************************
56*22dc650dSSadaf Ebrahimi *           Create PCRE2 character tables        *
57*22dc650dSSadaf Ebrahimi *************************************************/
58*22dc650dSSadaf Ebrahimi 
/* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
freestanding auxiliary program) malloc() is used, and the function has a
different name so as not to clash with the prototype in pcre2.h.

Arguments:   none when PCRE2_DFTABLES is defined
               else a PCRE2 general context or NULL
Returns:     pointer to the contiguous block of data
               else NULL if memory allocation failed
*/
72*22dc650dSSadaf Ebrahimi 
73*22dc650dSSadaf Ebrahimi #ifdef PCRE2_DFTABLES  /* Included in freestanding pcre2_dftables program */
maketables(void)74*22dc650dSSadaf Ebrahimi static const uint8_t *maketables(void)
75*22dc650dSSadaf Ebrahimi {
76*22dc650dSSadaf Ebrahimi uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
77*22dc650dSSadaf Ebrahimi 
78*22dc650dSSadaf Ebrahimi #else  /* Not PCRE2_DFTABLES, that is, compiling the library */
79*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
80*22dc650dSSadaf Ebrahimi pcre2_maketables(pcre2_general_context *gcontext)
81*22dc650dSSadaf Ebrahimi {
82*22dc650dSSadaf Ebrahimi uint8_t *yield = (uint8_t *)((gcontext != NULL)?
83*22dc650dSSadaf Ebrahimi   gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
84*22dc650dSSadaf Ebrahimi   malloc(TABLES_LENGTH));
85*22dc650dSSadaf Ebrahimi #endif  /* PCRE2_DFTABLES */
86*22dc650dSSadaf Ebrahimi 
87*22dc650dSSadaf Ebrahimi int i;
88*22dc650dSSadaf Ebrahimi uint8_t *p;
89*22dc650dSSadaf Ebrahimi 
90*22dc650dSSadaf Ebrahimi if (yield == NULL) return NULL;
91*22dc650dSSadaf Ebrahimi p = yield;
92*22dc650dSSadaf Ebrahimi 
93*22dc650dSSadaf Ebrahimi /* First comes the lower casing table */
94*22dc650dSSadaf Ebrahimi 
95*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++) *p++ = tolower(i);
96*22dc650dSSadaf Ebrahimi 
97*22dc650dSSadaf Ebrahimi /* Next the case-flipping table */
98*22dc650dSSadaf Ebrahimi 
99*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
100*22dc650dSSadaf Ebrahimi   {
101*22dc650dSSadaf Ebrahimi   int c = islower(i)? toupper(i) : tolower(i);
102*22dc650dSSadaf Ebrahimi   *p++ = (c < 256)? c : i;
103*22dc650dSSadaf Ebrahimi   }
104*22dc650dSSadaf Ebrahimi 
105*22dc650dSSadaf Ebrahimi /* Then the character class tables. Don't try to be clever and save effort on
106*22dc650dSSadaf Ebrahimi exclusive ones - in some locales things may be different.
107*22dc650dSSadaf Ebrahimi 
108*22dc650dSSadaf Ebrahimi Note that the table for "space" includes everything "isspace" gives, including
109*22dc650dSSadaf Ebrahimi VT in the default locale. This makes it work for the POSIX class [:space:].
110*22dc650dSSadaf Ebrahimi From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
111*22dc650dSSadaf Ebrahimi space, because Perl added VT at release 5.18.
112*22dc650dSSadaf Ebrahimi 
113*22dc650dSSadaf Ebrahimi Note also that it is possible for a character to be alnum or alpha without
114*22dc650dSSadaf Ebrahimi being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
115*22dc650dSSadaf Ebrahimi fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
116*22dc650dSSadaf Ebrahimi test for alnum specially. */
117*22dc650dSSadaf Ebrahimi 
118*22dc650dSSadaf Ebrahimi memset(p, 0, cbit_length);
119*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
120*22dc650dSSadaf Ebrahimi   {
121*22dc650dSSadaf Ebrahimi   if (isdigit(i))  p[cbit_digit  + i/8] |= 1u << (i&7);
122*22dc650dSSadaf Ebrahimi   if (isupper(i))  p[cbit_upper  + i/8] |= 1u << (i&7);
123*22dc650dSSadaf Ebrahimi   if (islower(i))  p[cbit_lower  + i/8] |= 1u << (i&7);
124*22dc650dSSadaf Ebrahimi   if (isalnum(i))  p[cbit_word   + i/8] |= 1u << (i&7);
125*22dc650dSSadaf Ebrahimi   if (i == '_')    p[cbit_word   + i/8] |= 1u << (i&7);
126*22dc650dSSadaf Ebrahimi   if (isspace(i))  p[cbit_space  + i/8] |= 1u << (i&7);
127*22dc650dSSadaf Ebrahimi   if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
128*22dc650dSSadaf Ebrahimi   if (isgraph(i))  p[cbit_graph  + i/8] |= 1u << (i&7);
129*22dc650dSSadaf Ebrahimi   if (isprint(i))  p[cbit_print  + i/8] |= 1u << (i&7);
130*22dc650dSSadaf Ebrahimi   if (ispunct(i))  p[cbit_punct  + i/8] |= 1u << (i&7);
131*22dc650dSSadaf Ebrahimi   if (iscntrl(i))  p[cbit_cntrl  + i/8] |= 1u << (i&7);
132*22dc650dSSadaf Ebrahimi   }
133*22dc650dSSadaf Ebrahimi p += cbit_length;
134*22dc650dSSadaf Ebrahimi 
135*22dc650dSSadaf Ebrahimi /* Finally, the character type table. In this, we used to exclude VT from the
136*22dc650dSSadaf Ebrahimi white space chars, because Perl didn't recognize it as such for \s and for
137*22dc650dSSadaf Ebrahimi comments within regexes. However, Perl changed at release 5.18, so PCRE1
138*22dc650dSSadaf Ebrahimi changed at release 8.34 and it's always been this way for PCRE2. */
139*22dc650dSSadaf Ebrahimi 
140*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
141*22dc650dSSadaf Ebrahimi   {
142*22dc650dSSadaf Ebrahimi   int x = 0;
143*22dc650dSSadaf Ebrahimi   if (isspace(i)) x += ctype_space;
144*22dc650dSSadaf Ebrahimi   if (isalpha(i)) x += ctype_letter;
145*22dc650dSSadaf Ebrahimi   if (islower(i)) x += ctype_lcletter;
146*22dc650dSSadaf Ebrahimi   if (isdigit(i)) x += ctype_digit;
147*22dc650dSSadaf Ebrahimi   if (isalnum(i) || i == '_') x += ctype_word;
148*22dc650dSSadaf Ebrahimi   *p++ = x;
149*22dc650dSSadaf Ebrahimi   }
150*22dc650dSSadaf Ebrahimi 
151*22dc650dSSadaf Ebrahimi return yield;
152*22dc650dSSadaf Ebrahimi }
153*22dc650dSSadaf Ebrahimi 
154*22dc650dSSadaf Ebrahimi #ifndef PCRE2_DFTABLES   /* Compiling the library */
155*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
156*22dc650dSSadaf Ebrahimi pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
157*22dc650dSSadaf Ebrahimi {
158*22dc650dSSadaf Ebrahimi   if (gcontext)
159*22dc650dSSadaf Ebrahimi     gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
160*22dc650dSSadaf Ebrahimi   else
161*22dc650dSSadaf Ebrahimi     free((void *)tables);
162*22dc650dSSadaf Ebrahimi }
163*22dc650dSSadaf Ebrahimi #endif
164*22dc650dSSadaf Ebrahimi 
/* End of pcre2_maketables.c */