1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2020 University of Cambridge
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi
19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi
23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi /* This module contains the external function pcre2_maketables(), which builds
43*22dc650dSSadaf Ebrahimi character tables for PCRE2 in the current locale. The file is compiled on its
44*22dc650dSSadaf Ebrahimi own as part of the PCRE2 library. It is also included in the compilation of
45*22dc650dSSadaf Ebrahimi pcre2_dftables.c as a freestanding program, in which case the macro
46*22dc650dSSadaf Ebrahimi PCRE2_DFTABLES is defined. */
47*22dc650dSSadaf Ebrahimi
48*22dc650dSSadaf Ebrahimi #ifndef PCRE2_DFTABLES /* Compiling the library */
49*22dc650dSSadaf Ebrahimi # ifdef HAVE_CONFIG_H
50*22dc650dSSadaf Ebrahimi # include "config.h"
51*22dc650dSSadaf Ebrahimi # endif
52*22dc650dSSadaf Ebrahimi # include "pcre2_internal.h"
53*22dc650dSSadaf Ebrahimi #endif
54*22dc650dSSadaf Ebrahimi
55*22dc650dSSadaf Ebrahimi /*************************************************
56*22dc650dSSadaf Ebrahimi * Create PCRE2 character tables *
57*22dc650dSSadaf Ebrahimi *************************************************/
58*22dc650dSSadaf Ebrahimi
59*22dc650dSSadaf Ebrahimi /* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are built using the ctype functions, and consequently
61*22dc650dSSadaf Ebrahimi their contents will depend upon the current locale setting. When compiled as
62*22dc650dSSadaf Ebrahimi part of the library, the store is obtained via a general context malloc, if
63*22dc650dSSadaf Ebrahimi supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
64*22dc650dSSadaf Ebrahimi freestanding auxiliary program) malloc() is used, and the function has a
65*22dc650dSSadaf Ebrahimi different name so as not to clash with the prototype in pcre2.h.
66*22dc650dSSadaf Ebrahimi
67*22dc650dSSadaf Ebrahimi Arguments: none when PCRE2_DFTABLES is defined
68*22dc650dSSadaf Ebrahimi else a PCRE2 general context or NULL
69*22dc650dSSadaf Ebrahimi Returns: pointer to the contiguous block of data
70*22dc650dSSadaf Ebrahimi else NULL if memory allocation failed
71*22dc650dSSadaf Ebrahimi */
72*22dc650dSSadaf Ebrahimi
73*22dc650dSSadaf Ebrahimi #ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
maketables(void)74*22dc650dSSadaf Ebrahimi static const uint8_t *maketables(void)
75*22dc650dSSadaf Ebrahimi {
76*22dc650dSSadaf Ebrahimi uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
77*22dc650dSSadaf Ebrahimi
78*22dc650dSSadaf Ebrahimi #else /* Not PCRE2_DFTABLES, that is, compiling the library */
79*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
80*22dc650dSSadaf Ebrahimi pcre2_maketables(pcre2_general_context *gcontext)
81*22dc650dSSadaf Ebrahimi {
82*22dc650dSSadaf Ebrahimi uint8_t *yield = (uint8_t *)((gcontext != NULL)?
83*22dc650dSSadaf Ebrahimi gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
84*22dc650dSSadaf Ebrahimi malloc(TABLES_LENGTH));
85*22dc650dSSadaf Ebrahimi #endif /* PCRE2_DFTABLES */
86*22dc650dSSadaf Ebrahimi
87*22dc650dSSadaf Ebrahimi int i;
88*22dc650dSSadaf Ebrahimi uint8_t *p;
89*22dc650dSSadaf Ebrahimi
90*22dc650dSSadaf Ebrahimi if (yield == NULL) return NULL;
91*22dc650dSSadaf Ebrahimi p = yield;
92*22dc650dSSadaf Ebrahimi
93*22dc650dSSadaf Ebrahimi /* First comes the lower casing table */
94*22dc650dSSadaf Ebrahimi
95*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++) *p++ = tolower(i);
96*22dc650dSSadaf Ebrahimi
97*22dc650dSSadaf Ebrahimi /* Next the case-flipping table */
98*22dc650dSSadaf Ebrahimi
99*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
100*22dc650dSSadaf Ebrahimi {
101*22dc650dSSadaf Ebrahimi int c = islower(i)? toupper(i) : tolower(i);
102*22dc650dSSadaf Ebrahimi *p++ = (c < 256)? c : i;
103*22dc650dSSadaf Ebrahimi }
104*22dc650dSSadaf Ebrahimi
105*22dc650dSSadaf Ebrahimi /* Then the character class tables. Don't try to be clever and save effort on
106*22dc650dSSadaf Ebrahimi exclusive ones - in some locales things may be different.
107*22dc650dSSadaf Ebrahimi
108*22dc650dSSadaf Ebrahimi Note that the table for "space" includes everything "isspace" gives, including
109*22dc650dSSadaf Ebrahimi VT in the default locale. This makes it work for the POSIX class [:space:].
110*22dc650dSSadaf Ebrahimi From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
111*22dc650dSSadaf Ebrahimi space, because Perl added VT at release 5.18.
112*22dc650dSSadaf Ebrahimi
113*22dc650dSSadaf Ebrahimi Note also that it is possible for a character to be alnum or alpha without
114*22dc650dSSadaf Ebrahimi being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
115*22dc650dSSadaf Ebrahimi fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
116*22dc650dSSadaf Ebrahimi test for alnum specially. */
117*22dc650dSSadaf Ebrahimi
118*22dc650dSSadaf Ebrahimi memset(p, 0, cbit_length);
119*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
120*22dc650dSSadaf Ebrahimi {
121*22dc650dSSadaf Ebrahimi if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
122*22dc650dSSadaf Ebrahimi if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
123*22dc650dSSadaf Ebrahimi if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
124*22dc650dSSadaf Ebrahimi if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
125*22dc650dSSadaf Ebrahimi if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
126*22dc650dSSadaf Ebrahimi if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
127*22dc650dSSadaf Ebrahimi if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
128*22dc650dSSadaf Ebrahimi if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
129*22dc650dSSadaf Ebrahimi if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
130*22dc650dSSadaf Ebrahimi if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
131*22dc650dSSadaf Ebrahimi if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
132*22dc650dSSadaf Ebrahimi }
133*22dc650dSSadaf Ebrahimi p += cbit_length;
134*22dc650dSSadaf Ebrahimi
135*22dc650dSSadaf Ebrahimi /* Finally, the character type table. In this, we used to exclude VT from the
136*22dc650dSSadaf Ebrahimi white space chars, because Perl didn't recognize it as such for \s and for
137*22dc650dSSadaf Ebrahimi comments within regexes. However, Perl changed at release 5.18, so PCRE1
138*22dc650dSSadaf Ebrahimi changed at release 8.34 and it's always been this way for PCRE2. */
139*22dc650dSSadaf Ebrahimi
140*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
141*22dc650dSSadaf Ebrahimi {
142*22dc650dSSadaf Ebrahimi int x = 0;
143*22dc650dSSadaf Ebrahimi if (isspace(i)) x += ctype_space;
144*22dc650dSSadaf Ebrahimi if (isalpha(i)) x += ctype_letter;
145*22dc650dSSadaf Ebrahimi if (islower(i)) x += ctype_lcletter;
146*22dc650dSSadaf Ebrahimi if (isdigit(i)) x += ctype_digit;
147*22dc650dSSadaf Ebrahimi if (isalnum(i) || i == '_') x += ctype_word;
148*22dc650dSSadaf Ebrahimi *p++ = x;
149*22dc650dSSadaf Ebrahimi }
150*22dc650dSSadaf Ebrahimi
151*22dc650dSSadaf Ebrahimi return yield;
152*22dc650dSSadaf Ebrahimi }
153*22dc650dSSadaf Ebrahimi
154*22dc650dSSadaf Ebrahimi #ifndef PCRE2_DFTABLES /* Compiling the library */
155*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
156*22dc650dSSadaf Ebrahimi pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
157*22dc650dSSadaf Ebrahimi {
158*22dc650dSSadaf Ebrahimi if (gcontext)
159*22dc650dSSadaf Ebrahimi gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
160*22dc650dSSadaf Ebrahimi else
161*22dc650dSSadaf Ebrahimi free((void *)tables);
162*22dc650dSSadaf Ebrahimi }
163*22dc650dSSadaf Ebrahimi #endif
164*22dc650dSSadaf Ebrahimi
165*22dc650dSSadaf Ebrahimi /* End of pcre2_maketables.c */
166