/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This is a freestanding support program to generate a file containing
character tables for PCRE2. The tables are built using the pcre2_maketables()
function, which is part of the PCRE2 API. By default, the system's "C" locale
is used rather than what the building user happens to have set, but the -L
option can be used to select the current locale from the LC_ALL environment
variable. By default, the tables are written in source form, but if -b is
given, they are written in binary. */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PCRE2_DFTABLES            /* for pcre2_internal.h, pcre2_maketables.c */

#define PCRE2_CODE_UNIT_WIDTH 0   /* Must be set, but not relevant here */
#include "pcre2_internal.h"

#include "pcre2_maketables.c"


/* Names of the character classes that have their own bit map. The order is
the order in which main() labels successive 32-byte maps when it writes the
class bit map table as source code. */

static const char * const classlist[] =
{
  "space", "xdigit", "digit", "upper", "lower",
  "word", "graph", "print", "punct", "cntrl"
};



/*************************************************
* Usage *
*************************************************/

/* Write a short summary of the command line syntax and the available options
to stderr. Takes no arguments and returns nothing. */

static void
usage(void)
{
  (void)fprintf(stderr,
    "Usage: pcre2_dftables [options] <output file>\n"
    " -b Write output in binary (default is source code)\n"
    " -L Use locale from LC_ALL (default is \"C\" locale)\n"
    );
}



/*************************************************
* Entry point *
*************************************************/

main(int argc,char ** argv)95*22dc650dSSadaf Ebrahimi int main(int argc, char **argv)
96*22dc650dSSadaf Ebrahimi {
97*22dc650dSSadaf Ebrahimi FILE *f;
98*22dc650dSSadaf Ebrahimi int i;
99*22dc650dSSadaf Ebrahimi int nclass = 0;
100*22dc650dSSadaf Ebrahimi BOOL binary = FALSE;
101*22dc650dSSadaf Ebrahimi char *env = (char *)"C";
102*22dc650dSSadaf Ebrahimi const uint8_t *tables;
103*22dc650dSSadaf Ebrahimi const uint8_t *base_of_tables;
104*22dc650dSSadaf Ebrahimi
105*22dc650dSSadaf Ebrahimi /* Process options */
106*22dc650dSSadaf Ebrahimi
107*22dc650dSSadaf Ebrahimi for (i = 1; i < argc; i++)
108*22dc650dSSadaf Ebrahimi {
109*22dc650dSSadaf Ebrahimi char *arg = argv[i];
110*22dc650dSSadaf Ebrahimi if (*arg != '-') break;
111*22dc650dSSadaf Ebrahimi
112*22dc650dSSadaf Ebrahimi if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
113*22dc650dSSadaf Ebrahimi {
114*22dc650dSSadaf Ebrahimi usage();
115*22dc650dSSadaf Ebrahimi return 0;
116*22dc650dSSadaf Ebrahimi }
117*22dc650dSSadaf Ebrahimi
118*22dc650dSSadaf Ebrahimi else if (strcmp(arg, "-L") == 0)
119*22dc650dSSadaf Ebrahimi {
120*22dc650dSSadaf Ebrahimi if (setlocale(LC_ALL, "") == NULL)
121*22dc650dSSadaf Ebrahimi {
122*22dc650dSSadaf Ebrahimi (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
123*22dc650dSSadaf Ebrahimi return 1;
124*22dc650dSSadaf Ebrahimi }
125*22dc650dSSadaf Ebrahimi env = getenv("LC_ALL");
126*22dc650dSSadaf Ebrahimi }
127*22dc650dSSadaf Ebrahimi
128*22dc650dSSadaf Ebrahimi else if (strcmp(arg, "-b") == 0)
129*22dc650dSSadaf Ebrahimi binary = TRUE;
130*22dc650dSSadaf Ebrahimi
131*22dc650dSSadaf Ebrahimi else
132*22dc650dSSadaf Ebrahimi {
133*22dc650dSSadaf Ebrahimi (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
134*22dc650dSSadaf Ebrahimi return 1;
135*22dc650dSSadaf Ebrahimi }
136*22dc650dSSadaf Ebrahimi }
137*22dc650dSSadaf Ebrahimi
138*22dc650dSSadaf Ebrahimi if (i != argc - 1)
139*22dc650dSSadaf Ebrahimi {
140*22dc650dSSadaf Ebrahimi (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
141*22dc650dSSadaf Ebrahimi return 1;
142*22dc650dSSadaf Ebrahimi }
143*22dc650dSSadaf Ebrahimi
144*22dc650dSSadaf Ebrahimi /* Make the tables */
145*22dc650dSSadaf Ebrahimi
146*22dc650dSSadaf Ebrahimi tables = maketables();
147*22dc650dSSadaf Ebrahimi base_of_tables = tables;
148*22dc650dSSadaf Ebrahimi
149*22dc650dSSadaf Ebrahimi f = fopen(argv[i], "wb");
150*22dc650dSSadaf Ebrahimi if (f == NULL)
151*22dc650dSSadaf Ebrahimi {
152*22dc650dSSadaf Ebrahimi fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
153*22dc650dSSadaf Ebrahimi return 1;
154*22dc650dSSadaf Ebrahimi }
155*22dc650dSSadaf Ebrahimi
156*22dc650dSSadaf Ebrahimi /* If -b was specified, we write the tables in binary. */
157*22dc650dSSadaf Ebrahimi
158*22dc650dSSadaf Ebrahimi if (binary)
159*22dc650dSSadaf Ebrahimi {
160*22dc650dSSadaf Ebrahimi int yield = 0;
161*22dc650dSSadaf Ebrahimi size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
162*22dc650dSSadaf Ebrahimi if (len != TABLES_LENGTH)
163*22dc650dSSadaf Ebrahimi {
164*22dc650dSSadaf Ebrahimi (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
165*22dc650dSSadaf Ebrahimi "instead of %d\n", (int)len, TABLES_LENGTH);
166*22dc650dSSadaf Ebrahimi yield = 1;
167*22dc650dSSadaf Ebrahimi }
168*22dc650dSSadaf Ebrahimi fclose(f);
169*22dc650dSSadaf Ebrahimi free((void *)base_of_tables);
170*22dc650dSSadaf Ebrahimi return yield;
171*22dc650dSSadaf Ebrahimi }
172*22dc650dSSadaf Ebrahimi
173*22dc650dSSadaf Ebrahimi /* Write the tables as source code for inclusion in the PCRE2 library. There
174*22dc650dSSadaf Ebrahimi are several fprintf() calls here, because gcc in pedantic mode complains about
175*22dc650dSSadaf Ebrahimi the very long string otherwise. */
176*22dc650dSSadaf Ebrahimi
177*22dc650dSSadaf Ebrahimi (void)fprintf(f,
178*22dc650dSSadaf Ebrahimi "/*************************************************\n"
179*22dc650dSSadaf Ebrahimi "* Perl-Compatible Regular Expressions *\n"
180*22dc650dSSadaf Ebrahimi "*************************************************/\n\n"
181*22dc650dSSadaf Ebrahimi "/* This file was automatically written by the pcre2_dftables auxiliary\n"
182*22dc650dSSadaf Ebrahimi "program. It contains character tables that are used when no external\n"
183*22dc650dSSadaf Ebrahimi "tables are passed to PCRE2 by the application that calls it. The tables\n"
184*22dc650dSSadaf Ebrahimi "are used only for characters whose code values are less than 256, and\n"
185*22dc650dSSadaf Ebrahimi "only relevant if not in UCP mode. */\n\n");
186*22dc650dSSadaf Ebrahimi
187*22dc650dSSadaf Ebrahimi (void)fprintf(f,
188*22dc650dSSadaf Ebrahimi "/* This set of tables was written in the %s locale. */\n\n", env);
189*22dc650dSSadaf Ebrahimi
190*22dc650dSSadaf Ebrahimi (void)fprintf(f,
191*22dc650dSSadaf Ebrahimi "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
192*22dc650dSSadaf Ebrahimi "to build alternative versions of this file. This is necessary if you are\n"
193*22dc650dSSadaf Ebrahimi "running in an EBCDIC environment, or if you want to default to a different\n"
194*22dc650dSSadaf Ebrahimi "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
195*22dc650dSSadaf Ebrahimi "these tables in the \"C\" locale by default. This happens automatically if\n"
196*22dc650dSSadaf Ebrahimi "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
197*22dc650dSSadaf Ebrahimi "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
198*22dc650dSSadaf Ebrahimi "locale. */\n\n");
199*22dc650dSSadaf Ebrahimi
200*22dc650dSSadaf Ebrahimi /* Force config.h in z/OS */
201*22dc650dSSadaf Ebrahimi
202*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
203*22dc650dSSadaf Ebrahimi (void)fprintf(f,
204*22dc650dSSadaf Ebrahimi "/* For z/OS, config.h is forced */\n"
205*22dc650dSSadaf Ebrahimi "#ifndef HAVE_CONFIG_H\n"
206*22dc650dSSadaf Ebrahimi "#define HAVE_CONFIG_H 1\n"
207*22dc650dSSadaf Ebrahimi "#endif\n\n");
208*22dc650dSSadaf Ebrahimi #endif
209*22dc650dSSadaf Ebrahimi
210*22dc650dSSadaf Ebrahimi (void)fprintf(f,
211*22dc650dSSadaf Ebrahimi "#ifdef HAVE_CONFIG_H\n"
212*22dc650dSSadaf Ebrahimi "#include \"config.h\"\n"
213*22dc650dSSadaf Ebrahimi "#endif\n\n"
214*22dc650dSSadaf Ebrahimi "#include \"pcre2_internal.h\"\n\n");
215*22dc650dSSadaf Ebrahimi
216*22dc650dSSadaf Ebrahimi (void)fprintf(f,
217*22dc650dSSadaf Ebrahimi "const uint8_t PRIV(default_tables)[] = {\n\n"
218*22dc650dSSadaf Ebrahimi "/* This table is a lower casing table. */\n\n");
219*22dc650dSSadaf Ebrahimi
220*22dc650dSSadaf Ebrahimi (void)fprintf(f, " ");
221*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
222*22dc650dSSadaf Ebrahimi {
223*22dc650dSSadaf Ebrahimi if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
224*22dc650dSSadaf Ebrahimi fprintf(f, "%3d", *tables++);
225*22dc650dSSadaf Ebrahimi if (i != 255) fprintf(f, ",");
226*22dc650dSSadaf Ebrahimi }
227*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
228*22dc650dSSadaf Ebrahimi
229*22dc650dSSadaf Ebrahimi (void)fprintf(f, "/* This table is a case flipping table. */\n\n");
230*22dc650dSSadaf Ebrahimi
231*22dc650dSSadaf Ebrahimi (void)fprintf(f, " ");
232*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
233*22dc650dSSadaf Ebrahimi {
234*22dc650dSSadaf Ebrahimi if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
235*22dc650dSSadaf Ebrahimi fprintf(f, "%3d", *tables++);
236*22dc650dSSadaf Ebrahimi if (i != 255) fprintf(f, ",");
237*22dc650dSSadaf Ebrahimi }
238*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
239*22dc650dSSadaf Ebrahimi
240*22dc650dSSadaf Ebrahimi (void)fprintf(f,
241*22dc650dSSadaf Ebrahimi "/* This table contains bit maps for various character classes. Each map is 32\n"
242*22dc650dSSadaf Ebrahimi "bytes long and the bits run from the least significant end of each byte. The\n"
243*22dc650dSSadaf Ebrahimi "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
244*22dc650dSSadaf Ebrahimi "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
245*22dc650dSSadaf Ebrahimi
246*22dc650dSSadaf Ebrahimi (void)fprintf(f, " ");
247*22dc650dSSadaf Ebrahimi for (i = 0; i < cbit_length; i++)
248*22dc650dSSadaf Ebrahimi {
249*22dc650dSSadaf Ebrahimi if ((i & 7) == 0 && i != 0)
250*22dc650dSSadaf Ebrahimi {
251*22dc650dSSadaf Ebrahimi if ((i & 31) == 0) (void)fprintf(f, "\n");
252*22dc650dSSadaf Ebrahimi if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
253*22dc650dSSadaf Ebrahimi (void)fprintf(f, "\n ");
254*22dc650dSSadaf Ebrahimi }
255*22dc650dSSadaf Ebrahimi (void)fprintf(f, "0x%02x", *tables++);
256*22dc650dSSadaf Ebrahimi if (i != cbit_length - 1) (void)fprintf(f, ",");
257*22dc650dSSadaf Ebrahimi }
258*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
259*22dc650dSSadaf Ebrahimi
260*22dc650dSSadaf Ebrahimi (void)fprintf(f,
261*22dc650dSSadaf Ebrahimi "/* This table identifies various classes of character by individual bits:\n"
262*22dc650dSSadaf Ebrahimi " 0x%02x white space character\n"
263*22dc650dSSadaf Ebrahimi " 0x%02x letter\n"
264*22dc650dSSadaf Ebrahimi " 0x%02x lower case letter\n"
265*22dc650dSSadaf Ebrahimi " 0x%02x decimal digit\n"
266*22dc650dSSadaf Ebrahimi " 0x%02x word (alphanumeric or '_')\n*/\n\n",
267*22dc650dSSadaf Ebrahimi ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
268*22dc650dSSadaf Ebrahimi
269*22dc650dSSadaf Ebrahimi (void)fprintf(f, " ");
270*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
271*22dc650dSSadaf Ebrahimi {
272*22dc650dSSadaf Ebrahimi if ((i & 7) == 0 && i != 0)
273*22dc650dSSadaf Ebrahimi {
274*22dc650dSSadaf Ebrahimi (void)fprintf(f, " /* ");
275*22dc650dSSadaf Ebrahimi if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
276*22dc650dSSadaf Ebrahimi else (void)fprintf(f, "%3d-", i-8);
277*22dc650dSSadaf Ebrahimi if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
278*22dc650dSSadaf Ebrahimi else (void)fprintf(f, "%3d", i-1);
279*22dc650dSSadaf Ebrahimi (void)fprintf(f, " */\n ");
280*22dc650dSSadaf Ebrahimi }
281*22dc650dSSadaf Ebrahimi (void)fprintf(f, "0x%02x", *tables++);
282*22dc650dSSadaf Ebrahimi if (i != 255) (void)fprintf(f, ",");
283*22dc650dSSadaf Ebrahimi }
284*22dc650dSSadaf Ebrahimi
285*22dc650dSSadaf Ebrahimi (void)fprintf(f, "};/* ");
286*22dc650dSSadaf Ebrahimi if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
287*22dc650dSSadaf Ebrahimi else (void)fprintf(f, "%3d-", i-8);
288*22dc650dSSadaf Ebrahimi if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
289*22dc650dSSadaf Ebrahimi else (void)fprintf(f, "%3d", i-1);
290*22dc650dSSadaf Ebrahimi (void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
291*22dc650dSSadaf Ebrahimi
292*22dc650dSSadaf Ebrahimi fclose(f);
293*22dc650dSSadaf Ebrahimi free((void *)base_of_tables);
294*22dc650dSSadaf Ebrahimi return 0;
295*22dc650dSSadaf Ebrahimi }

/* End of pcre2_dftables.c */