xref: /aosp_15_r20/external/pcre/src/pcre2_dftables.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2020 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi /* This is a freestanding support program to generate a file containing
43*22dc650dSSadaf Ebrahimi character tables for PCRE2. The tables are built using the pcre2_maketables()
44*22dc650dSSadaf Ebrahimi function, which is part of the PCRE2 API. By default, the system's "C" locale
45*22dc650dSSadaf Ebrahimi is used rather than what the building user happens to have set, but the -L
46*22dc650dSSadaf Ebrahimi option can be used to select the current locale from the LC_ALL environment
47*22dc650dSSadaf Ebrahimi variable. By default, the tables are written in source form, but if -b is
48*22dc650dSSadaf Ebrahimi given, they are written in binary. */
49*22dc650dSSadaf Ebrahimi 
50*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
51*22dc650dSSadaf Ebrahimi #include "config.h"
52*22dc650dSSadaf Ebrahimi #endif
53*22dc650dSSadaf Ebrahimi 
54*22dc650dSSadaf Ebrahimi #include <ctype.h>
55*22dc650dSSadaf Ebrahimi #include <stdio.h>
56*22dc650dSSadaf Ebrahimi #include <string.h>
57*22dc650dSSadaf Ebrahimi #include <locale.h>
58*22dc650dSSadaf Ebrahimi 
59*22dc650dSSadaf Ebrahimi #define PCRE2_DFTABLES            /* for pcre2_internal.h, pcre2_maketables.c */
60*22dc650dSSadaf Ebrahimi 
61*22dc650dSSadaf Ebrahimi #define PCRE2_CODE_UNIT_WIDTH 0   /* Must be set, but not relevant here */
62*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
63*22dc650dSSadaf Ebrahimi 
64*22dc650dSSadaf Ebrahimi #include "pcre2_maketables.c"
65*22dc650dSSadaf Ebrahimi 
66*22dc650dSSadaf Ebrahimi 
/* Names of the character classes that have their own bit maps. main() uses
these, in this order, to label each successive 32-byte map when it writes the
class bit map table as source code. The table is read-only, so both the strings
and the array of pointers are const. */

static const char * const classlist[] =
  {
  "space", "xdigit", "digit", "upper", "lower",
  "word", "graph", "print", "punct", "cntrl"
  };
72*22dc650dSSadaf Ebrahimi 
73*22dc650dSSadaf Ebrahimi 
74*22dc650dSSadaf Ebrahimi 
75*22dc650dSSadaf Ebrahimi /*************************************************
76*22dc650dSSadaf Ebrahimi *                  Usage                         *
77*22dc650dSSadaf Ebrahimi *************************************************/
78*22dc650dSSadaf Ebrahimi 
/* Write a short summary of the command line syntax and of the recognized
options to the standard error stream. There are no arguments and there is no
return value. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  "  -b    Write output in binary (default is source code)\n"
  "  -L    Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(help_text, stderr);
}
88*22dc650dSSadaf Ebrahimi 
89*22dc650dSSadaf Ebrahimi 
90*22dc650dSSadaf Ebrahimi 
91*22dc650dSSadaf Ebrahimi /*************************************************
92*22dc650dSSadaf Ebrahimi *                Entry point                     *
93*22dc650dSSadaf Ebrahimi *************************************************/
94*22dc650dSSadaf Ebrahimi 
main(int argc,char ** argv)95*22dc650dSSadaf Ebrahimi int main(int argc, char **argv)
96*22dc650dSSadaf Ebrahimi {
97*22dc650dSSadaf Ebrahimi FILE *f;
98*22dc650dSSadaf Ebrahimi int i;
99*22dc650dSSadaf Ebrahimi int nclass = 0;
100*22dc650dSSadaf Ebrahimi BOOL binary = FALSE;
101*22dc650dSSadaf Ebrahimi char *env = (char *)"C";
102*22dc650dSSadaf Ebrahimi const uint8_t *tables;
103*22dc650dSSadaf Ebrahimi const uint8_t *base_of_tables;
104*22dc650dSSadaf Ebrahimi 
105*22dc650dSSadaf Ebrahimi /* Process options */
106*22dc650dSSadaf Ebrahimi 
107*22dc650dSSadaf Ebrahimi for (i = 1; i < argc; i++)
108*22dc650dSSadaf Ebrahimi   {
109*22dc650dSSadaf Ebrahimi   char *arg = argv[i];
110*22dc650dSSadaf Ebrahimi   if (*arg != '-') break;
111*22dc650dSSadaf Ebrahimi 
112*22dc650dSSadaf Ebrahimi   if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
113*22dc650dSSadaf Ebrahimi     {
114*22dc650dSSadaf Ebrahimi     usage();
115*22dc650dSSadaf Ebrahimi     return 0;
116*22dc650dSSadaf Ebrahimi     }
117*22dc650dSSadaf Ebrahimi 
118*22dc650dSSadaf Ebrahimi   else if (strcmp(arg, "-L") == 0)
119*22dc650dSSadaf Ebrahimi     {
120*22dc650dSSadaf Ebrahimi     if (setlocale(LC_ALL, "") == NULL)
121*22dc650dSSadaf Ebrahimi       {
122*22dc650dSSadaf Ebrahimi       (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
123*22dc650dSSadaf Ebrahimi       return 1;
124*22dc650dSSadaf Ebrahimi       }
125*22dc650dSSadaf Ebrahimi     env = getenv("LC_ALL");
126*22dc650dSSadaf Ebrahimi     }
127*22dc650dSSadaf Ebrahimi 
128*22dc650dSSadaf Ebrahimi   else if (strcmp(arg, "-b") == 0)
129*22dc650dSSadaf Ebrahimi     binary = TRUE;
130*22dc650dSSadaf Ebrahimi 
131*22dc650dSSadaf Ebrahimi   else
132*22dc650dSSadaf Ebrahimi     {
133*22dc650dSSadaf Ebrahimi     (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
134*22dc650dSSadaf Ebrahimi     return 1;
135*22dc650dSSadaf Ebrahimi     }
136*22dc650dSSadaf Ebrahimi   }
137*22dc650dSSadaf Ebrahimi 
138*22dc650dSSadaf Ebrahimi if (i != argc - 1)
139*22dc650dSSadaf Ebrahimi   {
140*22dc650dSSadaf Ebrahimi   (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
141*22dc650dSSadaf Ebrahimi   return 1;
142*22dc650dSSadaf Ebrahimi   }
143*22dc650dSSadaf Ebrahimi 
144*22dc650dSSadaf Ebrahimi /* Make the tables */
145*22dc650dSSadaf Ebrahimi 
146*22dc650dSSadaf Ebrahimi tables = maketables();
147*22dc650dSSadaf Ebrahimi base_of_tables = tables;
148*22dc650dSSadaf Ebrahimi 
149*22dc650dSSadaf Ebrahimi f = fopen(argv[i], "wb");
150*22dc650dSSadaf Ebrahimi if (f == NULL)
151*22dc650dSSadaf Ebrahimi   {
152*22dc650dSSadaf Ebrahimi   fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
153*22dc650dSSadaf Ebrahimi   return 1;
154*22dc650dSSadaf Ebrahimi   }
155*22dc650dSSadaf Ebrahimi 
156*22dc650dSSadaf Ebrahimi /* If -b was specified, we write the tables in binary. */
157*22dc650dSSadaf Ebrahimi 
158*22dc650dSSadaf Ebrahimi if (binary)
159*22dc650dSSadaf Ebrahimi   {
160*22dc650dSSadaf Ebrahimi   int yield = 0;
161*22dc650dSSadaf Ebrahimi   size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
162*22dc650dSSadaf Ebrahimi   if (len != TABLES_LENGTH)
163*22dc650dSSadaf Ebrahimi     {
164*22dc650dSSadaf Ebrahimi     (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
165*22dc650dSSadaf Ebrahimi      "instead of %d\n", (int)len, TABLES_LENGTH);
166*22dc650dSSadaf Ebrahimi     yield = 1;
167*22dc650dSSadaf Ebrahimi     }
168*22dc650dSSadaf Ebrahimi   fclose(f);
169*22dc650dSSadaf Ebrahimi   free((void *)base_of_tables);
170*22dc650dSSadaf Ebrahimi   return yield;
171*22dc650dSSadaf Ebrahimi   }
172*22dc650dSSadaf Ebrahimi 
173*22dc650dSSadaf Ebrahimi /* Write the tables as source code for inclusion in the PCRE2 library. There
174*22dc650dSSadaf Ebrahimi are several fprintf() calls here, because gcc in pedantic mode complains about
175*22dc650dSSadaf Ebrahimi the very long string otherwise. */
176*22dc650dSSadaf Ebrahimi 
177*22dc650dSSadaf Ebrahimi (void)fprintf(f,
178*22dc650dSSadaf Ebrahimi   "/*************************************************\n"
179*22dc650dSSadaf Ebrahimi   "*      Perl-Compatible Regular Expressions       *\n"
180*22dc650dSSadaf Ebrahimi   "*************************************************/\n\n"
181*22dc650dSSadaf Ebrahimi   "/* This file was automatically written by the pcre2_dftables auxiliary\n"
182*22dc650dSSadaf Ebrahimi   "program. It contains character tables that are used when no external\n"
183*22dc650dSSadaf Ebrahimi   "tables are passed to PCRE2 by the application that calls it. The tables\n"
184*22dc650dSSadaf Ebrahimi   "are used only for characters whose code values are less than 256, and\n"
185*22dc650dSSadaf Ebrahimi   "only relevant if not in UCP mode. */\n\n");
186*22dc650dSSadaf Ebrahimi 
187*22dc650dSSadaf Ebrahimi (void)fprintf(f,
188*22dc650dSSadaf Ebrahimi   "/* This set of tables was written in the %s locale. */\n\n", env);
189*22dc650dSSadaf Ebrahimi 
190*22dc650dSSadaf Ebrahimi (void)fprintf(f,
191*22dc650dSSadaf Ebrahimi   "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
192*22dc650dSSadaf Ebrahimi   "to build alternative versions of this file. This is necessary if you are\n"
193*22dc650dSSadaf Ebrahimi   "running in an EBCDIC environment, or if you want to default to a different\n"
194*22dc650dSSadaf Ebrahimi   "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
195*22dc650dSSadaf Ebrahimi   "these tables in the \"C\" locale by default. This happens automatically if\n"
196*22dc650dSSadaf Ebrahimi   "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
197*22dc650dSSadaf Ebrahimi   "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
198*22dc650dSSadaf Ebrahimi   "locale. */\n\n");
199*22dc650dSSadaf Ebrahimi 
200*22dc650dSSadaf Ebrahimi /* Force config.h in z/OS */
201*22dc650dSSadaf Ebrahimi 
202*22dc650dSSadaf Ebrahimi #if defined NATIVE_ZOS
203*22dc650dSSadaf Ebrahimi (void)fprintf(f,
204*22dc650dSSadaf Ebrahimi   "/* For z/OS, config.h is forced */\n"
205*22dc650dSSadaf Ebrahimi   "#ifndef HAVE_CONFIG_H\n"
206*22dc650dSSadaf Ebrahimi   "#define HAVE_CONFIG_H 1\n"
207*22dc650dSSadaf Ebrahimi   "#endif\n\n");
208*22dc650dSSadaf Ebrahimi #endif
209*22dc650dSSadaf Ebrahimi 
210*22dc650dSSadaf Ebrahimi (void)fprintf(f,
211*22dc650dSSadaf Ebrahimi   "#ifdef HAVE_CONFIG_H\n"
212*22dc650dSSadaf Ebrahimi   "#include \"config.h\"\n"
213*22dc650dSSadaf Ebrahimi   "#endif\n\n"
214*22dc650dSSadaf Ebrahimi   "#include \"pcre2_internal.h\"\n\n");
215*22dc650dSSadaf Ebrahimi 
216*22dc650dSSadaf Ebrahimi (void)fprintf(f,
217*22dc650dSSadaf Ebrahimi   "const uint8_t PRIV(default_tables)[] = {\n\n"
218*22dc650dSSadaf Ebrahimi   "/* This table is a lower casing table. */\n\n");
219*22dc650dSSadaf Ebrahimi 
220*22dc650dSSadaf Ebrahimi (void)fprintf(f, "  ");
221*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
222*22dc650dSSadaf Ebrahimi   {
223*22dc650dSSadaf Ebrahimi   if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
224*22dc650dSSadaf Ebrahimi   fprintf(f, "%3d", *tables++);
225*22dc650dSSadaf Ebrahimi   if (i != 255) fprintf(f, ",");
226*22dc650dSSadaf Ebrahimi   }
227*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
228*22dc650dSSadaf Ebrahimi 
229*22dc650dSSadaf Ebrahimi (void)fprintf(f, "/* This table is a case flipping table. */\n\n");
230*22dc650dSSadaf Ebrahimi 
231*22dc650dSSadaf Ebrahimi (void)fprintf(f, "  ");
232*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
233*22dc650dSSadaf Ebrahimi   {
234*22dc650dSSadaf Ebrahimi   if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
235*22dc650dSSadaf Ebrahimi   fprintf(f, "%3d", *tables++);
236*22dc650dSSadaf Ebrahimi   if (i != 255) fprintf(f, ",");
237*22dc650dSSadaf Ebrahimi   }
238*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
239*22dc650dSSadaf Ebrahimi 
240*22dc650dSSadaf Ebrahimi (void)fprintf(f,
241*22dc650dSSadaf Ebrahimi   "/* This table contains bit maps for various character classes. Each map is 32\n"
242*22dc650dSSadaf Ebrahimi   "bytes long and the bits run from the least significant end of each byte. The\n"
243*22dc650dSSadaf Ebrahimi   "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
244*22dc650dSSadaf Ebrahimi   "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
245*22dc650dSSadaf Ebrahimi 
246*22dc650dSSadaf Ebrahimi (void)fprintf(f, "  ");
247*22dc650dSSadaf Ebrahimi for (i = 0; i < cbit_length; i++)
248*22dc650dSSadaf Ebrahimi   {
249*22dc650dSSadaf Ebrahimi   if ((i & 7) == 0 && i != 0)
250*22dc650dSSadaf Ebrahimi     {
251*22dc650dSSadaf Ebrahimi     if ((i & 31) == 0) (void)fprintf(f, "\n");
252*22dc650dSSadaf Ebrahimi     if ((i & 24) == 8) (void)fprintf(f, "  /* %s */", classlist[nclass++]);
253*22dc650dSSadaf Ebrahimi     (void)fprintf(f, "\n  ");
254*22dc650dSSadaf Ebrahimi     }
255*22dc650dSSadaf Ebrahimi   (void)fprintf(f, "0x%02x", *tables++);
256*22dc650dSSadaf Ebrahimi   if (i != cbit_length - 1) (void)fprintf(f, ",");
257*22dc650dSSadaf Ebrahimi   }
258*22dc650dSSadaf Ebrahimi (void)fprintf(f, ",\n\n");
259*22dc650dSSadaf Ebrahimi 
260*22dc650dSSadaf Ebrahimi (void)fprintf(f,
261*22dc650dSSadaf Ebrahimi   "/* This table identifies various classes of character by individual bits:\n"
262*22dc650dSSadaf Ebrahimi   "  0x%02x   white space character\n"
263*22dc650dSSadaf Ebrahimi   "  0x%02x   letter\n"
264*22dc650dSSadaf Ebrahimi   "  0x%02x   lower case letter\n"
265*22dc650dSSadaf Ebrahimi   "  0x%02x   decimal digit\n"
266*22dc650dSSadaf Ebrahimi   "  0x%02x   word (alphanumeric or '_')\n*/\n\n",
267*22dc650dSSadaf Ebrahimi   ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
268*22dc650dSSadaf Ebrahimi 
269*22dc650dSSadaf Ebrahimi (void)fprintf(f, "  ");
270*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
271*22dc650dSSadaf Ebrahimi   {
272*22dc650dSSadaf Ebrahimi   if ((i & 7) == 0 && i != 0)
273*22dc650dSSadaf Ebrahimi     {
274*22dc650dSSadaf Ebrahimi     (void)fprintf(f, " /* ");
275*22dc650dSSadaf Ebrahimi     if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
276*22dc650dSSadaf Ebrahimi       else (void)fprintf(f, "%3d-", i-8);
277*22dc650dSSadaf Ebrahimi     if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
278*22dc650dSSadaf Ebrahimi       else (void)fprintf(f, "%3d", i-1);
279*22dc650dSSadaf Ebrahimi     (void)fprintf(f, " */\n  ");
280*22dc650dSSadaf Ebrahimi     }
281*22dc650dSSadaf Ebrahimi   (void)fprintf(f, "0x%02x", *tables++);
282*22dc650dSSadaf Ebrahimi   if (i != 255) (void)fprintf(f, ",");
283*22dc650dSSadaf Ebrahimi   }
284*22dc650dSSadaf Ebrahimi 
285*22dc650dSSadaf Ebrahimi (void)fprintf(f, "};/* ");
286*22dc650dSSadaf Ebrahimi if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
287*22dc650dSSadaf Ebrahimi   else (void)fprintf(f, "%3d-", i-8);
288*22dc650dSSadaf Ebrahimi if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
289*22dc650dSSadaf Ebrahimi   else (void)fprintf(f, "%3d", i-1);
290*22dc650dSSadaf Ebrahimi (void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
291*22dc650dSSadaf Ebrahimi 
292*22dc650dSSadaf Ebrahimi fclose(f);
293*22dc650dSSadaf Ebrahimi free((void *)base_of_tables);
294*22dc650dSSadaf Ebrahimi return 0;
295*22dc650dSSadaf Ebrahimi }
296*22dc650dSSadaf Ebrahimi 
297*22dc650dSSadaf Ebrahimi /* End of pcre2_dftables.c */
298