xref: /aosp_15_r20/external/pcre/src/pcre2_dftables.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10           New API code Copyright (c) 2016-2020 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 /* This is a freestanding support program to generate a file containing
43 character tables for PCRE2. The tables are built using the pcre2_maketables()
44 function, which is part of the PCRE2 API. By default, the system's "C" locale
45 is used rather than what the building user happens to have set, but the -L
46 option can be used to select the current locale from the LC_ALL environment
47 variable. By default, the tables are written in source form, but if -b is
48 given, they are written in binary. */
49 
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53 
54 #include <ctype.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <locale.h>
58 
59 #define PCRE2_DFTABLES            /* for pcre2_internal.h, pcre2_maketables.c */
60 
61 #define PCRE2_CODE_UNIT_WIDTH 0   /* Must be set, but not relevant here */
62 #include "pcre2_internal.h"
63 
64 #include "pcre2_maketables.c"
65 
66 
/* Names of the character classes that have their own bit map, in the order
in which main() labels the maps when writing them out as source code. */

static const char *classlist[] =
  {
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
  };
72 
73 
74 
75 /*************************************************
76 *                  Usage                         *
77 *************************************************/
78 
/* Write a short summary of the command line syntax and the available options
to stderr. Takes no arguments and returns nothing. */

static void
usage(void)
{
static const char help_text[] =
  "Usage: pcre2_dftables [options] <output file>\n"
  "  -b    Write output in binary (default is source code)\n"
  "  -L    Use locale from LC_ALL (default is \"C\" locale)\n";

(void)fputs(help_text, stderr);
}
88 
89 
90 
91 /*************************************************
92 *                Entry point                     *
93 *************************************************/
94 
main(int argc,char ** argv)95 int main(int argc, char **argv)
96 {
97 FILE *f;
98 int i;
99 int nclass = 0;
100 BOOL binary = FALSE;
101 char *env = (char *)"C";
102 const uint8_t *tables;
103 const uint8_t *base_of_tables;
104 
105 /* Process options */
106 
107 for (i = 1; i < argc; i++)
108   {
109   char *arg = argv[i];
110   if (*arg != '-') break;
111 
112   if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
113     {
114     usage();
115     return 0;
116     }
117 
118   else if (strcmp(arg, "-L") == 0)
119     {
120     if (setlocale(LC_ALL, "") == NULL)
121       {
122       (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
123       return 1;
124       }
125     env = getenv("LC_ALL");
126     }
127 
128   else if (strcmp(arg, "-b") == 0)
129     binary = TRUE;
130 
131   else
132     {
133     (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
134     return 1;
135     }
136   }
137 
138 if (i != argc - 1)
139   {
140   (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
141   return 1;
142   }
143 
144 /* Make the tables */
145 
146 tables = maketables();
147 base_of_tables = tables;
148 
149 f = fopen(argv[i], "wb");
150 if (f == NULL)
151   {
152   fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
153   return 1;
154   }
155 
156 /* If -b was specified, we write the tables in binary. */
157 
158 if (binary)
159   {
160   int yield = 0;
161   size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
162   if (len != TABLES_LENGTH)
163     {
164     (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
165      "instead of %d\n", (int)len, TABLES_LENGTH);
166     yield = 1;
167     }
168   fclose(f);
169   free((void *)base_of_tables);
170   return yield;
171   }
172 
173 /* Write the tables as source code for inclusion in the PCRE2 library. There
174 are several fprintf() calls here, because gcc in pedantic mode complains about
175 the very long string otherwise. */
176 
177 (void)fprintf(f,
178   "/*************************************************\n"
179   "*      Perl-Compatible Regular Expressions       *\n"
180   "*************************************************/\n\n"
181   "/* This file was automatically written by the pcre2_dftables auxiliary\n"
182   "program. It contains character tables that are used when no external\n"
183   "tables are passed to PCRE2 by the application that calls it. The tables\n"
184   "are used only for characters whose code values are less than 256, and\n"
185   "only relevant if not in UCP mode. */\n\n");
186 
187 (void)fprintf(f,
188   "/* This set of tables was written in the %s locale. */\n\n", env);
189 
190 (void)fprintf(f,
191   "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
192   "to build alternative versions of this file. This is necessary if you are\n"
193   "running in an EBCDIC environment, or if you want to default to a different\n"
194   "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
195   "these tables in the \"C\" locale by default. This happens automatically if\n"
196   "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
197   "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
198   "locale. */\n\n");
199 
200 /* Force config.h in z/OS */
201 
202 #if defined NATIVE_ZOS
203 (void)fprintf(f,
204   "/* For z/OS, config.h is forced */\n"
205   "#ifndef HAVE_CONFIG_H\n"
206   "#define HAVE_CONFIG_H 1\n"
207   "#endif\n\n");
208 #endif
209 
210 (void)fprintf(f,
211   "#ifdef HAVE_CONFIG_H\n"
212   "#include \"config.h\"\n"
213   "#endif\n\n"
214   "#include \"pcre2_internal.h\"\n\n");
215 
216 (void)fprintf(f,
217   "const uint8_t PRIV(default_tables)[] = {\n\n"
218   "/* This table is a lower casing table. */\n\n");
219 
220 (void)fprintf(f, "  ");
221 for (i = 0; i < 256; i++)
222   {
223   if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
224   fprintf(f, "%3d", *tables++);
225   if (i != 255) fprintf(f, ",");
226   }
227 (void)fprintf(f, ",\n\n");
228 
229 (void)fprintf(f, "/* This table is a case flipping table. */\n\n");
230 
231 (void)fprintf(f, "  ");
232 for (i = 0; i < 256; i++)
233   {
234   if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
235   fprintf(f, "%3d", *tables++);
236   if (i != 255) fprintf(f, ",");
237   }
238 (void)fprintf(f, ",\n\n");
239 
240 (void)fprintf(f,
241   "/* This table contains bit maps for various character classes. Each map is 32\n"
242   "bytes long and the bits run from the least significant end of each byte. The\n"
243   "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
244   "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
245 
246 (void)fprintf(f, "  ");
247 for (i = 0; i < cbit_length; i++)
248   {
249   if ((i & 7) == 0 && i != 0)
250     {
251     if ((i & 31) == 0) (void)fprintf(f, "\n");
252     if ((i & 24) == 8) (void)fprintf(f, "  /* %s */", classlist[nclass++]);
253     (void)fprintf(f, "\n  ");
254     }
255   (void)fprintf(f, "0x%02x", *tables++);
256   if (i != cbit_length - 1) (void)fprintf(f, ",");
257   }
258 (void)fprintf(f, ",\n\n");
259 
260 (void)fprintf(f,
261   "/* This table identifies various classes of character by individual bits:\n"
262   "  0x%02x   white space character\n"
263   "  0x%02x   letter\n"
264   "  0x%02x   lower case letter\n"
265   "  0x%02x   decimal digit\n"
266   "  0x%02x   word (alphanumeric or '_')\n*/\n\n",
267   ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
268 
269 (void)fprintf(f, "  ");
270 for (i = 0; i < 256; i++)
271   {
272   if ((i & 7) == 0 && i != 0)
273     {
274     (void)fprintf(f, " /* ");
275     if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
276       else (void)fprintf(f, "%3d-", i-8);
277     if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
278       else (void)fprintf(f, "%3d", i-1);
279     (void)fprintf(f, " */\n  ");
280     }
281   (void)fprintf(f, "0x%02x", *tables++);
282   if (i != 255) (void)fprintf(f, ",");
283   }
284 
285 (void)fprintf(f, "};/* ");
286 if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
287   else (void)fprintf(f, "%3d-", i-8);
288 if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
289   else (void)fprintf(f, "%3d", i-1);
290 (void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
291 
292 fclose(f);
293 free((void *)base_of_tables);
294 return 0;
295 }
296 
297 /* End of pcre2_dftables.c */
298