xref: /aosp_15_r20/external/pcre/src/pcre2test.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2024 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported */
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #elif defined(HAVE_EDIT_READLINE_READLINE_H)
94 #include <edit/readline/readline.h>
95 #else
96 #include <readline.h>
97 /* GNU readline defines this macro but libedit doesn't, if that ever changes
98 this needs to be updated or the build could break */
99 #ifdef RL_VERSION_MAJOR
100 #include <history.h>
101 #endif
102 #endif
103 #endif
104 #endif
105 
/* The check for interactive input lives in a macro so that it can be altered
in one place for environments that need a different test. */

#define INTERACTIVE(stream) (isatty(fileno(stream)))
110 
111 
112 /* ---------------------- System-specific definitions ---------------------- */
113 
/* Windows builds differ in several respects. pcretest originally opened both
its input and its output without "b". The "b" was added to both for release 5.0
after a report that some environments need it (it makes no difference on
Unix-like systems); a later report said that it is wrong for the input on
Windows. The modes are therefore collected into macros here, so that they are
easy to adjust, and "b" is omitted from the input mode under Windows. The
BINARY versions are used when saving/restoring compiled patterns. */

#if defined(_WIN32) || defined(WIN32)

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

/* Windows is said to use the underscore-prefixed names, but some environments
already define the plain ones, so each alias is set up only when it is
missing. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* Fix supplied by a user for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#endif  /* Windows / not Windows */

/* The binary modes are the same in every environment. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
162 
163 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
164 user [2] provided alternative code which worked better for him. I have
165 commented out the original, but kept it around just in case. */
166 
167 #ifdef __VMS
168 #include <ssdef.h>
169 /* These two includes came from [2]. */
170 #include descrip
171 #include lib$routines
172 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
173 #endif
174 
/* PTR_FORM and SIZ_FORM are the printf conversion strings (without the leading
'%') built on the "td" and "zu" forms. Old VC and other older compilers do not
support %td or %zu, and even some that claim to be C99 do not (hence
DISABLE_PERCENT_ZT), so the C99 forms are selected only when the compiler is
known to handle them; otherwise a long or long long form is used. */

#if !defined(DISABLE_PERCENT_ZT) && \
    ((defined(_MSC_VER) && (_MSC_VER >= 1800)) || \
     (!defined(_MSC_VER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)))
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#elif defined(_WIN64)
#define PTR_FORM "lld"
#define SIZ_FORM "llu"
#else
#define PTR_FORM "ld"
#define SIZ_FORM "lu"
#endif
191 
192 /* ------------------End of system-specific definitions -------------------- */
193 
/* Token-pasting helpers that are used in several places below. G() goes
through a second macro level so that its arguments are macro-expanded before
they are pasted together; glue() pastes them exactly as written. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
198 
/* Miscellaneous parameters and manifests */

/* Provide CLOCKS_PER_SEC when the system headers have not: use CLK_TCK if that
exists, otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Marker values */

#define CFORE_UNSET   UINT32_MAX  /* startend/cfail/cerror field is unset */
#define CONVERT_UNSET UINT32_MAX  /* convert_type field is unset */
#define JUNK_OFFSET   0xdeadbeef  /* Used to initialize the ovector */

/* Sizes and dimensions */

#define DFA_WS_DIMENSION 1000     /* DFA workspace size */
#define LOCALESIZE 32             /* Locale name size */
#define MALLOCLISTSIZE 20         /* Number of mallocs remembered */
#define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64           /* Buffer size for the version strings */

/* Defaults */

#define DEFAULT_OVECCOUNT 15      /* Ovector count */
#define LOOPREPEAT 500000         /* Loop count for timing */
#define PARENS_NEST_DEFAULT 220   /* Parentheses nest limit */
221 
/* The JIT compile options that are used by default: the three PCRE2_JIT_xxx
bits below, OR-ed together. */

#define JIT_DEFAULT \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD)
227 
/* The buffer into which replacement strings are copied must be large enough to
hold them as 32-bit code units. The size is counted in bytes. */

#define REPLACE_BUFFSIZE 1024

/* Identifiers for the execution modes, one per code unit width. */

#define PCRE8_MODE    8
#define PCRE16_MODE  16
#define PCRE32_MODE  32

/* Values returned from processing. */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
242 
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. It
accepts the range 64..254 in an EBCDIC environment and 32..126 otherwise.

PRINTOK chooses between the two tests: isprint() is used only when use_tables
is not NULL and the character is less than 256; in every other case PRINTABLE
is used. The argument is parenthesized wherever it takes part in an expression,
so that an argument containing a low-precedence operator (for example a
conditional expression or a bitwise AND) is compared as a whole. Both macros
evaluate their argument more than once, so it must not have side effects. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
255 
256 /* We have to include some of the library source files because we need
257 to use some of the macros, internal structure definitions, and other internal
258 values - pcre2test has "inside information" compared to an application program
259 that strictly follows the PCRE2 API.
260 
261 Before including pcre2_internal.h we define PRIV so that it does not get
262 defined therein. This ensures that PRIV names in the included files do not
263 clash with those in the libraries. Also, although pcre2_internal.h does itself
264 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
265 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
266 for building the library.
267 
268 Setting PCRE2_CODE_UNIT_WIDTH to zero cuts out all the width-specific settings
269 in pcre2.h and pcre2_internal.h. Defining PCRE2_BUILDING_PCRE2TEST cuts out the
270 check in pcre2_internal.h that ensures PCRE2_CODE_UNIT_WIDTH is 8, 16, or 32
271 (which it needs to be when compiling one of the libraries). */
272 
273 #define PRIV(name) name
274 #define PCRE2_CODE_UNIT_WIDTH 0
275 #define PCRE2_BUILDING_PCRE2TEST
276 #include "pcre2.h"
277 #include "pcre2posix.h"
278 #include "pcre2_internal.h"
279 
280 /* We need access to some of the data tables that PCRE2 uses. Defining
281 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
282 of PRIV avoids name clashes. */
283 
284 #define PCRE2_PCRE2TEST
285 #include "pcre2_tables.c"
286 #include "pcre2_ucd.c"
287 
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter.

When long is wider than 32 bits, a value is out of range if it lies outside the
32-bit limits. When long is exactly 32 bits, the only available test is whether
the value equals a limit, which is what strtoul() and strtol() return for
out-of-range input.

The macro argument is parenthesized at each use so that an argument containing
a low-precedence operator (for example a conditional expression) is compared as
a whole. The signed test evaluates its argument twice, so the argument must not
have side effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
303 
304 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
305 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
306 defined. We can now include it for each supported code unit width. Because
307 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
308 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
309 while including these files, and then restore it to a no-op. Because LINK_SIZE
310 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
311 these inclusions should not be changed. */
312 
313 #undef PCRE2_SUFFIX
314 #undef PCRE2_CODE_UNIT_WIDTH
315 
316 #ifdef   SUPPORT_PCRE2_8
317 #define  PCRE2_CODE_UNIT_WIDTH 8
318 #define  PCRE2_SUFFIX(a) G(a,8)
319 #include "pcre2_intmodedep.h"
320 #include "pcre2_printint.c"
321 #undef   PCRE2_CODE_UNIT_WIDTH
322 #undef   PCRE2_SUFFIX
323 #endif   /* SUPPORT_PCRE2_8 */
324 
325 #ifdef   SUPPORT_PCRE2_16
326 #define  PCRE2_CODE_UNIT_WIDTH 16
327 #define  PCRE2_SUFFIX(a) G(a,16)
328 #include "pcre2_intmodedep.h"
329 #include "pcre2_printint.c"
330 #undef   PCRE2_CODE_UNIT_WIDTH
331 #undef   PCRE2_SUFFIX
332 #endif   /* SUPPORT_PCRE2_16 */
333 
334 #ifdef   SUPPORT_PCRE2_32
335 #define  PCRE2_CODE_UNIT_WIDTH 32
336 #define  PCRE2_SUFFIX(a) G(a,32)
337 #include "pcre2_intmodedep.h"
338 #include "pcre2_printint.c"
339 #undef   PCRE2_CODE_UNIT_WIDTH
340 #undef   PCRE2_SUFFIX
341 #endif   /* SUPPORT_PCRE2_32 */
342 
343 #define PCRE2_SUFFIX(a) a
344 
345 #include "pcre2_chkdint.c"
346 
347 /* We need to be able to check input text for UTF-8 validity, whatever code
348 widths are actually available, because the input to pcre2test is always in
349 8-bit code units. So we include the UTF validity checking function for 8-bit
350 code units. */
351 
352 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
353 
354 #define  PCRE2_CODE_UNIT_WIDTH 8
355 #undef   PCRE2_SPTR
356 #define  PCRE2_SPTR PCRE2_SPTR8
357 #include "pcre2_valid_utf.c"
358 #undef   PCRE2_CODE_UNIT_WIDTH
359 #undef   PCRE2_SPTR
360 
/* 8-bit support is the default when it is available; if there is also 16- or
32-bit support, it can be selected by a command-line option. Without 8-bit
support there must be 16-bit or 32-bit support, so one of those becomes the
default. The config function, JIT stack, contexts, and version string are the
same in all modes, so the names below map onto the first width that is
available. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE           PCRE8_MODE
#define VERSION_TYPE                PCRE2_UCHAR8
#define PCRE2_CONFIG                pcre2_config_8
#define PCRE2_JIT_STACK             pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE           PCRE16_MODE
#define VERSION_TYPE                PCRE2_UCHAR16
#define PCRE2_CONFIG                pcre2_config_16
#define PCRE2_JIT_STACK             pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE           PCRE32_MODE
#define VERSION_TYPE                PCRE2_UCHAR32
#define PCRE2_CONFIG                pcre2_config_32
#define PCRE2_JIT_STACK             pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_32

#endif
397 
/* ------------- Structure and table for handling #-commands ------------- */

/* One table entry: the name of a #-command and its CMD_xxx identifier. */

typedef struct cmdstruct {
  const char *name;
  int  value;
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN has no entry in the table below. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",       CMD_FORBID_UTF      },
  { "load",             CMD_LOAD            },
  { "loadtables",       CMD_LOADTABLES      },
  { "newline_default",  CMD_NEWLINE_DEFAULT },
  { "pattern",          CMD_PATTERN         },
  { "perltest",         CMD_PERLTEST        },
  { "pop",              CMD_POP             },
  { "popcopy",          CMD_POPCOPY         },
  { "save",             CMD_SAVE            },
  { "subject",          CMD_SUBJECT         }
};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
422 
423 /* ------------- Structures and tables for handling modifiers -------------- */
424 
/* Names of the newline types. The order of this table must be kept in step
with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
430 
431 /* Structure and table for handling pattern conversion types. */
432 
433 typedef struct convertstruct {
434   const char *name;
435   uint32_t option;
436 } convertstruct;
437 
438 static convertstruct convertlist[] = {
439   { "glob",                   PCRE2_CONVERT_GLOB },
440   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
441   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
442   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
443   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
444   { "unset",                  CONVERT_UNSET }};
445 
446 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
447 
/* Modifier types and applicability. These values share one enumeration, so the
order of the names below must not be changed. */

enum {
  /* What a modifier applies to */

  MOD_CTC,    /* A compile context */
  MOD_CTM,    /* A match context */
  MOD_PAT,    /* A pattern */
  MOD_PATP,   /* A pattern; OK for Perl test */
  MOD_DAT,    /* A data line */
  MOD_DATP,   /* A data line; OK for Perl test */
  MOD_PD,     /* A pattern or a data line */
  MOD_PDP,    /* As MOD_PD; OK for Perl test */
  MOD_PND,    /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,   /* As MOD_PND; OK for Perl test */

  /* What kind of value a modifier has */

  MOD_CHR,    /* A single character */
  MOD_CON,    /* A "convert" type/options list */
  MOD_CTL,    /* A control bit */
  MOD_BSR,    /* A BSR value */
  MOD_IN2,    /* One or two unsigned integers */
  MOD_INS,    /* A signed integer */
  MOD_INT,    /* An unsigned integer */
  MOD_IND,    /* An unsigned integer, but no value => default */
  MOD_NL,     /* A newline value */
  MOD_NN,     /* A number or a name; more than one may occur */
  MOD_OPT,    /* An option bit */
  MOD_SIZ,    /* A PCRE2_SIZE value */
  MOD_STR     /* A string */
};
473 
/* Control bits. Some of them apply to compiling and some to matching, but a
number can be set either on a pattern or on a data line, which is why every bit
must be distinct. There are too many for one 32-bit word, so they are split
into two fields; this is the first, and all 32 of its bits are in use. */

#define CTL_AFTERTEXT          (1u <<  0)
#define CTL_ALLAFTERTEXT       (1u <<  1)
#define CTL_ALLCAPTURES        (1u <<  2)
#define CTL_ALLUSEDTEXT        (1u <<  3)
#define CTL_ALTGLOBAL          (1u <<  4)
#define CTL_BINCODE            (1u <<  5)
#define CTL_CALLOUT_CAPTURE    (1u <<  6)
#define CTL_CALLOUT_INFO       (1u <<  7)
#define CTL_CALLOUT_NONE       (1u <<  8)
#define CTL_DFA                (1u <<  9)
#define CTL_EXPAND             (1u << 10)
#define CTL_FINDLIMITS         (1u << 11)
#define CTL_FINDLIMITS_NOHEAP  (1u << 12)
#define CTL_FULLBINCODE        (1u << 13)
#define CTL_GETALL             (1u << 14)
#define CTL_GLOBAL             (1u << 15)
#define CTL_HEXPAT             (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO               (1u << 17)
#define CTL_JITFAST            (1u << 18)
#define CTL_JITVERIFY          (1u << 19)
#define CTL_MARK               (1u << 20)
#define CTL_MEMORY             (1u << 21)
#define CTL_NULLCONTEXT        (1u << 22)
#define CTL_POSIX              (1u << 23)
#define CTL_POSIX_NOSUB        (1u << 24)
#define CTL_PUSH               (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY           (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY     (1u << 27)  /*     word. */
#define CTL_STARTCHAR          (1u << 28)
#define CTL_USE_LENGTH         (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT         (1u << 30)
#define CTL_ZERO_TERMINATE     (1u << 31)

/* Combinations of the bits above */

#define CTL_DEBUG    (CTL_INFO|CTL_FULLBINCODE)  /* For setting */
#define CTL_ANYINFO  (CTL_BINCODE|CTL_CALLOUT_INFO|CTL_DEBUG)
#define CTL_ANYGLOB  (CTL_GLOBAL|CTL_ALTGLOBAL)
516 
/* Second control word. The low sixteen bits are ordinary controls; the three
bits at the top of the word are marked as informational. */

#define CTL2_SUBSTITUTE_CALLOUT           (1u <<  0)
#define CTL2_SUBSTITUTE_EXTENDED          (1u <<  1)
#define CTL2_SUBSTITUTE_LITERAL           (1u <<  2)
#define CTL2_SUBSTITUTE_MATCHED           (1u <<  3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH   (1u <<  4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY  (1u <<  5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET     (1u <<  6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY       (1u <<  7)
#define CTL2_SUBJECT_LITERAL              (1u <<  8)
#define CTL2_CALLOUT_NO_WHERE             (1u <<  9)
#define CTL2_CALLOUT_EXTRA                (1u << 10)
#define CTL2_ALLVECTOR                    (1u << 11)
#define CTL2_NULL_PATTERN                 (1u << 12)
#define CTL2_NULL_SUBJECT                 (1u << 13)
#define CTL2_NULL_REPLACEMENT             (1u << 14)
#define CTL2_FRAMESIZE                    (1u << 15)

#define CTL2_HEAPFRAMES_SIZE              (1u << 29)  /* Informational */
#define CTL2_NL_SET                       (1u << 30)  /* Informational */
#define CTL2_BSR_SET                      (1u << 31)  /* Informational */
539 
/* The matching controls that may be set either on a pattern or on a data line.
They are copied from the pattern controls to provide the initial settings for
the data line controls. CTL_MEMORY is deliberately left out, because it does
different things in the two cases. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT   | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | \
   CTL_ALLUSEDTEXT | CTL_ALTGLOBAL    | CTL_GLOBAL      | \
   CTL_MARK        | CTL_STARTCHAR    | CTL_UTF8_INPUT)

/* The same, for the second control word. */

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_CALLOUT          | CTL2_SUBSTITUTE_EXTENDED        | \
   CTL2_SUBSTITUTE_LITERAL          | CTL2_SUBSTITUTE_MATCHED         | \
   CTL2_SUBSTITUTE_OVERFLOW_LENGTH  | CTL2_SUBSTITUTE_REPLACEMENT_ONLY | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET    | CTL2_SUBSTITUTE_UNSET_EMPTY     | \
   CTL2_ALLVECTOR                   | CTL2_HEAPFRAMES_SIZE)
565 
566 /* Structures for holding modifier information for patterns and subject strings
567 (data). Fields containing modifiers that can be set either for a pattern or a
568 subject must be at the start and in the same order in both cases so that the
569 same offset in the big table below works for both. */
570 
571 typedef struct patctl {       /* Structure for pattern modifiers. */
572   uint32_t  options;          /* Must be in same position as datctl */
573   uint32_t  control;          /* Must be in same position as datctl */
574   uint32_t  control2;         /* Must be in same position as datctl */
575   uint32_t  jitstack;         /* Must be in same position as datctl */
576    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
577   uint32_t  substitute_skip;  /* Must be in same position as patctl */
578   uint32_t  substitute_stop;  /* Must be in same position as patctl */
579   uint32_t  jit;
580   uint32_t  stackguard_test;
581   uint32_t  tables_id;
582   uint32_t  convert_type;
583   uint32_t  convert_length;
584   uint32_t  convert_glob_escape;
585   uint32_t  convert_glob_separator;
586   uint32_t  regerror_buffsize;
587    uint8_t  locale[LOCALESIZE];
588 } patctl;
589 
590 #define MAXCPYGET 10
591 #define LENCPYGET 64
592 
593 typedef struct datctl {       /* Structure for data line modifiers. */
594   uint32_t  options;          /* Must be in same position as patctl */
595   uint32_t  control;          /* Must be in same position as patctl */
596   uint32_t  control2;         /* Must be in same position as patctl */
597   uint32_t  jitstack;         /* Must be in same position as patctl */
598    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
599   uint32_t  substitute_skip;  /* Must be in same position as patctl */
600   uint32_t  substitute_stop;  /* Must be in same position as patctl */
601   uint32_t  startend[2];
602   uint32_t  cerror[2];
603   uint32_t  cfail[2];
604    int32_t  callout_data;
605    int32_t  copy_numbers[MAXCPYGET];
606    int32_t  get_numbers[MAXCPYGET];
607   uint32_t  oveccount;
608   uint32_t  offset;
609   uint8_t   copy_names[LENCPYGET];
610   uint8_t   get_names[LENCPYGET];
611 } datctl;
612 
/* Identifiers that say which context is to be modified. */

enum {
  CTX_PAT,       /* The active pattern context */
  CTX_POPPAT,    /* The same, for a popped pattern */
  CTX_DEFPAT,    /* The default pattern context */
  CTX_DAT,       /* The active data (match) context */
  CTX_DEFDAT     /* The default data (match) context */
};
620 
/* Shorthand for the field offsets that appear in the big table below: CO and
MO give offsets within the compile and match contexts, PO within patctl, and DO
within datctl. PD gives the same offset as PO; the table uses it in entries
whose applicability is MOD_PD. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) offsetof(patctl, field)
#define DO(field) offsetof(datctl, field)
628 
629 /* Table of all long-form modifiers. Must be in collating sequence of modifier
630 name because it is searched by binary chop. */
631 
632 typedef struct modstruct {
633   const char   *name;
634   uint16_t      which;
635   uint16_t      type;
636   uint32_t      value;
637   PCRE2_SIZE    offset;
638 } modstruct;
639 
/* All four PCRE2_EXTRA_ASCII_xxx bits below, OR-ed together; used by the
"ascii_all" modifier. */

#define PCRE2_EXTRA_ASCII_ALL \
  (PCRE2_EXTRA_ASCII_BSD   | \
   PCRE2_EXTRA_ASCII_BSS   | \
   PCRE2_EXTRA_ASCII_BSW   | \
   PCRE2_EXTRA_ASCII_POSIX)
642 
643 static modstruct modlist[] = {
644   { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
645   { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
646   { "allcaptures",                 MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
647   { "allow_empty_class",           MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
648   { "allow_lookaround_bsk",        MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
649   { "allow_surrogate_escapes",     MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
650   { "allusedtext",                 MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
651   { "allvector",                   MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
652   { "alt_bsux",                    MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
653   { "alt_circumflex",              MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
654   { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
655   { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
656   { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
657   { "ascii_all",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_ALL,      CO(extra_options) },
658   { "ascii_bsd",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSD,      CO(extra_options) },
659   { "ascii_bss",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSS,      CO(extra_options) },
660   { "ascii_bsw",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSW,      CO(extra_options) },
661   { "ascii_digit",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT,    CO(extra_options) },
662   { "ascii_posix",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_POSIX,    CO(extra_options) },
663   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
664   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
665   { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
666   { "bsr",                         MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
667   { "callout_capture",             MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
668   { "callout_data",                MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
669   { "callout_error",               MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
670   { "callout_extra",               MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
671   { "callout_fail",                MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
672   { "callout_info",                MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
673   { "callout_no_where",            MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
674   { "callout_none",                MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
675   { "caseless",                    MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
676   { "caseless_restrict",           MOD_CTC,  MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) },
677   { "convert",                     MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
678   { "convert_glob_escape",         MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
679   { "convert_glob_separator",      MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
680   { "convert_length",              MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
681   { "copy",                        MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
682   { "copy_matched_subject",        MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
683   { "debug",                       MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
684   { "depth_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
685   { "dfa",                         MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
686   { "dfa_restart",                 MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
687   { "dfa_shortest",                MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
688   { "disable_recurseloop_check",   MOD_DAT,  MOD_OPT, PCRE2_DISABLE_RECURSELOOP_CHECK, DO(options) },
689   { "dollar_endonly",              MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
690   { "dotall",                      MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
691   { "dupnames",                    MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
692   { "endanchored",                 MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
693   { "escaped_cr_is_lf",            MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
694   { "expand",                      MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
695   { "extended",                    MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
696   { "extended_more",               MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
697   { "extra_alt_bsux",              MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
698   { "find_limits",                 MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
699   { "find_limits_noheap",          MOD_DAT,  MOD_CTL, CTL_FINDLIMITS_NOHEAP,      DO(control) },
700   { "firstline",                   MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
701   { "framesize",                   MOD_PAT,  MOD_CTL, CTL2_FRAMESIZE,             PO(control2) },
702   { "fullbincode",                 MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
703   { "get",                         MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
704   { "getall",                      MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
705   { "global",                      MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
706   { "heap_limit",                  MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
707   { "heapframes_size",             MOD_PND,  MOD_CTL, CTL2_HEAPFRAMES_SIZE,       PO(control2) },
708   { "hex",                         MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
709   { "info",                        MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
710   { "jit",                         MOD_PAT,  MOD_IND, 7,                          PO(jit) },
711   { "jitfast",                     MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
712   { "jitstack",                    MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
713   { "jitverify",                   MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
714   { "literal",                     MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
715   { "locale",                      MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
716   { "mark",                        MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
717   { "match_invalid_utf",           MOD_PAT,  MOD_OPT, PCRE2_MATCH_INVALID_UTF,    PO(options) },
718   { "match_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
719   { "match_line",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
720   { "match_unset_backref",         MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
721   { "match_word",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
722   { "max_pattern_compiled_length", MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_compiled_length) },
723   { "max_pattern_length",          MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
724   { "max_varlookbehind",           MOD_CTC,  MOD_INT, 0,                          CO(max_varlookbehind) },
725   { "memory",                      MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
726   { "multiline",                   MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
727   { "never_backslash_c",           MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
728   { "never_ucp",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
729   { "never_utf",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
730   { "newline",                     MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
731   { "no_auto_capture",             MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
732   { "no_auto_possess",             MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
733   { "no_dotstar_anchor",           MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
734   { "no_jit",                      MOD_DATP, MOD_OPT, PCRE2_NO_JIT,               DO(options) },
735   { "no_start_optimize",           MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
736   { "no_utf_check",                MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
737   { "notbol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
738   { "notempty",                    MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
739   { "notempty_atstart",            MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
740   { "noteol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
741   { "null_context",                MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
742   { "null_pattern",                MOD_PAT,  MOD_CTL, CTL2_NULL_PATTERN,          PO(control2) },
743   { "null_replacement",            MOD_DAT,  MOD_CTL, CTL2_NULL_REPLACEMENT,      DO(control2) },
744   { "null_subject",                MOD_DAT,  MOD_CTL, CTL2_NULL_SUBJECT,          DO(control2) },
745   { "offset",                      MOD_DAT,  MOD_INT, 0,                          DO(offset) },
746   { "offset_limit",                MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
747   { "ovector",                     MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
748   { "parens_nest_limit",           MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
749   { "partial_hard",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
750   { "partial_soft",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
751   { "ph",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
752   { "posix",                       MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
753   { "posix_nosub",                 MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
754   { "posix_startend",              MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
755   { "ps",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
756   { "push",                        MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
757   { "pushcopy",                    MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
758   { "pushtablescopy",              MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
759   { "recursion_limit",             MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
760   { "regerror_buffsize",           MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
761   { "replace",                     MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
762   { "stackguard",                  MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
763   { "startchar",                   MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
764   { "startoffset",                 MOD_DAT,  MOD_INT, 0,                          DO(offset) },
765   { "subject_literal",             MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
766   { "substitute_callout",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
767   { "substitute_extended",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
768   { "substitute_literal",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_LITERAL,    PO(control2) },
769   { "substitute_matched",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_MATCHED,    PO(control2) },
770   { "substitute_overflow_length",  MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
771   { "substitute_replacement_only", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
772   { "substitute_skip",             MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
773   { "substitute_stop",             MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
774   { "substitute_unknown_unset",    MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
775   { "substitute_unset_empty",      MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
776   { "tables",                      MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
777   { "ucp",                         MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
778   { "ungreedy",                    MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
779   { "use_length",                  MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
780   { "use_offset_limit",            MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
781   { "utf",                         MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
782   { "utf8_input",                  MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
783   { "zero_terminate",              MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
784 };
785 
/* Number of entries in modlist. The expansion is parenthesized so that it acts
as a single operand wherever it is used (for example as a divisor). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
787 
788 /* Controls and options that are supported for use with the POSIX interface. */
789 
/* Compile-time option bits. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_LITERAL   | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UTF       | \
  PCRE2_UNGREEDY)

/* No extra compile options are supported. */

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

/* Compile-time control bits, first and second control words. */

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_HEXPAT       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB  | \
  CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

/* Match-time option bits. */

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

/* Match-time control bits, first and second control words. */

#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT)
807 
808 /* Control bits that are not ignored with 'push'. */
809 
/* First control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

/* Second control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 ( \
  CTL2_BSR_SET         | \
  CTL2_HEAPFRAMES_SIZE | \
  CTL2_FRAMESIZE       | \
  CTL2_NL_SET)
817 
818 /* Controls that apply only at compile time with 'push'. */
819 
/* One mask per control word; nothing in the second word qualifies. */

#define PUSH_COMPILE_ONLY_CONTROLS  CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)
822 
823 /* Controls that are forbidden with #pop or #popcopy. */
824 
#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
827 
828 /* Pattern controls that are mutually exclusive. At present these are all in
829 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
830 CTL_POSIX, so it doesn't need its own entries. */
831 
832 static uint32_t exclusive_pat_controls[] = {
833   CTL_POSIX    | CTL_PUSH,
834   CTL_POSIX    | CTL_PUSHCOPY,
835   CTL_POSIX    | CTL_PUSHTABLESCOPY,
836   CTL_PUSH     | CTL_PUSHCOPY,
837   CTL_PUSH     | CTL_PUSHTABLESCOPY,
838   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
839   CTL_EXPAND   | CTL_HEXPAT };
840 
841 /* Data controls that are mutually exclusive. At present these are all in the
842 first control word. */
843 
844 static uint32_t exclusive_dat_controls[] = {
845   CTL_ALLUSEDTEXT        | CTL_STARTCHAR,
846   CTL_FINDLIMITS         | CTL_NULLCONTEXT,
847   CTL_FINDLIMITS_NOHEAP  | CTL_NULLCONTEXT };
848 
849 /* Table of single-character abbreviated modifiers. The index field is
850 initialized to -1, but the first time the modifier is encountered, it is filled
851 in with the index of the full entry in modlist, to save repeated searching when
852 processing multiple test items. This short list is searched serially, so its
853 order does not matter. */
854 
typedef struct c1modstruct {
  const char *fullname;   /* Name of the full modifier in modlist */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Index in modlist; -1 until first looked up */
} c1modstruct;
860 
861 static c1modstruct c1modlist[] = {
862   { "bincode",           'B',           -1 },
863   { "info",              'I',           -1 },
864   { "ascii_all",         'a',           -1 },
865   { "global",            'g',           -1 },
866   { "caseless",          'i',           -1 },
867   { "multiline",         'm',           -1 },
868   { "no_auto_capture",   'n',           -1 },
869   { "caseless_restrict", 'r',           -1 },
870   { "dotall",            's',           -1 },
871   { "extended",          'x',           -1 }
872 };
873 
/* Number of entries in c1modlist. The expansion is parenthesized so that it
acts as a single operand wherever it is used. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
875 
876 /* Table of arguments for the -C command line option. Use macros to make the
877 table itself easier to read. */
878 
/* SUPPORT_8, SUPPORT_16 and SUPPORT_32 are set to 1 when the corresponding
SUPPORT_PCRE2_n symbol is defined. Any of them still undefined after that is
given the value 0, so each can be used as a plain value in coptlist. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#endif
#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#endif
#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#endif

#if !defined SUPPORT_8
#define SUPPORT_8 0
#endif
#if !defined SUPPORT_16
#define SUPPORT_16 0
#endif
#if !defined SUPPORT_32
#define SUPPORT_32 0
#endif
898 
/* Without EBCDIC both values are 0; with it, SUPPORT_EBCDIC is 1 and
EBCDIC_NL is CHAR_LF. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
906 
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined, in which case it
is 0. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
912 
/* One entry in the table of -C arguments. */

typedef struct coptstruct {
  const char *name;    /* Argument name */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* A fixed value or a PCRE2_CONFIG_xxx code */
} coptstruct;
918 
/* Values for the type field of a coptlist entry. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
925 
926 static coptstruct coptlist[] = {
927   { "backslash-C", CONF_FIX, BACKSLASH_C },
928   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
929   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
930   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
931   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
932   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
933   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
934   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
935   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
936   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
937   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
938 };
939 
/* Number of entries in coptlist. The expansion is parenthesized so that it
acts as a single operand wherever it is used. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
941 
/* The helper macros were needed only while building coptlist. */

#undef SUPPORT_EBCDIC
#undef SUPPORT_32
#undef SUPPORT_16
#undef SUPPORT_8
946 
947 
948 /* ----------------------- Static variables ------------------------ */
949 
950 static FILE *infile;
951 static FILE *outfile;
952 
953 static const void *last_callout_mark;
954 static PCRE2_JIT_STACK *jit_stack = NULL;
955 static size_t jit_stack_size = 0;
956 
957 static BOOL first_callout;
958 static BOOL jit_was_used;
959 static BOOL restrict_for_perl_test = FALSE;
960 static BOOL show_memory = FALSE;
961 
962 static int jitrc;                             /* Return from JIT compile */
963 static int test_mode = DEFAULT_TEST_MODE;
964 static int timeit = 0;
965 static int timeitm = 0;
966 
/* Accumulated timings. These are given internal linkage like the other
variables in this section; nothing visible here needs them to be exported. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
970 
971 static uint32_t code_unit_size;               /* Bytes */
972 static uint32_t dfa_matched;
973 static uint32_t forbid_utf = 0;
974 static uint32_t maxlookbehind;
975 static uint32_t max_oveccount;
976 static uint32_t callout_count;
977 static uint32_t maxcapcount;
978 
979 static uint16_t local_newline_default = 0;
980 
981 static VERSION_TYPE jittarget[VERSION_SIZE];
982 static VERSION_TYPE version[VERSION_SIZE];
983 static VERSION_TYPE uversion[VERSION_SIZE];
984 
985 static patctl def_patctl;
986 static patctl pat_patctl;
987 static datctl def_datctl;
988 static datctl dat_datctl;
989 
990 static void *patstack[PATSTACKSIZE];
991 static int patstacknext = 0;
992 
993 static void *malloclist[MALLOCLISTSIZE];
994 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
995 static uint32_t malloclistptr = 0;
996 
/* The regex_t object exists only when the 8-bit library is supported. */

#if defined SUPPORT_PCRE2_8
static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif

/* DFA workspace pointer. */

static int *dfa_workspace = NULL;

/* Character-table pointers and the saved locale name. */

static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t locale_name[32];

static uint8_t *tables3 = NULL;         /* For binary-loaded tables */
static uint32_t loadtables_length = 0;
1007 
1008 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
1009 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
1010 buffer is where all input lines are read. Its size is the same as pbuffer8.
1011 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
1012 are actually compiled from pbuffer16 or pbuffer32. */
1013 
static uint8_t *buffer = NULL;
static uint8_t *pbuffer8 = NULL;
static size_t pbuffer8_size = 50000;      /* Initial size, bytes */

/* Processed data lines all go into dbuffer, which is cast as needed in
non-8-bit modes and grows when a data line is too long for it. */

static uint8_t *dbuffer = NULL;
static size_t dbuffer_size = 1u << 14;    /* Initial size, bytes */
1023 
1024 
1025 /* ---------------- Mode-dependent variables -------------------*/
1026 
/* 8-bit library objects. Each name ends in 8 so that the mode-dependent macros
can select it by pasting the width onto a base name. */

#if defined SUPPORT_PCRE2_8
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif
1035 
/* 16-bit library objects, plus the buffer used for building 16-bit strings. */

#if defined SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t *pbuffer16 = NULL;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
#endif
1046 
/* 32-bit library objects, plus the buffer used for building 32-bit strings. */

#if defined SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t *pbuffer32 = NULL;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
#endif
1057 
1058 
1059 /* ---------------- Macros that work in all modes ----------------- */
1060 
/* CAST8VAR casts the mode-selected variable to uint8_t *. SET and SETPLUS
expand to SETOP with the = and += operators respectively. strlen8 applies
strlen() to a non-char pointer; its argument is parenthesized so that the cast
covers a whole expression such as p + n, not just its first operand. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((const char *)(x))
1065 
1066 
1067 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1068 
1069 /* Define macros for variables and functions that must be selected dynamically
1070 depending on the mode setting (8, 16, 32). These are dependent on which modes
1071 are supported. */
1072 
1073 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1074      defined (SUPPORT_PCRE2_32)) >= 2
1075 
1076 /* ----- All three modes supported ----- */
1077 
1078 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1079 
/* Read field "fld" through the pointer for the current test_mode (base name
"ptr" with 8, 16 or 32 appended by G) and cast the result to "type". */

#define CASTFLD(type,ptr,fld) ( \
  (test_mode == PCRE8_MODE)?  (type)(G(ptr,8)->fld) : \
  (test_mode == PCRE16_MODE)? (type)(G(ptr,16)->fld) : \
                              (type)(G(ptr,32)->fld))
1082 
/* Cast the variable for the current test_mode (base name "var" with 8, 16 or
32 appended by G) to "type". */

#define CASTVAR(type,var) ( \
  (test_mode == PCRE8_MODE)?  (type)G(var,8) : \
  (test_mode == PCRE16_MODE)? (type)G(var,16) : \
                              (type)G(var,32))
1086 
/* Yield, as a uint32_t, the code unit at index "idx" of "ptr", treating "ptr"
as a string of the width chosen by test_mode. */

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == PCRE8_MODE)?  (uint32_t)(((PCRE2_SPTR8)(ptr))[idx]) : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(ptr))[idx]) : \
                              (uint32_t)(((PCRE2_SPTR32)(ptr))[idx]))
1091 
/* Copy one convert context over another, for the current test_mode. */

#define CONCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_convert_context_32))
1098 
/* Copy "c" code units from "b" into the buffer for the current test_mode (base
name "a" with 8, 16 or 32 appended by G); the byte count is scaled by the code
unit size. The final branch is a plain "else", as in the sibling macros, so
that an "else" written after a use of this macro cannot be captured by it. The
arguments are parenthesized so that expressions are treated as a whole. */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),(char *)(b),(c)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),(char *)(b),(c)*2); \
  else \
    memcpy(G(a,32),(char *)(b),(c)*4)
1106 
/* Copy one match context over another, for the current test_mode. */

#define DATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_match_context_32))
1113 
/* Read field "fld" through the pointer for the current test_mode (base name
"ptr" with 8, 16 or 32 appended by G). */

#define FLD(ptr,fld) ( \
  (test_mode == PCRE8_MODE)?  G(ptr,8)->fld : \
  (test_mode == PCRE16_MODE)? G(ptr,16)->fld : \
                              G(ptr,32)->fld)
1116 
/* Copy one compile context over another, for the current test_mode. */

#define PATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_compile_context_32))
1123 
/* Call the pchars function for the current test_mode and store its result in
"rv". "start" is added after the cast, so it counts code units of that width. */

#define PCHARS(rv, str, start, count, utf, f) \
  if (test_mode == PCRE32_MODE) \
    rv = pchars32((PCRE2_SPTR32)(str)+start, count, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    rv = pchars16((PCRE2_SPTR16)(str)+start, count, utf, f); \
  else \
    rv = pchars8((PCRE2_SPTR8)(str)+start, count, utf, f)
1131 
/* As PCHARS, but the result of the pchars function is discarded. */

#define PCHARSV(str, start, count, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(str)+start, count, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(str)+start, count, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(str)+start, count, utf, f)
1139 
/* Call pcre2_callout_enumerate on the compiled code for the current test_mode,
casting "callback" to the matching function-pointer type, and store the result
in "rc". */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))callback, \
      data); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_callout_enumerate_16(compiled_code16, \
      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))callback, \
      data); \
  else \
    rc = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))callback, \
      data)
1150 
/* Copy the code that "src" points to with pcre2_code_copy and store the copy
in the variable for the current test_mode (base name "dst"). */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) G(dst,8) = pcre2_code_copy_8(src); \
  else if (test_mode == PCRE16_MODE) G(dst,16) = pcre2_code_copy_16(src); \
  else G(dst,32) = pcre2_code_copy_32(src)
1158 
/* Copy the code held in the variable for the current test_mode (base name
"src") with pcre2_code_copy and store the copy in "dst" as a void pointer. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) dst = (void *)pcre2_code_copy_8(G(src,8)); \
  else if (test_mode == PCRE16_MODE) dst = (void *)pcre2_code_copy_16(G(src,16)); \
  else dst = (void *)pcre2_code_copy_32(G(src,32))
1166 
/* As PCRE2_CODE_COPY_TO_VOID, but using pcre2_code_copy_with_tables. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_8(G(src,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_16(G(src,16)); \
  else \
    dst = (void *)pcre2_code_copy_with_tables_32(G(src,32))
1174 
/* Call pcre2_compile for the current test_mode and store the result in the
width-specific variable whose base name is "dst". The remaining arguments are
passed through unchanged. */

#define PCRE2_COMPILE(dst,pattern,length,options,errcode,erroffset,ccontext) \
  if (test_mode == PCRE8_MODE) \
    G(dst,8) = \
      pcre2_compile_8(pattern,length,options,errcode,erroffset,ccontext); \
  else if (test_mode == PCRE16_MODE) \
    G(dst,16) = \
      pcre2_compile_16(pattern,length,options,errcode,erroffset,ccontext); \
  else \
    G(dst,32) = \
      pcre2_compile_32(pattern,length,options,errcode,erroffset,ccontext)
1182 
/* Call pcre2_converted_pattern_free for the current test_mode, casting
"pattern" to the matching code unit pointer type. */

#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pattern); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pattern); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pattern)
1187 
/* Call pcre2_dfa_match for the current test_mode and store the result in "rc".
"code" and "mdata" are base names completed by G; "subject" is cast to the
matching string type; the other arguments are passed through unchanged. */

#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mcontext,workspace,wscount) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
      options,G(mdata,8),mcontext,workspace,wscount); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
      options,G(mdata,16),mcontext,workspace,wscount); \
  else \
    rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
      options,G(mdata,32),mcontext,workspace,wscount)
1195 
/* Call pcre2_get_error_message for the current test_mode and store the result
in "rc". "buf" is a base name: the buffer passed is buf8, buf16 or buf32, and
the length passed is the companion _size variable, divided by 2 or 4 in the
wider modes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size/2)); \
  else \
    rc = pcre2_get_error_message_32(errcode,G(buf,32),G(G(buf,32),_size/4))
1203 
/* Call pcre2_get_match_data_heapframes_size on the match data for the current
test_mode (base name "mdata") and store the result in "size". */

#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(size,mdata) \
  if (test_mode == PCRE8_MODE) \
    size = pcre2_get_match_data_heapframes_size_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    size = pcre2_get_match_data_heapframes_size_16(G(mdata,16)); \
  else \
    size = pcre2_get_match_data_heapframes_size_32(G(mdata,32))
1211 
/* Call pcre2_get_ovector_count on the match data for the current test_mode
(base name "mdata") and store the result in "count". */

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == PCRE8_MODE) count = pcre2_get_ovector_count_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) count = pcre2_get_ovector_count_16(G(mdata,16)); \
  else count = pcre2_get_ovector_count_32(G(mdata,32))
1219 
/* Call pcre2_get_startchar on the match data for the current test_mode (base
name "mdata") and store the result in "result". */

#define PCRE2_GET_STARTCHAR(result,mdata) \
  if (test_mode == PCRE8_MODE) result = pcre2_get_startchar_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) result = pcre2_get_startchar_16(G(mdata,16)); \
  else result = pcre2_get_startchar_32(G(mdata,32))
1227 
/* Call pcre2_jit_compile on the code for the current test_mode (base name
"code") with "options", and store the result in "rc". */

#define PCRE2_JIT_COMPILE(rc,code,options) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_compile_8(G(code,8),options); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_compile_16(G(code,16),options); \
  else \
    rc = pcre2_jit_compile_32(G(code,32),options)
1232 
/* Call pcre2_jit_free_unused_memory with the context for the current test_mode
(base name "gcontext"). */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(gcontext) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(gcontext,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(gcontext,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(gcontext,32))
1237 
/* Call pcre2_jit_match for the current test_mode and store the result in "rc".
"code" and "mdata" are base names completed by G; "subject" is cast to the
matching string type; the other arguments are passed through unchanged. */

#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mcontext) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
      options,G(mdata,8),mcontext); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
      options,G(mdata,16),mcontext); \
  else \
    rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
      options,G(mdata,32),mcontext)
1245 
/* Call pcre2_jit_stack_create for the current test_mode, passing b, c and d
through, and store the result in "a" as a PCRE2_JIT_STACK pointer. As with the
other mode-dependent macros, the expansion has no trailing semicolon: the
caller supplies it, which keeps "if (x) MACRO(...); else ..." valid. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1253 
/* Call pcre2_jit_stack_assign for the current test_mode. "a" is a base name
completed by G, "b" is cast to the matching callback type, and "c" is passed
through. As with the other mode-dependent macros, the expansion has no trailing
semicolon: the caller supplies it, which keeps "if (x) MACRO(...); else ..."
valid. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1261 
/* Call pcre2_jit_stack_free for the current test_mode, casting "a" to the
matching JIT stack pointer type. As with the other mode-dependent macros, the
expansion has no trailing semicolon: the caller supplies it, which keeps
"if (x) MACRO(...); else ..." valid. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1269 
/* Call pcre2_maketables with the context for the current test_mode (base name
"gcontext") and store the result in "tables". */

#define PCRE2_MAKETABLES(tables,gcontext) \
  if (test_mode == PCRE8_MODE) \
    tables = pcre2_maketables_8(G(gcontext,8)); \
  else if (test_mode == PCRE16_MODE) \
    tables = pcre2_maketables_16(G(gcontext,16)); \
  else \
    tables = pcre2_maketables_32(G(gcontext,32))
1274 
/* Call pcre2_maketables_free with the context for the current test_mode (base
name "gcontext") and "tables". */

#define PCRE2_MAKETABLES_FREE(gcontext,tables) \
  if (test_mode == PCRE8_MODE) \
    pcre2_maketables_free_8(G(gcontext,8),tables); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_maketables_free_16(G(gcontext,16),tables); \
  else \
    pcre2_maketables_free_32(G(gcontext,32),tables)
1279 
/* Call pcre2_match for the current test_mode and store the result in "rc".
"code" and "mdata" are base names completed by G; "subject" is cast to the
matching string type; the other arguments are passed through unchanged. */

#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mcontext) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
      options,G(mdata,8),mcontext); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
      options,G(mdata,16),mcontext); \
  else \
    rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
      options,G(mdata,32),mcontext)
1287 
/* Call pcre2_match_data_create with "ovecsize" and the context for the current
test_mode (base name "gcontext"), storing the result in the width-specific
variable whose base name is "mdata". */

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gcontext) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_8(ovecsize,G(gcontext,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_16(ovecsize,G(gcontext,16)); \
  else \
    G(mdata,32) = pcre2_match_data_create_32(ovecsize,G(gcontext,32))
1295 
/* Call pcre2_match_data_create_from_pattern for the current test_mode. All
three arguments are base names completed by G; the result is stored in the
"mdata" variable. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gcontext) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = \
      pcre2_match_data_create_from_pattern_8(G(code,8),G(gcontext,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = \
      pcre2_match_data_create_from_pattern_16(G(code,16),G(gcontext,16)); \
  else \
    G(mdata,32) = \
      pcre2_match_data_create_from_pattern_32(G(code,32),G(gcontext,32))
1303 
/* Call pcre2_match_data_free on the match data for the current test_mode (base
name "mdata"). */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == PCRE8_MODE) pcre2_match_data_free_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) pcre2_match_data_free_16(G(mdata,16)); \
  else pcre2_match_data_free_32(G(mdata,32))
1311 
/* Call pcre2_pattern_convert for the current test_mode and store the result in
"rc". "pattern" and "cvcontext" are base names completed by G; "bufptr" is cast
to a pointer to the matching code unit pointer type; the other arguments are
passed through unchanged. */

#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,blength,cvcontext) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_convert_8(G(pattern,8),length,options, \
      (PCRE2_UCHAR8 **)bufptr,blength,G(cvcontext,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_convert_16(G(pattern,16),length,options, \
      (PCRE2_UCHAR16 **)bufptr,blength,G(cvcontext,16)); \
  else \
    rc = pcre2_pattern_convert_32(G(pattern,32),length,options, \
      (PCRE2_UCHAR32 **)bufptr,blength,G(cvcontext,32))
1319 
/* Call pcre2_pattern_info on the code for the current test_mode (base name
"code"), passing "what" and "where" through, and store the result in "rc". */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == PCRE8_MODE) rc = pcre2_pattern_info_8(G(code,8),what,where); \
  else if (test_mode == PCRE16_MODE) rc = pcre2_pattern_info_16(G(code,16),what,where); \
  else rc = pcre2_pattern_info_32(G(code,32),what,where)
1327 
/* Call pcre2_printint on the compiled code for the current test_mode, with
outfile as the stream and "arg" passed through. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == PCRE8_MODE) pcre2_printint_8(compiled_code8,outfile,arg); \
  else if (test_mode == PCRE16_MODE) pcre2_printint_16(compiled_code16,outfile,arg); \
  else pcre2_printint_32(compiled_code32,outfile,arg)
1335 
1336 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1337   if (test_mode == PCRE8_MODE) \
1338     r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
1339   else if (test_mode == PCRE16_MODE) \
1340     r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
1341   else \
1342     r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
1343 
1344 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1345   if (test_mode == PCRE8_MODE) \
1346     r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
1347   else if (test_mode == PCRE16_MODE) \
1348     r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
1349   else \
1350     r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
1351 
1352 #define PCRE2_SERIALIZE_FREE(a) \
1353   if (test_mode == PCRE8_MODE) \
1354     pcre2_serialize_free_8(a); \
1355   else if (test_mode == PCRE16_MODE) \
1356     pcre2_serialize_free_16(a); \
1357   else \
1358     pcre2_serialize_free_32(a)
1359 
/* Assign to r the result of the pcre2_serialize_get_number_of_codes function
for the current test_mode, passing a through unchanged. The expansion ends
without a semicolon or line continuation: the caller supplies the semicolon,
which keeps the macro usable as the body of an if that has an else. */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1367 
/* Call the pcre2_set_callout function for the current test_mode on the
matching-width variant of context a. The callback b is cast to the callout
function type of that width; c is passed through unchanged. The expansion has
no trailing semicolon, so that "if (x) PCRE2_SET_CALLOUT(...); else ..." is
valid; the caller supplies the semicolon. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1375 
1376 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1377   if (test_mode == PCRE8_MODE) \
1378     pcre2_set_character_tables_8(G(a,8),b); \
1379   else if (test_mode == PCRE16_MODE) \
1380     pcre2_set_character_tables_16(G(a,16),b); \
1381   else \
1382     pcre2_set_character_tables_32(G(a,32),b)
1383 
1384 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1385   if (test_mode == PCRE8_MODE) \
1386     pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
1387   else if (test_mode == PCRE16_MODE) \
1388     pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
1389   else \
1390     pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1391 
1392 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1393   if (test_mode == PCRE8_MODE) \
1394     pcre2_set_depth_limit_8(G(a,8),b); \
1395   else if (test_mode == PCRE16_MODE) \
1396     pcre2_set_depth_limit_16(G(a,16),b); \
1397   else \
1398     pcre2_set_depth_limit_32(G(a,32),b)
1399 
1400 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
1401   if (test_mode == PCRE8_MODE) \
1402     r = pcre2_set_glob_separator_8(G(a,8),b); \
1403   else if (test_mode == PCRE16_MODE) \
1404     r = pcre2_set_glob_separator_16(G(a,16),b); \
1405   else \
1406     r = pcre2_set_glob_separator_32(G(a,32),b)
1407 
1408 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
1409   if (test_mode == PCRE8_MODE) \
1410     r = pcre2_set_glob_escape_8(G(a,8),b); \
1411   else if (test_mode == PCRE16_MODE) \
1412     r = pcre2_set_glob_escape_16(G(a,16),b); \
1413   else \
1414     r = pcre2_set_glob_escape_32(G(a,32),b)
1415 
1416 #define PCRE2_SET_HEAP_LIMIT(a,b) \
1417   if (test_mode == PCRE8_MODE) \
1418     pcre2_set_heap_limit_8(G(a,8),b); \
1419   else if (test_mode == PCRE16_MODE) \
1420     pcre2_set_heap_limit_16(G(a,16),b); \
1421   else \
1422     pcre2_set_heap_limit_32(G(a,32),b)
1423 
1424 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1425   if (test_mode == PCRE8_MODE) \
1426     pcre2_set_match_limit_8(G(a,8),b); \
1427   else if (test_mode == PCRE16_MODE) \
1428     pcre2_set_match_limit_16(G(a,16),b); \
1429   else \
1430     pcre2_set_match_limit_32(G(a,32),b)
1431 
1432 #define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) \
1433   if (test_mode == PCRE8_MODE) \
1434     pcre2_set_max_pattern_compiled_length_8(G(a,8),b); \
1435   else if (test_mode == PCRE16_MODE) \
1436     pcre2_set_max_pattern_compiled_length_16(G(a,16),b); \
1437   else \
1438     pcre2_set_max_pattern_compiled_length_32(G(a,32),b)
1439 
1440 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1441   if (test_mode == PCRE8_MODE) \
1442     pcre2_set_max_pattern_length_8(G(a,8),b); \
1443   else if (test_mode == PCRE16_MODE) \
1444     pcre2_set_max_pattern_length_16(G(a,16),b); \
1445   else \
1446     pcre2_set_max_pattern_length_32(G(a,32),b)
1447 
1448 #define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) \
1449   if (test_mode == PCRE8_MODE) \
1450     pcre2_set_max_varlookbehind_8(G(a,8),b); \
1451   else if (test_mode == PCRE16_MODE) \
1452     pcre2_set_max_varlookbehind_16(G(a,16),b); \
1453   else \
1454     pcre2_set_max_varlookbehind_32(G(a,32),b)
1455 
1456 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1457   if (test_mode == PCRE8_MODE) \
1458     pcre2_set_offset_limit_8(G(a,8),b); \
1459   else if (test_mode == PCRE16_MODE) \
1460     pcre2_set_offset_limit_16(G(a,16),b); \
1461   else \
1462     pcre2_set_offset_limit_32(G(a,32),b)
1463 
1464 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1465   if (test_mode == PCRE8_MODE) \
1466     pcre2_set_parens_nest_limit_8(G(a,8),b); \
1467   else if (test_mode == PCRE16_MODE) \
1468     pcre2_set_parens_nest_limit_16(G(a,16),b); \
1469   else \
1470     pcre2_set_parens_nest_limit_32(G(a,32),b)
1471 
1472 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
1473   if (test_mode == PCRE8_MODE) \
1474     pcre2_set_substitute_callout_8(G(a,8), \
1475       (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
1476   else if (test_mode == PCRE16_MODE) \
1477     pcre2_set_substitute_callout_16(G(a,16), \
1478       (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
1479   else \
1480     pcre2_set_substitute_callout_32(G(a,32), \
1481       (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
1482 
1483 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1484   if (test_mode == PCRE8_MODE) \
1485     a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
1486       (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
1487   else if (test_mode == PCRE16_MODE) \
1488     a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
1489       (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
1490   else \
1491     a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
1492       (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1493 
1494 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1495   if (test_mode == PCRE8_MODE) \
1496     a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
1497   else if (test_mode == PCRE16_MODE) \
1498     a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
1499   else \
1500     a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
1501 
1502 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1503   if (test_mode == PCRE8_MODE) \
1504     a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
1505   else if (test_mode == PCRE16_MODE) \
1506     a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
1507   else \
1508     a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
1509 
1510 #define PCRE2_SUBSTRING_FREE(a) \
1511   if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
1512   else if (test_mode == PCRE16_MODE) \
1513     pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
1514   else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1515 
1516 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1517   if (test_mode == PCRE8_MODE) \
1518     a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
1519   else if (test_mode == PCRE16_MODE) \
1520     a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
1521   else \
1522     a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
1523 
1524 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1525   if (test_mode == PCRE8_MODE) \
1526     a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
1527   else if (test_mode == PCRE16_MODE) \
1528     a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
1529   else \
1530     a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1531 
1532 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1533   if (test_mode == PCRE8_MODE) \
1534     a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
1535   else if (test_mode == PCRE16_MODE) \
1536     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
1537   else \
1538     a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
1539 
1540 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1541   if (test_mode == PCRE8_MODE) \
1542     a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
1543   else if (test_mode == PCRE16_MODE) \
1544     a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
1545   else \
1546     a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1547 
1548 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1549   if (test_mode == PCRE8_MODE) \
1550     a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
1551   else if (test_mode == PCRE16_MODE) \
1552     a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
1553   else \
1554     a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
1555 
1556 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1557   if (test_mode == PCRE8_MODE) \
1558     pcre2_substring_list_free_8((PCRE2_UCHAR8 **)a); \
1559   else if (test_mode == PCRE16_MODE) \
1560     pcre2_substring_list_free_16((PCRE2_UCHAR16 **)a); \
1561   else \
1562     pcre2_substring_list_free_32((PCRE2_UCHAR32 **)a)
1563 
1564 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
1565   if (test_mode == PCRE8_MODE) \
1566     a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
1567   else if (test_mode == PCRE16_MODE) \
1568     a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
1569   else \
1570     a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1571 
/* Expression macro: yields, converted to void *, the 8-, 16- or 32-bit
variant of the variable whose base name is given, chosen by test_mode. */
#define PTR(var) ( \
  (test_mode == PCRE8_MODE)?  (void *)G(var,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(var,16) : \
                              (void *)G(var,32))
1576 
/* Statement macro: store val in member fld of the structure pointed to by
the variant of obj that matches test_mode. The caller supplies the final
semicolon. */
#define SETFLD(obj,fld,val) \
  if (test_mode == PCRE8_MODE) \
    G(obj,8)->fld = val; \
  else if (test_mode == PCRE16_MODE) \
    G(obj,16)->fld = val; \
  else \
    G(obj,32)->fld = val
1581 
1582 #define SETFLDVEC(x,y,v,z) \
1583   if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \
1584   else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \
1585   else G(x,32)->y[v] = z
1586 
1587 #define SETOP(x,y,z) \
1588   if (test_mode == PCRE8_MODE) G(x,8) z y; \
1589   else if (test_mode == PCRE16_MODE) G(x,16) z y; \
1590   else G(x,32) z y
1591 
1592 #define SETCASTPTR(x,y) \
1593   if (test_mode == PCRE8_MODE) \
1594     G(x,8) = (uint8_t *)(y); \
1595   else if (test_mode == PCRE16_MODE) \
1596     G(x,16) = (uint16_t *)(y); \
1597   else \
1598     G(x,32) = (uint32_t *)(y)
1599 
/* Expression macro: the length of the string at str, converted to int. In
8-bit mode strlen() is applied to str viewed as char *; in the other modes
strlen16() or strlen32() is applied to str cast to the matching PCRE2_SPTR
type. */
#define STRLEN(str) ( \
  (test_mode == PCRE8_MODE)?  (int)strlen((char *)str) : \
  (test_mode == PCRE16_MODE)? (int)strlen16((PCRE2_SPTR16)str) : \
                              (int)strlen32((PCRE2_SPTR32)str))
1603 
/* Statement macro: call the variant of fn that matches test_mode, passing
the same-width variant of arg as its only argument. The caller supplies the
final semicolon. */
#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))
1608 
1609 #define SUB2(a,b,c) \
1610   if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
1611   else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
1612   else G(a,32)(G(b,32),G(c,32))
1613 
/* Expression macro: true when the variant of var that matches test_mode
stands in relation op to (val). Only the comparison for the active mode is
evaluated, because each one is guarded by its own test_mode check. */
#define TEST(var,op,val) ( \
  (test_mode == PCRE8_MODE  && G(var,8)  op (val)) || \
  (test_mode == PCRE16_MODE && G(var,16) op (val)) || \
  (test_mode == PCRE32_MODE && G(var,32) op (val)))
1618 
/* Expression macro: true when member fld of the structure pointed to by the
variant of obj that matches test_mode stands in relation op to (val). Only the
member for the active mode is read. */
#define TESTFLD(obj,fld,op,val) ( \
  (test_mode == PCRE8_MODE  && G(obj,8)->fld  op (val)) || \
  (test_mode == PCRE16_MODE && G(obj,16)->fld op (val)) || \
  (test_mode == PCRE32_MODE && G(obj,32)->fld op (val)))
1623 
1624 
1625 /* ----- Two out of three modes are supported ----- */
1626 
1627 #else
1628 
1629 /* We can use some macro trickery to make a single set of definitions work in
1630 the three different cases. */
1631 
1632 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1633 
1634 #if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
1635 #define BITONE 32
1636 #define BITTWO 16
1637 
1638 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
1639 
1640 #elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
1641 #define BITONE 32
1642 #define BITTWO 8
1643 
1644 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
1645 
1646 #else
1647 #define BITONE 16
1648 #define BITTWO 8
1649 #endif
1650 
1651 
1652 /* ----- Common macros for two-mode cases ----- */
1653 
1654 #define BYTEONE (BITONE/8)
1655 #define BYTETWO (BITTWO/8)
1656 
1657 #define CASTFLD(t,a,b) \
1658   ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \
1659     (t)(G(a,BITTWO)->b))
1660 
1661 #define CASTVAR(t,x) ( \
1662   (test_mode == G(G(PCRE,BITONE),_MODE))? \
1663     (t)G(x,BITONE) : (t)G(x,BITTWO))
1664 
1665 #define CODE_UNIT(a,b) ( \
1666   (test_mode == G(G(PCRE,BITONE),_MODE))? \
1667   (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
1668   (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
1669 
1670 #define CONCTXCPY(a,b) \
1671   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1672     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
1673   else \
1674     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))
1675 
/* Expression macro: memcpy() c code units from b into the variant of buffer a
that matches test_mode. The byte count is c scaled by BYTEONE or BYTETWO, the
number of bytes per code unit for the chosen width. The whole conditional is
enclosed in parentheses so that the macro keeps its meaning when it appears
inside a larger expression; its value is the pointer returned by memcpy(). */
#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1680 
1681 #define DATCTXCPY(a,b) \
1682   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1683     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
1684   else \
1685     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))
1686 
1687 #define FLD(a,b) \
1688   ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b)
1689 
1690 #define PATCTXCPY(a,b) \
1691   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1692     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
1693   else \
1694     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))
1695 
1696 #define PCHARS(lv, p, offset, len, utf, f) \
1697   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1698     lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1699   else \
1700     lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1701 
1702 #define PCHARSV(p, offset, len, utf, f) \
1703   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1704     (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1705   else \
1706     (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1707 
1708 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1709   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1710      a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \
1711        (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
1712   else \
1713      a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \
1714        (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1715 
1716 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
1717   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1718     G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
1719   else \
1720     G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)
1721 
1722 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
1723   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1724     a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
1725   else \
1726     a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
1727 
1728 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
1729   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1730     a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
1731   else \
1732     a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
1733 
1734 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1735   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1736     G(a,BITONE) = G(pcre2_compile_,BITONE)(b,c,d,e,f,g); \
1737   else \
1738     G(a,BITTWO) = G(pcre2_compile_,BITTWO)(b,c,d,e,f,g)
1739 
1740 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
1741   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1742     G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
1743   else \
1744     G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
1745 
1746 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1747   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1748     a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1749       G(g,BITONE),h,i,j); \
1750   else \
1751     a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1752       G(g,BITTWO),h,i,j)
1753 
1754 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1755   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1756     r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
1757   else \
1758     r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))
1759 
1760 #define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
1761   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1762     r = G(pcre2_get_match_data_heapframes_size_,BITONE)(G(a,BITONE)); \
1763   else \
1764     r = G(pcre2_get_match_data_heapframes_size_,BITTWO)(G(a,BITTWO))
1765 
1766 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1767   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1768     a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
1769   else \
1770     a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))
1771 
1772 #define PCRE2_GET_STARTCHAR(a,b) \
1773   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1774     a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
1775   else \
1776     a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1777 
1778 #define PCRE2_JIT_COMPILE(r,a,b) \
1779   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1780     r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
1781   else \
1782     r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)
1783 
1784 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1785   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1786     G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
1787   else \
1788     G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))
1789 
1790 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1791   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1792     a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1793       G(g,BITONE),h); \
1794   else \
1795     a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1796       G(g,BITTWO),h)
1797 
/* Assign to a the result of the pcre2_jit_stack_create function for the
current test_mode, cast to PCRE2_JIT_STACK *; b, c and d are passed through
unchanged. The expansion ends without a semicolon or line continuation: the
caller supplies the semicolon, which keeps the macro usable as the body of an
if that has an else, and the definition no longer depends on a blank line
following it. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)
1803 
/* Call the pcre2_jit_stack_assign function for the current test_mode on the
matching-width variant of context a. The callback b is cast to the
pcre2_jit_callback type of that width; c is passed through unchanged. The
expansion has no trailing semicolon, so the macro can be the body of an if
that has an else; the caller supplies the semicolon. */
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c)
1809 
/* Call the pcre2_jit_stack_free function for the current test_mode, passing a
cast to a pointer to the pcre2_jit_stack type of that width. The expansion has
no trailing semicolon, so the macro can be the body of an if that has an else;
the caller supplies the semicolon. */
#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1815 
1816 #define PCRE2_MAKETABLES(a,c) \
1817   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1818     a = G(pcre2_maketables_,BITONE)(G(c,BITONE)); \
1819   else \
1820     a = G(pcre2_maketables_,BITTWO)(G(c,BITTWO))
1821 
1822 #define PCRE2_MAKETABLES_FREE(c,a) \
1823   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1824     G(pcre2_maketables_free_,BITONE)(G(c,BITONE),a); \
1825   else \
1826     G(pcre2_maketables_free_,BITTWO)(G(c,BITTWO),a)
1827 
1828 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1829   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1830     a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1831       G(g,BITONE),h); \
1832   else \
1833     a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1834       G(g,BITTWO),h)
1835 
1836 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1837   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1838     G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,G(c,BITONE)); \
1839   else \
1840     G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,G(c,BITTWO))
1841 
1842 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1843   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1844     G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),G(c,BITONE)); \
1845   else \
1846     G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1847 
1848 #define PCRE2_MATCH_DATA_FREE(a) \
1849   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1850     G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
1851   else \
1852     G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
1853 
1854 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
1855   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1856     a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
1857   else \
1858     a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))
1859 
1860 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1861   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1862     a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
1863   else \
1864     a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)
1865 
/* Statement macro: call the pcre2_printint function of the width selected by
test_mode, giving it the same-width compiled_code variable, outfile, and the
macro argument. The caller supplies the final semicolon. */
#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)
1871 
1872 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1873  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1874     r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
1875   else \
1876     r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))
1877 
/* Statement macro: assign to r the result of the pcre2_serialize_encode
function of the width selected by test_mode. The first argument is cast to a
pointer to pointer to const pcre2_code of that width, the last argument is
replaced by its same-width variant, and the middle three are passed through
unchanged. The caller supplies the final semicolon. */
#define PCRE2_SERIALIZE_ENCODE(r,list,count,bytes,size,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)list,count,bytes,size,G(gctx,BITONE)); \
  else \
    r = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)list,count,bytes,size,G(gctx,BITTWO))
1883 
1884 #define PCRE2_SERIALIZE_FREE(a) \
1885  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1886     G(pcre2_serialize_free_,BITONE)(a); \
1887   else \
1888     G(pcre2_serialize_free_,BITTWO)(a)
1889 
1890 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
1891  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1892     r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \
1893   else \
1894     r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a)
1895 
/* Call the pcre2_set_callout function for the current test_mode on the
matching-width variant of context a. The callback b is cast to the callout
function type of that width; c is passed through unchanged. The expansion has
no trailing semicolon, so that "if (x) PCRE2_SET_CALLOUT(...); else ..." is
valid; the caller supplies the semicolon. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1903 
1904 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1905   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1906     G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
1907   else \
1908     G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
1909 
1910 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1911   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1912     G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
1913   else \
1914     G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)
1915 
1916 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1917   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1918     G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
1919   else \
1920     G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
1921 
1922 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
1923   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1924     r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
1925   else \
1926     r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
1927 
1928 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
1929   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1930     r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
1931   else \
1932     r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
1933 
1934 #define PCRE2_SET_HEAP_LIMIT(a,b) \
1935   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1936     G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
1937   else \
1938     G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
1939 
1940 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1941   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1942     G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
1943   else \
1944     G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
1945 
1946 #define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) \
1947   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1948     G(pcre2_set_max_pattern_compiled_length_,BITONE)(G(a,BITONE),b); \
1949   else \
1950     G(pcre2_set_max_pattern_compiled_length_,BITTWO)(G(a,BITTWO),b)
1951 
1952 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1953   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1954     G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
1955   else \
1956     G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)
1957 
1958 #define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) \
1959   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1960     G(pcre2_set_max_varlookbehind_,BITONE)(G(a,BITONE),b); \
1961   else \
1962     G(pcre2_set_max_varlookbehind_,BITTWO)(G(a,BITTWO),b)
1963 
1964 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1965   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1966     G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
1967   else \
1968     G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)
1969 
1970 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1971   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1972     G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
1973   else \
1974     G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
1975 
1976 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
1977   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1978     G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
1979       (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
1980   else \
1981     G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
1982       (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
1983 
1984 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1985   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1986     a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1987       G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \
1988       (G(PCRE2_UCHAR,BITONE) *)k,l); \
1989   else \
1990     a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1991       G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \
1992       (G(PCRE2_UCHAR,BITTWO) *)k,l)
1993 
1994 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1995   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1996     a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
1997       (G(PCRE2_UCHAR,BITONE) *)d,e); \
1998   else \
1999     a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
2000       (G(PCRE2_UCHAR,BITTWO) *)d,e)
2001 
2002 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2003   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2004     a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\
2005       (G(PCRE2_UCHAR,BITONE) *)d,e); \
2006   else \
2007     a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\
2008       (G(PCRE2_UCHAR,BITTWO) *)d,e)
2009 
2010 #define PCRE2_SUBSTRING_FREE(a) \
2011   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2012     G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
2013   else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
2014 
2015 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2016   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2017     a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
2018       (G(PCRE2_UCHAR,BITONE) **)d,e); \
2019   else \
2020     a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
2021       (G(PCRE2_UCHAR,BITTWO) **)d,e)
2022 
2023 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2024   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2025     a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\
2026       (G(PCRE2_UCHAR,BITONE) **)d,e); \
2027   else \
2028     a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\
2029       (G(PCRE2_UCHAR,BITTWO) **)d,e)
2030 
2031 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2032   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2033     a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
2034   else \
2035     a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)
2036 
2037 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2038   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2039     a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
2040   else \
2041     a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)
2042 
2043 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2044   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2045     a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
2046       (G(PCRE2_UCHAR,BITONE) ***)c,d); \
2047   else \
2048     a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
2049       (G(PCRE2_UCHAR,BITTWO) ***)c,d)
2050 
2051 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2052   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2053     G(pcre2_substring_list_free_,BITONE)((G(PCRE2_UCHAR,BITONE) **)a); \
2054   else \
2055     G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) **)a)
2056 
2057 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2058   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2059     a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
2060   else \
2061     a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2062 
2063 #define PTR(x) ( \
2064   (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
2065   (void *)G(x,BITTWO))
2066 
2067 #define SETFLD(x,y,z) \
2068   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \
2069   else G(x,BITTWO)->y = z
2070 
2071 #define SETFLDVEC(x,y,v,z) \
2072   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \
2073   else G(x,BITTWO)->y[v] = z
2074 
2075 #define SETOP(x,y,z) \
2076   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
2077   else G(x,BITTWO) z y
2078 
2079 #define SETCASTPTR(x,y) \
2080   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2081     G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
2082   else \
2083     G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)
2084 
/* Expression macro: the length of the string at p, obtained from the strlen
function whose numeric suffix matches test_mode, with p cast to the PCRE2_SPTR
type of that width. The result is converted to int so that the macro has the
same type as the three-mode definition of STRLEN, which casts every branch to
int. */
#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  ((int)G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p)) : \
  ((int)G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)))
2088 
2089 #define SUB1(a,b) \
2090   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
2091     G(a,BITONE)(G(b,BITONE)); \
2092   else \
2093     G(a,BITTWO)(G(b,BITTWO))
2094 
/* Statement macro: call the variant of a that matches test_mode, passing the
same-width variants of b and c as its two arguments. The caller supplies the
final semicolon. (An unbalanced closing parenthesis after the function name
previously made every expansion of this macro a syntax error.) */
#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2100 
/* Expression that is true when test_mode is one of the two supported modes
and the matching width-suffixed variant of "var" satisfies "var relop (rhs)".
It is false for any other value of test_mode. */

#define TEST(var,relop,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && \
      G(var,BITONE) relop (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && \
      G(var,BITTWO) relop (rhs)))
2104 
/* Expression that is true when test_mode is one of the two supported modes
and member "field", reached through the matching width-suffixed variant of
"base", satisfies "field relop (rhs)". It is false for any other test_mode. */

#define TESTFLD(base,field,relop,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && \
      G(base,BITONE)->field relop (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && \
      G(base,BITTWO)->field relop (rhs)))
2108 
2109 
2110 #endif  /* Two out of three modes */
2111 
2112 /* ----- End of cases where more than one mode is supported ----- */
2113 
2114 
2115 /* ----- Only 8-bit mode is supported ----- */
2116 
2117 #elif defined SUPPORT_PCRE2_8
/* With a single width compiled in, each generic macro maps directly onto the
corresponding _8 function, type or variable and test_mode is not consulted.
G(x,8) names the 8-bit variant of variable x. Statement-like macros carry no
trailing semicolon of their own: the caller supplies it, so that a use such as
"if (...) MACRO(...); else ..." parses the same way here as it does with the
multi-mode definitions. (PCRE2_JIT_STACK_CREATE, PCRE2_JIT_STACK_ASSIGN,
PCRE2_JIT_STACK_FREE and PCRE2_SUBSTRING_NUMBER_FROM_NAME used to end with a
semicolon, which broke that usage.) */

#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_8(compiled_code8, \
     (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,8) = pcre2_compile_8(b,c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
  r = pcre2_get_match_data_heapframes_size_8(G(a,8))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_8(G(c,8))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_8(G(c,8),a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,G(c,8))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),G(c,8))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) pcre2_set_max_pattern_compiled_length_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_8(G(a,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_UCHAR8 **)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
#define PTR(x) (void *)G(x,8)
#define SETFLD(x,y,z) G(x,8)->y = z
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
#define SETOP(x,y,z) G(x,8) z y
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
#define TEST(x,r,y) (G(x,8) r (y))
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
2224 
2225 
2226 /* ----- Only 16-bit mode is supported ----- */
2227 
2228 #elif defined SUPPORT_PCRE2_16
/* With a single width compiled in, each generic macro maps directly onto the
corresponding _16 function, type or variable and test_mode is not consulted.
G(x,16) names the 16-bit variant of variable x. CONVERT_COPY multiplies its
count by 2 before handing it to memcpy, and PCRE2_GET_ERROR_MESSAGE passes the
buffer's _size variable divided by 2. Statement-like macros carry no trailing
semicolon of their own: the caller supplies it, so that a use such as
"if (...) MACRO(...); else ..." parses the same way here as it does with the
multi-mode definitions. PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH and
PCRE2_SET_MAX_PATTERN_LENGTH are defined here to match the 8-bit-only set. */

#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_16(compiled_code16, \
     (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,16) = pcre2_compile_16(b,c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
  r = pcre2_get_match_data_heapframes_size_16(G(a,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_16(G(c,16))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_16(G(c,16),a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,G(c,16))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),G(c,16))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) pcre2_set_max_pattern_compiled_length_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_16(G(a,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_UCHAR16 **)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
#define PTR(x) (void *)G(x,16)
#define SETFLD(x,y,z) G(x,16)->y = z
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
#define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y))
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2333 
2334 
2335 /* ----- Only 32-bit mode is supported ----- */
2336 
2337 #elif defined SUPPORT_PCRE2_32
/* With a single width compiled in, each generic macro maps directly onto the
corresponding _32 function, type or variable and test_mode is not consulted.
G(x,32) names the 32-bit variant of variable x. CONVERT_COPY multiplies its
count by 4 before handing it to memcpy, and PCRE2_GET_ERROR_MESSAGE passes the
buffer's _size variable divided by 4. Statement-like macros carry no trailing
semicolon of their own: the caller supplies it, so that a use such as
"if (...) MACRO(...); else ..." parses the same way here as it does with the
multi-mode definitions. PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH and
PCRE2_SET_MAX_PATTERN_LENGTH are defined here to match the 8-bit-only set. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_32(compiled_code32, \
     (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,32) = pcre2_compile_32(b,c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
  r = pcre2_get_match_data_heapframes_size_32(G(a,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_32(G(c,32))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_32(G(c,32),a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,G(c,32))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) pcre2_set_max_pattern_compiled_length_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_32(G(a,32), \
    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_UCHAR32 **)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2442 
2443 #endif
2444 
2445 /* ----- End of mode-specific function call macros ----- */
2446 
2447 
2448 
2449 
2450 /*************************************************
2451 *         Alternate character tables             *
2452 *************************************************/
2453 
2454 /* By default, the "tables" pointer in the compile context when calling
2455 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2456 library. However, the tables modifier can be used to select alternate sets of
2457 tables, for different kinds of testing. Note that the locale modifier also
2458 adjusts the tables. */
2459 
2460 /* This is the set of tables distributed as default with PCRE2. It recognizes
2461 only ASCII characters. */
2462 
static const uint8_t tables1[] = {

/* This table is a lower casing table. Only the entries for 'A'-'Z' (65-90)
differ from their own index; they hold 97-122. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. The entries for 'A'-'Z' hold 'a'-'z'
and the entries for 'a'-'z' hold 'A'-'Z'; every other entry is its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps below appear in that order; character c is bit (c & 7) of byte (c >> 3). */

/* space: 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: '0'-'9', 'A'-'F', 'a'-'f' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: '0'-'9' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: 'A'-'Z' */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: 'a'-'z' */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: '0'-'9', 'A'-'Z', '_', 'a'-'z' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct: 33-47, 58-64, 91-96, 123-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2631 
2632 /* This is a set of tables that came originally from a Windows user. It seems
2633 to be at least an approximation of ISO 8859. In particular, there are
2634 characters greater than 128 that are marked as spaces, letters, etc. */
2635 
static const uint8_t tables2[] = {

/* The data falls into four runs of 256, 256, 320 and 256 bytes, the same sizes
as the four sections of tables1, so the section markers below assume the same
layout (TODO confirm against the library's table offsets). */

/* First 256 bytes - presumably the lower casing table. Besides 'A'-'Z', the
entries for 192-214 and 216-222 hold their index plus 32. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Second 256 bytes - presumably the case flipping table. In addition to the
ASCII letters, 192-214/216-222 are exchanged with 224-246/248-254. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Next 320 bytes - presumably ten 32-byte class bit maps in the same order as
in tables1 (space, xdigit, digit, upper, lower, word, graph, print, punct,
cntrl), four rows per map. Unlike tables1, several maps have bits set for
characters above 127. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Final 256 bytes - presumably the per-character type flags, using the same
bit values as the corresponding section of tables1 (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or
'_', 128 = metacharacter or binary zero). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2774 
2775 
2776 
2777 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2778 /*************************************************
2779 *    Emulated memmove() for systems without it   *
2780 *************************************************/
2781 
2782 /* This function can make use of bcopy() if it is available. Otherwise do it by
2783 steam, as there are some non-Unix environments that lack both memmove() and
2784 bcopy(). */
2785 
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
/* Overlap-safe copy of n bytes from s to d. The value returned is always d. */
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* The destination lies above the source, so copy from the top downwards;
  that way bytes in an overlapping region are read before being overwritten. */
  size_t left = n;
  while (left > 0)
    {
    left--;
    to[left] = from[left];
    }
  }
else
  {
  /* The destination is at or below the source: a forward copy is safe. */
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }
return d;
#endif   /* not HAVE_BCOPY */
}
2810 #undef memmove
2811 #define memmove(d,s,n) emulated_memmove(d,s,n)
2812 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2813 
2814 
2815 
2816 #ifndef HAVE_STRERROR
2817 /*************************************************
2818 *     Provide strerror() for non-ANSI libraries  *
2819 *************************************************/
2820 
2821 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2822 libraries. They may no longer be around, but just in case, we can try to
2823 provide the same facility by this simple alternative function. */
2824 
2825 extern int   sys_nerr;
2826 extern char *sys_errlist[];
2827 
2828 char *
strerror(int n)2829 strerror(int n)
2830 {
2831 if (n < 0 || n >= sys_nerr) return "unknown error number";
2832 return sys_errlist[n];
2833 }
2834 #endif /* HAVE_STRERROR */
2835 
2836 
2837 
2838 /*************************************************
2839 *            Local memory functions              *
2840 *************************************************/
2841 
2842 /* Alternative memory functions, to test functionality. */
2843 
my_malloc(size_t size,void * data)2844 static void *my_malloc(size_t size, void *data)
2845 {
2846 void *block = malloc(size);
2847 (void)data;
2848 if (show_memory)
2849   {
2850   if (block == NULL)
2851     {
2852     fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
2853     }
2854   else
2855     {
2856     fprintf(outfile, "malloc  %5" SIZ_FORM, size);
2857 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2858     fprintf(outfile, " %p", block);   /* Not portable */
2859 #endif
2860     if (malloclistptr < MALLOCLISTSIZE)
2861       {
2862       malloclist[malloclistptr] = block;
2863       malloclistlength[malloclistptr++] = size;
2864       }
2865     else
2866       fprintf(outfile, " (not remembered)");
2867     fprintf(outfile, "\n");
2868     }
2869   }
2870 return block;
2871 }
2872 
my_free(void * block,void * data)2873 static void my_free(void *block, void *data)
2874 {
2875 (void)data;
2876 if (show_memory && block != NULL)
2877   {
2878   uint32_t i, j;
2879   BOOL found = FALSE;
2880 
2881   fprintf(outfile, "free");
2882   for (i = 0; i < malloclistptr; i++)
2883     {
2884     if (block == malloclist[i])
2885       {
2886       fprintf(outfile, "    %5" SIZ_FORM, malloclistlength[i]);
2887       malloclistptr--;
2888       for (j = i; j < malloclistptr; j++)
2889         {
2890         malloclist[j] = malloclist[j+1];
2891         malloclistlength[j] = malloclistlength[j+1];
2892         }
2893       found = TRUE;
2894       break;
2895       }
2896     }
2897   if (!found) fprintf(outfile, " unremembered block");
2898 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2899   fprintf(outfile, " %p", block);  /* Not portable */
2900 #endif
2901   fprintf(outfile, "\n");
2902   }
2903 free(block);
2904 }
2905 
2906 
2907 
2908 /*************************************************
2909 *       Callback function for stack guard        *
2910 *************************************************/
2911 
2912 /* This is set up to be called from pcre2_compile() when the stackguard=n
2913 modifier sets a value greater than zero. The test we do is whether the
2914 parenthesis nesting depth is greater than the value set by the modifier.
2915 
Arguments:
  depth      the current parenthesis nesting depth
  user_data  not used
2917 Returns:   non-zero to kill the compilation
2918 */
2919 
2920 static int
stack_guard(uint32_t depth,void * user_data)2921 stack_guard(uint32_t depth, void *user_data)
2922 {
2923 (void)user_data;
2924 return depth > pat_patctl.stackguard_test;
2925 }
2926 
2927 
2928 /*************************************************
2929 *         JIT memory callback                    *
2930 *************************************************/
2931 
2932 static PCRE2_JIT_STACK*
jit_callback(void * arg)2933 jit_callback(void *arg)
2934 {
2935 jit_was_used = TRUE;
2936 return (PCRE2_JIT_STACK *)arg;
2937 }
2938 
2939 
2940 /*************************************************
2941 *      Convert UTF-8 character to code point     *
2942 *************************************************/
2943 
2944 /* This function reads one or more bytes that represent a UTF-8 character,
2945 and returns the codepoint of that character. Note that the function supports
2946 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2947 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2948 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2949 checking, and also for generating 32-bit non-UTF data values above the UTF
2950 limit.
2951 
2952 Argument:
2953   utf8bytes   a pointer to the byte vector
2954   end         a pointer to the end of the byte vector
  vptr        a pointer to a uint32_t to receive the value
2956 
2957 Returns:      >  0 => the number of bytes consumed
2958               -6 to 0 => malformed UTF-8 character at offset = (-return)
2959 */
2960 
2961 static int
utf82ord(PCRE2_SPTR8 utf8bytes,PCRE2_SPTR8 end,uint32_t * vptr)2962 utf82ord(PCRE2_SPTR8 utf8bytes, PCRE2_SPTR8 end, uint32_t *vptr)
2963 {
2964 uint32_t c = *utf8bytes++;
2965 uint32_t d = c;
2966 int i, j, s;
2967 
2968 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2969   {
2970   if ((d & 0x80) == 0) break;
2971   d <<= 1;
2972   }
2973 
2974 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2975 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2976 
2977 /* i now has a value in the range 1-5 */
2978 
2979 s = 6*i;
2980 d = (c & utf8_table3[i]) << s;
2981 
2982 for (j = 0; j < i; j++)
2983   {
2984   if (utf8bytes >= end) return 0;
2985 
2986   c = *utf8bytes++;
2987   if ((c & 0xc0) != 0x80) return -(j+1);
2988   s -= 6;
2989   d |= (c & 0x3f) << s;
2990   }
2991 
2992 /* Check that encoding was the correct unique one */
2993 
2994 for (j = 0; j < utf8_table1_size; j++)
2995   if (d <= (uint32_t)utf8_table1[j]) break;
2996 if (j != i) return -(i+1);
2997 
2998 /* Valid value */
2999 
3000 *vptr = d;
3001 return i+1;
3002 }
3003 
3004 
3005 
3006 /*************************************************
3007 *             Print one character                *
3008 *************************************************/
3009 
3010 /* Print a single character either literally, or as a hex escape, and count how
3011 many printed characters are used.
3012 
3013 Arguments:
3014   c            the character
3015   utf          TRUE in UTF mode
3016   f            the FILE to print to, or NULL just to count characters
3017 
3018 Returns:       number of characters written
3019 */
3020 
3021 static int
pchar(uint32_t c,BOOL utf,FILE * f)3022 pchar(uint32_t c, BOOL utf, FILE *f)
3023 {
3024 int n = 0;
3025 char tempbuffer[16];
3026 
3027 if (PRINTOK(c))
3028   {
3029   if (f != NULL) fprintf(f, "%c", c);
3030   return 1;
3031   }
3032 
3033 if (c < 0x100)
3034   {
3035   if (utf)
3036     {
3037     if (f != NULL) fprintf(f, "\\x{%02x}", c);
3038     return 6;
3039     }
3040   else
3041     {
3042     if (f != NULL) fprintf(f, "\\x%02x", c);
3043     return 4;
3044     }
3045   }
3046 
3047 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
3048   else n = sprintf(tempbuffer, "\\x{%02x}", c);
3049 
3050 return n >= 0 ? n : 0;
3051 }
3052 
3053 
3054 
3055 #ifdef SUPPORT_PCRE2_16
3056 /*************************************************
3057 *    Find length of 0-terminated 16-bit string   *
3058 *************************************************/
3059 
static size_t strlen16(PCRE2_SPTR16 p)
{
/* Count the 16-bit code units that precede the terminating zero. The pointer
difference is converted directly to size_t, the declared return type; passing
it through int could truncate the length of a very long string. */
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
3066 #endif  /* SUPPORT_PCRE2_16 */
3067 
3068 
3069 
3070 #ifdef SUPPORT_PCRE2_32
3071 /*************************************************
3072 *    Find length of 0-terminated 32-bit string   *
3073 *************************************************/
3074 
static size_t strlen32(PCRE2_SPTR32 p)
{
/* Count the 32-bit code units that precede the terminating zero. The pointer
difference is converted directly to size_t, the declared return type; passing
it through int could truncate the length of a very long string. */
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
3081 #endif  /* SUPPORT_PCRE2_32 */
3082 
3083 
3084 #ifdef SUPPORT_PCRE2_8
3085 /*************************************************
3086 *         Print 8-bit character string           *
3087 *************************************************/
3088 
3089 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
3090 For printing *MARK strings, a negative length is given, indicating that the
3091 length is in the first code unit. If handed a NULL file, this function just
3092 counts chars without printing (because pchar() does that). */
3093 
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
/* Print (or, for a NULL file, just measure) a string of 8-bit code units and
yield the total reported by pchar(). A negative length means that the real
length is held in the first code unit. */
PCRE2_SPTR8 end;
uint32_t c = 0;
int total = 0;

if (length < 0) length = *p++;
end = p + length;

while (length > 0)
  {
  int used = 0;

  /* In UTF mode try to decode a whole character, but accept it only if it
  lies entirely within what remains of the string. */

  if (utf)
    {
    int rc = utf82ord(p, end, &c);
    if (rc > 0 && rc <= length) used = rc;
    }

  /* Otherwise (non-UTF, or the decode was not usable) show a single byte. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  length -= used;
  total += pchar(c, utf, f);
  }

return total;
}
3120 #endif
3121 
3122 
3123 #ifdef SUPPORT_PCRE2_16
3124 /*************************************************
3125 *           Print 16-bit character string        *
3126 *************************************************/
3127 
3128 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3129 For printing *MARK strings, a negative length is given, indicating that the
3130 length is in the first code unit. If handed a NULL file, just counts chars
3131 without printing. */
3132 
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
/* Print (or, for a NULL file, just measure) a string of 16-bit code units and
yield the total reported by pchar(). A negative length means that the real
length is held in the first code unit. */
int total = 0;
if (length < 0) length = *p++;

while (length > 0)
  {
  uint32_t c = *p++ & 0xffff;
  length--;

  /* In UTF mode a high surrogate that is followed by a low surrogate is
  combined with it into one code point; anything else is shown as it stands. */

  if (utf && length > 0 && (c & 0xfc00) == 0xd800)
    {
    uint32_t low = *p & 0xffff;
    if ((low & 0xfc00) == 0xdc00)
      {
      c = 0x10000 + ((c & 0x3ff) << 10) + (low & 0x3ff);
      p++;
      length--;
      }
    }

  total += pchar(c, utf, f);
  }
return total;
}
3154 #endif  /* SUPPORT_PCRE2_16 */
3155 
3156 
3157 
3158 #ifdef SUPPORT_PCRE2_32
3159 /*************************************************
3160 *           Print 32-bit character string        *
3161 *************************************************/
3162 
3163 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3164 For printing *MARK strings, a negative length is given, indicating that the
3165 length is in the first code unit. If handed a NULL file, just counts chars
3166 without printing. */
3167 
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
/* Print (or, for a NULL file, just measure) a string of 32-bit code units and
yield the total reported by pchar(). A negative length means that the real
length is held in the first code unit. */
int total = 0;
int left;
(void)(utf);  /* Avoid compiler warning */
if (length < 0) length = *p++;
for (left = length; left > 0; left--) total += pchar(*p++, utf, f);
return total;
}
3180 #endif  /* SUPPORT_PCRE2_32 */
3181 
3182 
3183 
3184 
3185 /*************************************************
3186 *       Convert character value to UTF-8         *
3187 *************************************************/
3188 
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.
3193 
3194 Arguments:
3195   cvalue     the character value
3196   utf8bytes  pointer to buffer for result - at least 6 bytes long
3197 
Returns:     number of bytes placed in the buffer, or -1 if cvalue is
             greater than 0x7fffffff
3199 */
3200 
3201 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3202 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3203 {
3204 int i, j;
3205 if (cvalue > 0x7fffffffu)
3206   return -1;
3207 for (i = 0; i < utf8_table1_size; i++)
3208   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3209 utf8bytes += i;
3210 for (j = i; j > 0; j--)
3211  {
3212  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3213  cvalue >>= 6;
3214  }
3215 *utf8bytes = utf8_table2[i] | cvalue;
3216 return i + 1;
3217 }
3218 
3219 
3220 
3221 #ifdef SUPPORT_PCRE2_16
3222 /*************************************************
3223 *           Convert string to 16-bit             *
3224 *************************************************/
3225 
3226 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3227 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3228 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3229 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3230 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3231 greater than 0xffff.
3232 
3233 If all the input bytes are ASCII, the space needed for a 16-bit string is
3234 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3235 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3236 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3237 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3238 save repeated re-sizing.
3239 
3240 Note that this function does not object to surrogate values. This is
3241 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3242 for the purpose of testing that they are correctly faulted.
3243 
3244 Arguments:
3245   p          points to a byte string
3246   utf        true in UTF mode
3247   lenptr     points to number of bytes in the string (excluding trailing zero)
3248 
3249 Returns:     0 on success, with the length updated to the number of 16-bit
3250                data items used (excluding the trailing zero)
3251              OR -1 if a UTF-8 string is malformed
3252              OR -2 if a value > 0x10ffff is encountered in UTF mode
3253              OR -3 if a value > 0xffff is encountered when not in UTF mode
3254 */
3255 
3256 static int
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3257 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3258 {
3259 uint16_t *pp;
3260 PCRE2_SIZE len = *lenptr;
3261 
3262 if (pbuffer16_size < 2*len + 2)
3263   {
3264   if (pbuffer16 != NULL) free(pbuffer16);
3265   pbuffer16_size = 2*len + 2;
3266   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3267   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3268   if (pbuffer16 == NULL)
3269     {
3270     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3271       pbuffer16_size);
3272     exit(1);
3273     }
3274   }
3275 
3276 pp = pbuffer16;
3277 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3278   {
3279   for (; len > 0; len--) *pp++ = *p++;
3280   }
3281 else while (len > 0)
3282   {
3283   uint32_t c;
3284   const uint8_t *end = p + len;
3285   int chlen = utf82ord(p, end, &c);
3286   if (chlen <= 0) return -1;
3287   if (!utf && c > 0xffff) return -3;
3288   if (c > 0x10ffff) return -2;
3289   p += chlen;
3290   len -= chlen;
3291   if (c < 0x10000) *pp++ = c; else
3292     {
3293     c -= 0x10000;
3294     *pp++ = 0xD800 | (c >> 10);
3295     *pp++ = 0xDC00 | (c & 0x3ff);
3296     }
3297   }
3298 
3299 *pp = 0;
3300 *lenptr = pp - pbuffer16;
3301 return 0;
3302 }
3303 #endif
3304 
3305 
3306 
3307 #ifdef SUPPORT_PCRE2_32
3308 /*************************************************
3309 *           Convert string to 32-bit             *
3310 *************************************************/
3311 
3312 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3313 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3314 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3315 limit of 0x10ffff cause an error.
3316 
3317 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3318 is set, and no limit is imposed. There is special interpretation of the 0xff
3319 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3320 next character to be set. This provides a way of generating 32-bit characters
3321 greater than 0x7fffffff.
3322 
3323 If all the input bytes are ASCII, the space needed for a 32-bit string is
3324 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3325 string is no more than four times, because the number of characters must be
3326 less than the number of bytes. The result is always left in pbuffer32. Impose a
3327 minimum size to save repeated re-sizing.
3328 
3329 Note that this function does not object to surrogate values. This is
3330 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3331 for the purpose of testing that they are correctly faulted.
3332 
3333 Arguments:
3334   p          points to a byte string
3335   utf        true in UTF mode
3336   lenptr     points to number of bytes in the string (excluding trailing zero)
3337 
3338 Returns:     0 on success, with the length updated to the number of 32-bit
3339                data items used (excluding the trailing zero)
3340              OR -1 if a UTF-8 string is malformed
3341              OR -2 if a value > 0x10ffff is encountered in UTF mode
3342 */
3343 
3344 static int
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3345 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3346 {
3347 uint32_t *pp;
3348 PCRE2_SIZE len = *lenptr;
3349 
3350 if (pbuffer32_size < 4*len + 4)
3351   {
3352   if (pbuffer32 != NULL) free(pbuffer32);
3353   pbuffer32_size = 4*len + 4;
3354   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3355   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3356   if (pbuffer32 == NULL)
3357     {
3358     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3359       pbuffer32_size);
3360     exit(1);
3361     }
3362   }
3363 
3364 pp = pbuffer32;
3365 
3366 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3367   {
3368   for (; len > 0; len--) *pp++ = *p++;
3369   }
3370 
3371 else while (len > 0)
3372   {
3373   int chlen;
3374   uint32_t c;
3375   uint32_t topbit = 0;
3376   const uint8_t *end = p + len;
3377   if (!utf && *p == 0xff && len > 1)
3378     {
3379     topbit = 0x80000000u;
3380     p++;
3381     len--;
3382     }
3383   chlen = utf82ord(p, end, &c);
3384   if (chlen <= 0) return -1;
3385   if (utf && c > 0x10ffff) return -2;
3386   p += chlen;
3387   len -= chlen;
3388   *pp++ = c | topbit;
3389   }
3390 
3391 *pp = 0;
3392 *lenptr = pp - pbuffer32;
3393 return 0;
3394 }
3395 #endif /* SUPPORT_PCRE2_32 */
3396 
3397 
3398 
3399 /* This function is no longer used. Keep it around for a while, just in case it
3400 needs to be re-instated. */
3401 
3402 #ifdef NEVERNEVERNEVER
3403 
3404 /*************************************************
3405 *         Move back by so many characters        *
3406 *************************************************/
3407 
3408 /* Given a code unit offset in a subject string, move backwards by a number of
3409 characters, and return the resulting offset.
3410 
3411 Arguments:
3412   subject   pointer to the string
3413   offset    start offset
3414   count     count to move back by
3415   utf       TRUE if in UTF mode
3416 
3417 Returns:   a possibly changed offset
3418 */
3419 
3420 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3421 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3422 {
3423 if (!utf || test_mode == PCRE32_MODE)
3424   return (count >= offset)? 0 : (offset - count);
3425 
3426 else if (test_mode == PCRE8_MODE)
3427   {
3428   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3429   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3430     {
3431     pp--;
3432     while ((*pp & 0xc0) == 0x80) pp--;
3433     }
3434   return pp - (PCRE2_SPTR8)subject;
3435   }
3436 
3437 else  /* 16-bit mode */
3438   {
3439   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3440   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3441     {
3442     pp--;
3443     if ((*pp & 0xfc00) == 0xdc00) pp--;
3444     }
3445   return pp - (PCRE2_SPTR16)subject;
3446   }
3447 }
3448 #endif  /* NEVERNEVERNEVER */
3449 
3450 
3451 
3452 /*************************************************
3453 *           Expand input buffers                 *
3454 *************************************************/
3455 
3456 /* This function doubles the size of the input buffer and the buffer for
3457 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3458 the new ones.
3459 
3460 Arguments: none
3461 Returns:   nothing (aborts if malloc() fails)
3462 */
3463 
3464 static void
expand_input_buffers(void)3465 expand_input_buffers(void)
3466 {
3467 int new_pbuffer8_size = 2*pbuffer8_size;
3468 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3469 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3470 
3471 if (new_buffer == NULL || new_pbuffer8 == NULL)
3472   {
3473   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3474   exit(1);
3475   }
3476 
3477 memcpy(new_buffer, buffer, pbuffer8_size);
3478 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3479 
3480 pbuffer8_size = new_pbuffer8_size;
3481 
3482 free(buffer);
3483 free(pbuffer8);
3484 
3485 buffer = new_buffer;
3486 pbuffer8 = new_pbuffer8;
3487 }
3488 
3489 
3490 
3491 /*************************************************
3492 *        Read or extend an input line            *
3493 *************************************************/
3494 
3495 /* Input lines are read into buffer, but both patterns and data lines can be
3496 continued over multiple input lines. In addition, if the buffer fills up, we
3497 want to automatically expand it so as to be able to handle extremely large
3498 lines that are needed for certain stress tests, although this is less likely
3499 now that there are repetition features for both patterns and data. When the
3500 input buffer is expanded, the other two buffers must also be expanded likewise,
3501 and the contents of pbuffer, which are a copy of the input for callouts, must
3502 be preserved (for when expansion happens for a data line). This is not the most
3503 optimal way of handling this, but hey, this is just a test program!
3504 
3505 Arguments:
3506   f            the file to read
3507   start        where in buffer to start (this *must* be within buffer)
3508   prompt       for stdin or readline()
3509 
3510 Returns:       pointer to the start of new data
3511                could be a copy of start, or could be moved
3512                NULL if no data read and EOF reached
3513 */
3514 
3515 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3516 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3517 {
3518 uint8_t *here = start;
3519 
3520 for (;;)
3521   {
3522   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3523 
3524   if (rlen > 1000)
3525     {
3526     size_t dlen;
3527 
3528     /* If libreadline or libedit support is required, use readline() to read a
3529     line if the input is a terminal. Note that readline() removes the trailing
3530     newline, so we must put it back again, to be compatible with fgets(). */
3531 
3532 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3533     if (INTERACTIVE(f))
3534       {
3535       size_t len;
3536       char *s = readline(prompt);
3537       if (s == NULL) return (here == start)? NULL : start;
3538       len = strlen(s);
3539       if (len > 0) add_history(s);
3540       if (len > rlen - 1) len = rlen - 1;
3541       memcpy(here, s, len);
3542       here[len] = '\n';
3543       here[len+1] = 0;
3544       free(s);
3545       }
3546     else
3547 #endif
3548 
3549     /* Read the next line by normal means, prompting if the file is a tty. */
3550 
3551       {
3552       if (INTERACTIVE(f)) printf("%s", prompt);
3553       if (fgets((char *)here, rlen,  f) == NULL)
3554         return (here == start)? NULL : start;
3555       }
3556 
3557     dlen = strlen((char *)here);
3558     here += dlen;
3559 
3560     /* Check for end of line reached. Take care not to read data from before
3561     start (dlen will be zero for a file starting with a binary zero). */
3562 
3563     if (here > start && here[-1] == '\n') return start;
3564 
3565     /* If we have not read a newline when reading a file, we have either filled
3566     the buffer or reached the end of the file. We can detect the former by
3567     checking that the string fills the buffer, and the latter by feof(). If
3568     neither of these is true, it means we read a binary zero which has caused
3569     strlen() to give a short length. This is a hard error because pcre2test
3570     expects to work with C strings. */
3571 
3572     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3573       {
3574       fprintf(outfile, "** Binary zero encountered in input\n");
3575       fprintf(outfile, "** pcre2test run abandoned\n");
3576       exit(1);
3577       }
3578     }
3579 
3580   else
3581     {
3582     size_t start_offset = start - buffer;
3583     size_t here_offset = here - buffer;
3584     expand_input_buffers();
3585     start = buffer + start_offset;
3586     here = buffer + here_offset;
3587     }
3588   }
3589 
3590 /* Control never gets here */
3591 }
3592 
3593 
3594 
3595 /*************************************************
3596 *         Case-independent strncmp() function    *
3597 *************************************************/
3598 
3599 /*
3600 Arguments:
3601   s         first string
3602   t         second string
3603   n         number of characters to compare
3604 
3605 Returns:    < 0, = 0, or > 0, according to the comparison
3606 */
3607 
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare the first n bytes of s and t after converting each to lower case.
The result is the difference between the first pair of converted bytes that
differ, or zero if there is no such pair. */
for (; n != 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  }
return 0;
}
3618 
3619 
3620 
3621 /*************************************************
3622 *          Scan the main modifier list           *
3623 *************************************************/
3624 
3625 /* This function searches the modifier list for a long modifier name.
3626 
3627 Argument:
3628   p         start of the name
  len       length of the name
3630 
3631 Returns:    an index in the modifier list, or -1 on failure
3632 */
3633 
3634 static int
scan_modifiers(const uint8_t * p,unsigned int len)3635 scan_modifiers(const uint8_t *p, unsigned int len)
3636 {
3637 int bot = 0;
3638 int top = MODLISTCOUNT;
3639 
3640 while (top > bot)
3641   {
3642   int mid = (bot + top)/2;
3643   unsigned int mlen = strlen(modlist[mid].name);
3644   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3645   if (c == 0)
3646     {
3647     if (len == mlen) return mid;
3648     c = (int)len - (int)mlen;
3649     }
3650   if (c > 0) bot = mid + 1; else top = mid;
3651   }
3652 
3653 return -1;
3654 
3655 }
3656 
3657 
3658 
3659 /*************************************************
*        Check a modifier and find its field     *
3661 *************************************************/
3662 
3663 /* This function is called when a modifier has been identified. We check that
3664 it is allowed here and find the field that is to be changed.
3665 
3666 Arguments:
3667   m          the modifier list entry
3668   ctx        CTX_PAT     => pattern context
3669              CTX_POPPAT  => pattern context for popped pattern
3670              CTX_DEFPAT  => default pattern context
3671              CTX_DAT     => data context
3672              CTX_DEFDAT  => default data context
3673   pctl       point to pattern control block
3674   dctl       point to data control block
3675   c          a single character or 0
3676 
3677 Returns:     a field pointer or NULL
3678 */
3679 
3680 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3681 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3682 {
3683 void *field = NULL;
3684 PCRE2_SIZE offset = m->offset;
3685 
3686 if (restrict_for_perl_test) switch(m->which)
3687   {
3688   case MOD_PNDP:
3689   case MOD_PATP:
3690   case MOD_DATP:
3691   case MOD_PDP:
3692   break;
3693 
3694   default:
3695   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3696     m->name);
3697   return NULL;
3698   }
3699 
3700 switch (m->which)
3701   {
3702   case MOD_CTC:  /* Compile context modifier */
3703   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3704     else if (ctx == CTX_PAT) field = PTR(pat_context);
3705   break;
3706 
3707   case MOD_CTM:  /* Match context modifier */
3708   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3709     else if (ctx == CTX_DAT) field = PTR(dat_context);
3710   break;
3711 
3712   case MOD_DAT:    /* Data line modifier */
3713   case MOD_DATP:   /* Allowed for Perl test */
3714   if (dctl != NULL) field = dctl;
3715   break;
3716 
3717   case MOD_PAT:    /* Pattern modifier */
3718   case MOD_PATP:   /* Allowed for Perl test */
3719   if (pctl != NULL) field = pctl;
3720   break;
3721 
3722   case MOD_PD:   /* Pattern or data line modifier */
3723   case MOD_PDP:  /* Ditto, allowed for Perl test */
3724   case MOD_PND:  /* Ditto, but not default pattern */
3725   case MOD_PNDP: /* Ditto, allowed for Perl test */
3726   if (dctl != NULL) field = dctl;
3727     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3728              ctx != CTX_DEFPAT))
3729       field = pctl;
3730   break;
3731   }
3732 
3733 if (field == NULL)
3734   {
3735   if (c == 0)
3736     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3737   else
3738     fprintf(outfile, "** /%c is not valid here\n", c);
3739   return NULL;
3740   }
3741 
3742 return (char *)field + offset;
3743 }
3744 
3745 
3746 
3747 /*************************************************
3748 *            Decode a modifier list              *
3749 *************************************************/
3750 
3751 /* A pointer to a control block is NULL when called in cases when that block is
3752 not relevant. They are never all relevant in one call. At least one of patctl
3753 and datctl is NULL. The second argument specifies which context to use for
3754 modifiers that apply to contexts.
3755 
3756 Arguments:
3757   p          point to modifier string
3758   ctx        CTX_PAT     => pattern context
3759              CTX_POPPAT  => pattern context for popped pattern
3760              CTX_DEFPAT  => default pattern context
3761              CTX_DAT     => data context
3762              CTX_DEFDAT  => default data context
3763   pctl       point to pattern control block
3764   dctl       point to data control block
3765 
3766 Returns: TRUE if successful decode, FALSE otherwise
3767 */
3768 
3769 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3770 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3771 {
3772 uint8_t *ep, *pp;
3773 long li;
3774 unsigned long uli;
3775 BOOL first = TRUE;
3776 
3777 for (;;)
3778   {
3779   void *field;
3780   modstruct *m;
3781   BOOL off = FALSE;
3782   unsigned int i, len;
3783   int index;
3784   char *endptr;
3785 
3786   /* Skip white space and commas. */
3787 
3788   while (isspace(*p) || *p == ',') p++;
3789   if (*p == 0) break;
3790 
3791   /* Find the end of the item; lose trailing whitespace at end of line. */
3792 
3793   for (ep = p; *ep != 0 && *ep != ','; ep++);
3794   if (*ep == 0)
3795     {
3796     while (ep > p && isspace(ep[-1])) ep--;
3797     *ep = 0;
3798     }
3799 
3800   /* Remember if the first character is '-'. */
3801 
3802   if (*p == '-')
3803     {
3804     off = TRUE;
3805     p++;
3806     }
3807 
3808   /* Find the length of a full-length modifier name, and scan for it. */
3809 
3810   pp = p;
3811   while (pp < ep && *pp != '=') pp++;
3812   index = scan_modifiers(p, pp - p);
3813 
3814   /* If the first modifier is unrecognized, try to interpret it as a sequence
3815   of single-character abbreviated modifiers. None of these modifiers have any
3816   associated data. They just set options or control bits. */
3817 
3818   if (index < 0)
3819     {
3820     uint32_t cc;
3821     uint8_t *mp = p;
3822 
3823     if (!first)
3824       {
3825       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3826       if (ep - p == 1)
3827         fprintf(outfile, "** Single-character modifiers must come first\n");
3828       return FALSE;
3829       }
3830 
3831     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3832       {
3833       for (i = 0; i < C1MODLISTCOUNT; i++)
3834         if (cc == c1modlist[i].onechar) break;
3835 
3836       if (i >= C1MODLISTCOUNT)
3837         {
3838         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3839           *p, (int)(ep-mp), mp);
3840         return FALSE;
3841         }
3842 
3843       if (c1modlist[i].index >= 0)
3844         {
3845         index = c1modlist[i].index;
3846         }
3847 
3848       else
3849         {
3850         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3851           strlen(c1modlist[i].fullname));
3852         if (index < 0)
3853           {
3854           fprintf(outfile, "** Internal error: single-character equivalent "
3855             "modifier '%s' not found\n", c1modlist[i].fullname);
3856           return FALSE;
3857           }
3858         c1modlist[i].index = index;     /* Cache for next time */
3859         }
3860 
3861       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3862       if (field == NULL) return FALSE;
3863 
3864       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3865       PCRE2_EXTENDED_MORE. */
3866 
3867       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3868         {
3869         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3870         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3871         }
3872       else
3873         *((uint32_t *)field) |= modlist[index].value;
3874       }
3875 
3876     continue;    /* With tne next (fullname) modifier */
3877     }
3878 
3879   /* We have a match on a full-name modifier. Check for the existence of data
3880   when needed. */
3881 
3882   m = modlist + index;      /* Save typing */
3883   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3884       (m->type != MOD_IND || *pp == '='))
3885     {
3886     if (*pp++ != '=')
3887       {
3888       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3889       return FALSE;
3890       }
3891     if (off)
3892       {
3893       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3894       return FALSE;
3895       }
3896     }
3897 
3898   /* These on/off types have no data. */
3899 
3900   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3901     {
3902     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3903     return FALSE;
3904     }
3905 
3906   /* Set the data length for those types that have data. Then find the field
3907   that is to be set. If check_modifier() returns NULL, it has already output an
3908   error message. */
3909 
3910   len = ep - pp;
3911   field = check_modifier(m, ctx, pctl, dctl, 0);
3912   if (field == NULL) return FALSE;
3913 
3914   /* Process according to data type. */
3915 
3916   switch (m->type)
3917     {
3918     case MOD_CTL:
3919     case MOD_OPT:
3920     if (off) *((uint32_t *)field) &= ~m->value;
3921       else *((uint32_t *)field) |= m->value;
3922     break;
3923 
3924     case MOD_BSR:
3925     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3926       {
3927 #ifdef BSR_ANYCRLF
3928       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3929 #else
3930       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3931 #endif
3932       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3933         else dctl->control2 &= ~CTL2_BSR_SET;
3934       }
3935     else
3936       {
3937       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3938         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3939       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3940         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3941       else goto INVALID_VALUE;
3942       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3943         else dctl->control2 |= CTL2_BSR_SET;
3944       }
3945     pp = ep;
3946     break;
3947 
3948     case MOD_CHR:  /* A single character */
3949     *((uint32_t *)field) = *pp++;
3950     break;
3951 
3952     case MOD_CON:  /* A convert type/options list */
3953     for (;; pp++)
3954       {
3955       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3956       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3957       for (i = 0; i < convertlistcount; i++)
3958         {
3959         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3960           {
3961           if (*((uint32_t *)field) == CONVERT_UNSET)
3962             *((uint32_t *)field) = convertlist[i].option;
3963           else
3964             *((uint32_t *)field) |= convertlist[i].option;
3965           break;
3966           }
3967         }
3968       if (i >= convertlistcount) goto INVALID_VALUE;
3969       pp += len;
3970       if (*pp != ':') break;
3971       }
3972     break;
3973 
3974     case MOD_IN2:    /* One or two unsigned integers */
3975     if (!isdigit(*pp)) goto INVALID_VALUE;
3976     uli = strtoul((const char *)pp, &endptr, 10);
3977     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3978     ((uint32_t *)field)[0] = (uint32_t)uli;
3979     if (*endptr == ':')
3980       {
3981       uli = strtoul((const char *)endptr+1, &endptr, 10);
3982       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3983       ((uint32_t *)field)[1] = (uint32_t)uli;
3984       }
3985     else ((uint32_t *)field)[1] = 0;
3986     pp = (uint8_t *)endptr;
3987     break;
3988 
3989     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3990     less than ULONG_MAX. So first test for overflowing the long int, and then
3991     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3992 
3993     case MOD_SIZ:    /* PCRE2_SIZE value */
3994     if (!isdigit(*pp)) goto INVALID_VALUE;
3995     uli = strtoul((const char *)pp, &endptr, 10);
3996     if (uli == ULONG_MAX) goto INVALID_VALUE;
3997 #if ULONG_MAX > PCRE2_SIZE_MAX
3998     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3999 #endif
4000     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
4001     pp = (uint8_t *)endptr;
4002     break;
4003 
4004     case MOD_IND:    /* Unsigned integer with default */
4005     if (len == 0)
4006       {
4007       *((uint32_t *)field) = (uint32_t)(m->value);
4008       break;
4009       }
4010     /* Fall through */
4011 
4012     case MOD_INT:    /* Unsigned integer */
4013     if (!isdigit(*pp)) goto INVALID_VALUE;
4014     uli = strtoul((const char *)pp, &endptr, 10);
4015     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
4016     *((uint32_t *)field) = (uint32_t)uli;
4017     pp = (uint8_t *)endptr;
4018     break;
4019 
4020     case MOD_INS:   /* Signed integer */
4021     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
4022     li = strtol((const char *)pp, &endptr, 10);
4023     if (S32OVERFLOW(li)) goto INVALID_VALUE;
4024     *((int32_t *)field) = (int32_t)li;
4025     pp = (uint8_t *)endptr;
4026     break;
4027 
4028     case MOD_NL:
4029     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
4030       if (len == strlen(newlines[i]) &&
4031         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
4032     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
4033     if (i == 0)
4034       {
4035       *((uint16_t *)field) = NEWLINE_DEFAULT;
4036       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
4037         else dctl->control2 &= ~CTL2_NL_SET;
4038       }
4039     else
4040       {
4041       *((uint16_t *)field) = i;
4042       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
4043         else dctl->control2 |= CTL2_NL_SET;
4044       }
4045     pp = ep;
4046     break;
4047 
4048     case MOD_NN:              /* Name or (signed) number; may be several */
4049     if (isdigit(*pp) || *pp == '-')
4050       {
4051       int ct = MAXCPYGET - 1;
4052       int32_t value;
4053       li = strtol((const char *)pp, &endptr, 10);
4054       if (S32OVERFLOW(li)) goto INVALID_VALUE;
4055       value = (int32_t)li;
4056       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
4057       if (value >= 0)                                    /* Add new number */
4058         {
4059         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
4060           field = (char *)field + sizeof(int32_t);
4061         if (ct <= 0)
4062           {
4063           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
4064           return FALSE;
4065           }
4066         }
4067       *((int32_t *)field) = value;
4068       if (ct > 0) ((int32_t *)field)[1] = -1;
4069       pp = (uint8_t *)endptr;
4070       }
4071 
4072     /* Multiple strings are put end to end. */
4073 
4074     else
4075       {
4076       char *nn = (char *)field;
4077       if (len > 0)                    /* Add new name */
4078         {
4079         if (len > MAX_NAME_SIZE)
4080           {
4081           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
4082           return FALSE;
4083           }
4084         while (*nn != 0) nn += strlen(nn) + 1;
4085         if (nn + len + 2 - (char *)field > LENCPYGET)
4086           {
4087           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
4088             m->name);
4089           return FALSE;
4090           }
4091         memcpy(nn, pp, len);
4092         }
4093       nn[len] = 0 ;
4094       nn[len+1] = 0;
4095       pp = ep;
4096       }
4097     break;
4098 
4099     case MOD_STR:
4100     if (len + 1 > m->value)
4101       {
4102       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
4103         m->name, m->value - 1);
4104       return FALSE;
4105       }
4106     memcpy(field, pp, len);
4107     ((uint8_t *)field)[len] = 0;
4108     pp = ep;
4109     break;
4110     }
4111 
4112   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4113     {
4114     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4115     return FALSE;
4116     }
4117 
4118   p = pp;
4119   first = FALSE;
4120 
4121   if (ctx == CTX_POPPAT &&
4122      (pctl->options != 0 ||
4123       pctl->tables_id != 0 ||
4124       pctl->locale[0] != 0 ||
4125       (pctl->control & NOTPOP_CONTROLS) != 0))
4126     {
4127     fprintf(outfile, "** '%s' is not valid here\n", m->name);
4128     return FALSE;
4129     }
4130   }
4131 
4132 return TRUE;
4133 
4134 INVALID_VALUE:
4135 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4136 return FALSE;
4137 }
4138 
4139 
4140 /*************************************************
4141 *             Get info from a pattern            *
4142 *************************************************/
4143 
4144 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4145 pattern.
4146 
4147 Arguments:
4148   what        code for the required information
4149   where       where to put the answer
4150   unsetok     PCRE2_ERROR_UNSET is an "expected" result
4151 
4152 Returns:      the return from pcre2_pattern_info()
4153 */
4154 
4155 static int
pattern_info(int what,void * where,BOOL unsetok)4156 pattern_info(int what, void *where, BOOL unsetok)
4157 {
4158 int rc;
4159 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
4160 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4161 if (rc >= 0) return 0;
4162 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4163   {
4164   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4165     what);
4166   if (rc == PCRE2_ERROR_BADMODE)
4167     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4168       "%d-bit mode\n", test_mode,
4169       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4170   }
4171 return rc;
4172 }
4173 
4174 
4175 
#ifdef SUPPORT_PCRE2_8
/*************************************************
*             Show something in a list           *
*************************************************/

/* A small helper for printing lists that need introductory text before the
first item only. The current introduction is output, followed by a space and
the item; the introduction is then replaced by an empty string so that later
calls output just the space and the item. As these calls are all in the
POSIX-support code, they happen only when 8-bit mode is supported.

Arguments:
  msg         points to the introductory text pointer; updated to ""
  s           the item to output

Returns:      nothing
*/

static void
prmsg(const char **msg, const char *s)
{
fprintf(outfile, "%s ", *msg);
fprintf(outfile, "%s", s);
*msg = "";
}
#endif  /* SUPPORT_PCRE2_8 */
4193 
4194 
4195 
4196 /*************************************************
4197 *                Show control bits               *
4198 *************************************************/
4199 
4200 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4201 Because the bits are unique, this can be used for both pattern and data control
4202 words.
4203 
4204 Arguments:
4205   controls    control bits
4206   controls2   more control bits
4207   before      text to print before
4208 
4209 Returns:      nothing
4210 */
4211 
4212 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4213 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4214 {
4215 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4216   before,
4217   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4218   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4219   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4220   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4221   ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4222   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4223   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4224   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4225   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4226   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4227   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4228   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4229   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4230   ((controls & CTL_DFA) != 0)? " dfa" : "",
4231   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4232   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4233   ((controls & CTL_FINDLIMITS_NOHEAP) != 0)? " find_limits_noheap" : "",
4234   ((controls2 & CTL2_FRAMESIZE) != 0)? " framesize" : "",
4235   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4236   ((controls & CTL_GETALL) != 0)? " getall" : "",
4237   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4238   ((controls2 & CTL2_HEAPFRAMES_SIZE) != 0)? " heapframes_size" : "",
4239   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4240   ((controls & CTL_INFO) != 0)? " info" : "",
4241   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4242   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4243   ((controls & CTL_MARK) != 0)? " mark" : "",
4244   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4245   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4246   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4247   ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
4248   ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
4249   ((controls & CTL_POSIX) != 0)? " posix" : "",
4250   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4251   ((controls & CTL_PUSH) != 0)? " push" : "",
4252   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4253   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4254   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4255   ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4256   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4257   ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4258   ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4259   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4260   ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4261   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4262   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4263   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4264   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4265   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4266 }
4267 
4268 
4269 
4270 /*************************************************
4271 *                Show compile options            *
4272 *************************************************/
4273 
4274 /* Called from show_pattern_info() and for unsupported POSIX options.
4275 
4276 Arguments:
4277   options     an options word
4278   before      text to print before
4279   after       text to print after
4280 
4281 Returns:      nothing
4282 */
4283 
4284 static void
show_compile_options(uint32_t options,const char * before,const char * after)4285 show_compile_options(uint32_t options, const char *before, const char *after)
4286 {
4287 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4288 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4289   before,
4290   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4291   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4292   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4293   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4294   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4295   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4296   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4297   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4298   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4299   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4300   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4301   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4302   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4303   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4304   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4305   ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4306   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4307   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4308   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4309   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4310   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4311   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4312   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4313   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4314   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4315   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4316   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4317   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4318   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4319   ((options & PCRE2_UTF) != 0)? " utf" : "",
4320   after);
4321 }
4322 
4323 
4324 /*************************************************
4325 *           Show compile extra options           *
4326 *************************************************/
4327 
4328 /* Called from show_pattern_info() and for unsupported POSIX options.
4329 
4330 Arguments:
4331   options     an options word
4332   before      text to print before
4333   after       text to print after
4334 
4335 Returns:      nothing
4336 */
4337 
4338 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4339 show_compile_extra_options(uint32_t options, const char *before,
4340   const char *after)
4341 {
4342 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4343 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4344   before,
4345   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4346   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
4347   ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
4348   ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
4349   ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
4350   ((options & PCRE2_EXTRA_ASCII_DIGIT) != 0)? " ascii_digit" : "",
4351   ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
4352   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4353   ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
4354   ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4355   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4356   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4357   after);
4358 }
4359 
4360 
4361 
#ifdef SUPPORT_PCRE2_8
/*************************************************
*                Show match options              *
*************************************************/

/* Called for unsupported POSIX options. The names of the match option bits
that are set are output in a fixed order, each preceded by a space. Nothing is
output if no listed bit is set.

Argument:   options     an options word
Returns:    nothing
*/

static void
show_match_options(uint32_t options)
{
/* One entry per match option, in output order. */

static const struct {
  uint32_t bit;
  const char *text;
} matchtab[] = {
  { PCRE2_ANCHORED,                  " anchored" },
  { PCRE2_COPY_MATCHED_SUBJECT,      " copy_matched_subject" },
  { PCRE2_DFA_RESTART,               " dfa_restart" },
  { PCRE2_DFA_SHORTEST,              " dfa_shortest" },
  { PCRE2_DISABLE_RECURSELOOP_CHECK, " disable_recurseloop_check" },
  { PCRE2_ENDANCHORED,               " endanchored" },
  { PCRE2_NO_JIT,                    " no_jit" },
  { PCRE2_NO_UTF_CHECK,              " no_utf_check" },
  { PCRE2_NOTBOL,                    " notbol" },
  { PCRE2_NOTEMPTY,                  " notempty" },
  { PCRE2_NOTEMPTY_ATSTART,          " notempty_atstart" },
  { PCRE2_NOTEOL,                    " noteol" },
  { PCRE2_PARTIAL_HARD,              " partial_hard" },
  { PCRE2_PARTIAL_SOFT,              " partial_soft" }
};

size_t k;

for (k = 0; k < sizeof(matchtab)/sizeof(matchtab[0]); k++)
  {
  if ((options & matchtab[k].bit) != 0) fputs(matchtab[k].text, outfile);
  }
}
#endif  /* SUPPORT_PCRE2_8 */
4389 
4390 
4391 
4392 /*************************************************
4393 *      Show memory usage info for a pattern      *
4394 *************************************************/
4395 
4396 static void
show_memory_info(void)4397 show_memory_info(void)
4398 {
4399 uint32_t name_count, name_entry_size;
4400 PCRE2_SIZE size, cblock_size;
4401 
4402 /* One of the test_mode values will always be true, but to stop a compiler
4403 warning we must initialize cblock_size. */
4404 
4405 cblock_size = 0;
4406 #ifdef SUPPORT_PCRE2_8
4407 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4408 #endif
4409 #ifdef SUPPORT_PCRE2_16
4410 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4411 #endif
4412 #ifdef SUPPORT_PCRE2_32
4413 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4414 #endif
4415 
4416 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4417 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4418 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4419 
4420 /* The uint32_t variables are cast before multiplying to stop code analyzers
4421 grumbling about potential overflow. */
4422 
4423 fprintf(outfile, "Memory allocation - compiled block : %" SIZ_FORM "\n", size);
4424 fprintf(outfile, "Memory allocation - code portion   : %" SIZ_FORM "\n", size -
4425   (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size -
4426   cblock_size);
4427 
4428 if (pat_patctl.jit != 0)
4429   {
4430   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4431   fprintf(outfile, "Memory allocation - JIT code       : %" SIZ_FORM "\n", size);
4432   }
4433 }
4434 
4435 
4436 
4437 /*************************************************
4438 *       Show frame size info for a pattern       *
4439 *************************************************/
4440 
4441 static void
show_framesize(void)4442 show_framesize(void)
4443 {
4444 PCRE2_SIZE frame_size;
4445 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4446 fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size);
4447 }
4448 
4449 
4450 
4451 /*************************************************
4452 *   Show heapframes size info for a match_data   *
4453 *************************************************/
4454 
4455 static void
show_heapframes_size(void)4456 show_heapframes_size(void)
4457 {
4458 PCRE2_SIZE heapframes_size;
4459 PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(heapframes_size, match_data);
4460 fprintf(outfile, "Heapframes size in match_data: %" SIZ_FORM "\n",
4461   heapframes_size);
4462 }
4463 
4464 
4465 
4466 /*************************************************
4467 *         Get and output an error message        *
4468 *************************************************/
4469 
4470 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4471 print_error_message(int errorcode, const char *before, const char *after)
4472 {
4473 int len;
4474 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4475 if (len < 0)
4476   {
4477   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4478     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4479   }
4480 else
4481   {
4482   fprintf(outfile, "%s", before);
4483   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4484   fprintf(outfile, "%s", after);
4485   }
4486 return len >= 0;
4487 }
4488 
4489 
4490 /*************************************************
4491 *     Callback function for callout enumeration  *
4492 *************************************************/
4493 
4494 /* The only differences in the callout emumeration block for different code
4495 unit widths are that the pointers to the subject, the most recent MARK, and a
4496 callout argument string point to strings of the appropriate width. Casts can be
4497 used to deal with this.
4498 
4499 Argument:
4500   cb            pointer to enumerate block
4501   callout_data  user data
4502 
4503 Returns:    0
4504 */
4505 
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4506 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4507   void *callout_data)
4508 {
4509 uint32_t i;
4510 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4511 
4512 (void)callout_data;  /* Not currently displayed */
4513 
4514 fprintf(outfile, "Callout ");
4515 if (cb->callout_string != NULL)
4516   {
4517   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4518   fprintf(outfile, "%c", delimiter);
4519   PCHARSV(cb->callout_string, 0,
4520     cb->callout_string_length, utf, outfile);
4521   for (i = 0; callout_start_delims[i] != 0; i++)
4522     if (delimiter == callout_start_delims[i])
4523       {
4524       delimiter = callout_end_delims[i];
4525       break;
4526       }
4527   fprintf(outfile, "%c  ", delimiter);
4528   }
4529 else fprintf(outfile, "%d  ", cb->callout_number);
4530 
4531 fprintf(outfile, "%.*s\n",
4532   (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4533   pbuffer8 + cb->pattern_position);
4534 
4535 return 0;
4536 }
4537 
4538 
4539 
4540 /*************************************************
4541 *        Show information about a pattern        *
4542 *************************************************/
4543 
4544 /* This function is called after a pattern has been compiled if any of the
4545 information-requesting controls have been set.
4546 
4547 Arguments:  none
4548 
4549 Returns:    PR_OK     continue processing next line
4550             PR_SKIP   skip to a blank line
4551             PR_ABEND  abort the pcre2test run
4552 */
4553 
/* Prints whichever kinds of information are selected by the CTL_BINCODE /
CTL_FULLBINCODE, CTL_INFO and CTL_CALLOUT_INFO bits in pat_patctl.control, in
that order, for the pattern currently held in compiled_code. All output goes to
outfile. */
4554 static int
show_pattern_info(void)4555 show_pattern_info(void)
4556 {
4557 uint32_t compile_options, overall_options, extra_options;
4558 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4559 
/* Dump of the compiled code; the macro argument is true only when the full
form (CTL_FULLBINCODE) was requested. */
4560 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4561   {
4562   fprintf(outfile, "------------------------------------------------------------------\n");
4563   PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4564   }
4565 
4566 if ((pat_patctl.control & CTL_INFO) != 0)
4567   {
4568   int rc;
4569   void *nametable;
4570   uint8_t *start_bits;
4571   BOOL heap_limit_set, match_limit_set, depth_limit_set;
4572   uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4573     hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4574     depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4575     newline_convention;
4576 
4577   /* Exercise the error route. */
4578 
  /* 999 is used as an info code that is expected to be rejected; the result
  is deliberately discarded. */
4579   PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4580   (void)rc;
4581 
4582   /* These info requests may return PCRE2_ERROR_UNSET. */
4583 
  /* For each of the three limits: 0 means a value was obtained,
  PCRE2_ERROR_UNSET means there is nothing to report, and any other return
  aborts the run. */
4584   switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4585     {
4586     case 0:
4587     heap_limit_set = TRUE;
4588     break;
4589 
4590     case PCRE2_ERROR_UNSET:
4591     heap_limit_set = FALSE;
4592     break;
4593 
4594     default:
4595     return PR_ABEND;
4596     }
4597 
4598   switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4599     {
4600     case 0:
4601     match_limit_set = TRUE;
4602     break;
4603 
4604     case PCRE2_ERROR_UNSET:
4605     match_limit_set = FALSE;
4606     break;
4607 
4608     default:
4609     return PR_ABEND;
4610     }
4611 
4612   switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4613     {
4614     case 0:
4615     depth_limit_set = TRUE;
4616     break;
4617 
4618     case PCRE2_ERROR_UNSET:
4619     depth_limit_set = FALSE;
4620     break;
4621 
4622     default:
4623     return PR_ABEND;
4624     }
4625 
4626   /* These info requests should always succeed. */
4627 
  /* The return codes are simply summed and the total compared with zero, so
  the run is aborted unless the sum of all the results is zero. */
4628   if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4629       pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4630       pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4631       pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4632       pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4633       pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4634       pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4635       pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4636       pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4637       pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4638       pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4639       pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4640       pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4641       pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4642       pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4643       pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4644       pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4645       != 0)
4646     return PR_ABEND;
4647 
4648   fprintf(outfile, "Capture group count = %d\n", capture_count);
4649 
4650   if (backrefmax > 0)
4651     fprintf(outfile, "Max back reference = %d\n", backrefmax);
4652 
  /* maxlookbehind is not fetched in this function; it is a variable set
  elsewhere in the file. */
4653   if (maxlookbehind > 0)
4654     fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4655 
4656   if (heap_limit_set)
4657     fprintf(outfile, "Heap limit = %u\n", heap_limit);
4658 
4659   if (match_limit_set)
4660     fprintf(outfile, "Match limit = %u\n", match_limit);
4661 
4662   if (depth_limit_set)
4663     fprintf(outfile, "Depth limit = %u\n", depth_limit);
4664 
  /* Walk the name table, one entry per iteration. As read back below, each
  entry begins with the group number, which occupies imm2_size code units (two
  in 8-bit mode, one otherwise), and the zero-terminated name follows it. */
4665   if (namecount > 0)
4666     {
4667     fprintf(outfile, "Named capture groups:\n");
4668     for (; namecount > 0; namecount--)
4669       {
4670       int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4671       uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4672       fprintf(outfile, "  ");
4673 
4674       /* In UTF mode the name may be a UTF string containing non-ASCII
4675       letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4676       use the normal string printing functions, which use escapes for all
4677       non-ASCII characters. */
4678 
4679       if (utf)
4680         {
4681 #ifdef SUPPORT_PCRE2_32
4682         if (test_mode == PCRE32_MODE)
4683           {
4684           PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4685           while (*nameptr != 0)
4686             {
4687             uint8_t u8buff[6];
4688             int len = ord2utf8(*nameptr++, u8buff);
4689             fprintf(outfile, "%.*s", len, u8buff);
4690             }
4691           }
4692 #endif
4693 #ifdef SUPPORT_PCRE2_16
4694         if (test_mode == PCRE16_MODE)
4695           {
4696           PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4697           while (*nameptr != 0)
4698             {
4699             int len;
4700             uint8_t u8buff[6];
4701             uint32_t c = *nameptr++ & 0xffff;
            /* A code unit in 0xD800-0xDBFF is combined with the low ten bits
            of the following code unit to form a code point above 0xFFFF. */
4702             if (c >= 0xD800 && c < 0xDC00)
4703               c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4704             len = ord2utf8(c, u8buff);
4705             fprintf(outfile, "%.*s", len, u8buff);
4706             }
4707           }
4708 #endif
4709 #ifdef SUPPORT_PCRE2_8
4710         if (test_mode == PCRE8_MODE)
4711           fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4712 #endif
4713         }
4714       else  /* Not UTF mode */
4715         {
4716         PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4717         }
4718 
      /* Pad with spaces, based on the name's length in code units, so that
      the group numbers that follow line up in a column. */
4719       while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4720 
      /* The group number is the first code unit of the entry in 16-bit and
      32-bit modes, and the first two bytes (high byte first) in 8-bit mode. */
4721 #ifdef SUPPORT_PCRE2_32
4722       if (test_mode == PCRE32_MODE)
4723         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4724 #endif
4725 #ifdef SUPPORT_PCRE2_16
4726       if (test_mode == PCRE16_MODE)
4727         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4728 #endif
4729 #ifdef SUPPORT_PCRE2_8
4730       if (test_mode == PCRE8_MODE)
4731         fprintf(outfile, "%3d\n", (int)(
4732         ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4733 #endif
4734 
      /* Step to the next entry. The pointer is advanced in bytes, so the
      entry size is scaled by code_unit_size (presumably the width of a code
      unit in bytes for the current mode - confirm at its definition). */
4735       nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4736       }
4737     }
4738 
4739   if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
4740   if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4741   if (match_empty)   fprintf(outfile, "May match empty string\n");
4742 
  /* The results of these three requests are not checked. */
4743   pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4744   pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4745   pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4746 
4747   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4748   cluttering up the verification output of non-UTF test files. */
4749 
4750   if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4751     {
4752     compile_options &= ~PCRE2_NEVER_UTF;
4753     overall_options &= ~PCRE2_NEVER_UTF;
4754     }
4755 
4756   if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4757     {
4758     compile_options &= ~PCRE2_NEVER_UCP;
4759     overall_options &= ~PCRE2_NEVER_UCP;
4760     }
4761 
  /* Show a single "Options:" line when the two option words agree, otherwise
  show them separately. Nothing is shown when both are zero. */
4762   if ((compile_options|overall_options) != 0)
4763     {
4764     if (compile_options == overall_options)
4765       show_compile_options(compile_options, "Options:", "\n");
4766     else
4767       {
4768       show_compile_options(compile_options, "Compile options:", "\n");
4769       show_compile_options(overall_options, "Overall options:", "\n");
4770       }
4771     }
4772 
4773   if (extra_options != 0)
4774     show_compile_extra_options(extra_options, "Extra options:", "\n");
4775 
4776   if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4777 
  /* The \R convention is reported only when it was set, either by a test
  modifier (CTL2_BSR_SET) or as recorded in the compiled pattern's flags. */
4778   if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4779       (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4780     fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4781       "any Unicode newline" : "CR, LF, or CRLF");
4782 
  /* Likewise the newline convention is reported only when the compiled
  pattern's flags have PCRE2_NL_SET; unrecognized values print nothing. */
4783   if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4784     {
4785     switch (newline_convention)
4786       {
4787       case PCRE2_NEWLINE_CR:
4788       fprintf(outfile, "Forced newline is CR\n");
4789       break;
4790 
4791       case PCRE2_NEWLINE_LF:
4792       fprintf(outfile, "Forced newline is LF\n");
4793       break;
4794 
4795       case PCRE2_NEWLINE_CRLF:
4796       fprintf(outfile, "Forced newline is CRLF\n");
4797       break;
4798 
4799       case PCRE2_NEWLINE_ANYCRLF:
4800       fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4801       break;
4802 
4803       case PCRE2_NEWLINE_ANY:
4804       fprintf(outfile, "Forced newline is any Unicode newline\n");
4805       break;
4806 
4807       case PCRE2_NEWLINE_NUL:
4808       fprintf(outfile, "Forced newline is NUL\n");
4809       break;
4810 
4811       default:
4812       break;
4813       }
4814     }
4815 
  /* First code unit information: type 2 and type 1 are reported as shown
  below; for any other type the starting bitmap, if there is one, is listed
  instead. */
4816   if (first_ctype == 2)
4817     {
4818     fprintf(outfile, "First code unit at start or follows newline\n");
4819     }
4820   else if (first_ctype == 1)
4821     {
4822     const char *caseless =
4823       ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4824       "" : " (caseless)";
4825     if (PRINTOK(first_cunit))
4826       fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4827     else
4828       {
4829       fprintf(outfile, "First code unit = ");
4830       pchar(first_cunit, FALSE, outfile);
4831       fprintf(outfile, "%s\n", caseless);
4832       }
4833     }
4834   else if (start_bits != NULL)
4835     {
    /* start_bits is read as a 256-bit map, one bit per code unit value, least
    significant bit first within each byte. c keeps a running count of the
    output column so that a new, indented line is started once it passes 75. */
4836     int i;
4837     int c = 24;
4838     fprintf(outfile, "Starting code units: ");
4839     for (i = 0; i < 256; i++)
4840       {
4841       if ((start_bits[i/8] & (1u << (i&7))) != 0)
4842         {
4843         if (c > 75)
4844           {
4845           fprintf(outfile, "\n  ");
4846           c = 2;
4847           }
4848         if (PRINTOK(i) && i != ' ')
4849           {
4850           fprintf(outfile, "%c ", i);
4851           c += 2;
4852           }
4853         else
4854           {
4855           fprintf(outfile, "\\x%02x ", i);
4856           c += 5;
4857           }
4858         }
4859       }
4860     fprintf(outfile, "\n");
4861     }
4862 
4863   if (last_ctype != 0)
4864     {
4865     const char *caseless =
4866       ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4867       "" : " (caseless)";
4868     if (PRINTOK(last_cunit))
4869       fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4870     else
4871       {
4872       fprintf(outfile, "Last code unit = ");
4873       pchar(last_cunit, FALSE, outfile);
4874       fprintf(outfile, "%s\n", caseless);
4875       }
4876     }
4877 
  /* The minimum subject length is shown only when PCRE2_NO_START_OPTIMIZE is
  not among the pattern's overall options. */
4878   if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4879     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4880 
  /* With jitverify, report whether JIT compilation produced executable code.
  On failure, a non-zero jitrc is turned into a parenthesized error message. */
4881   if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4882     {
4883 #ifdef SUPPORT_JIT
4884     if (FLD(compiled_code, executable_jit) != NULL)
4885       fprintf(outfile, "JIT compilation was successful\n");
4886     else
4887       {
4888       fprintf(outfile, "JIT compilation was not successful");
4889       if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4890         return PR_ABEND;
4891       fprintf(outfile, "\n");
4892       }
4893 #else
4894       fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4895 #endif
4896     }
4897   }
4898 
/* Enumerate the pattern's callouts, passing callout_callback and a zero
user-data argument. A non-zero result is reported and the rest of this test is
skipped; only negative codes are passed to print_error_message(), so a positive
code leaves the report line without a terminating newline. */
4899 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4900   {
4901   int errorcode;
4902   PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4903   if (errorcode != 0)
4904     {
4905     fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4906     if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4907       return PR_ABEND;
4908     return PR_SKIP;
4909     }
4910   }
4911 
4912 return PR_OK;
4913 }
4914 
4915 
4916 
4917 /*************************************************
4918 *              Handle serialization error        *
4919 *************************************************/
4920 
4921 /* Print an error message after a serialization failure.
4922 
4923 Arguments:
4924   rc         the error code
4925   msg        an initial message for what failed
4926 
4927 Returns:     FALSE if print_error_message() fails
4928 */
4929 
4930 static BOOL
serial_error(int rc,const char * msg)4931 serial_error(int rc, const char *msg)
4932 {
4933 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4934 return print_error_message(rc, "", "\n");
4935 }
4936 
4937 
4938 
4939 /*************************************************
4940 *        Open file for save/load commands        *
4941 *************************************************/
4942 
4943 /* This function decodes the file name and opens the file.
4944 
4945 Arguments:
4946   buffptr     point after the #command
4947   mode        open mode
4948   fptr        points to the FILE variable
4949   name        name of # command
4950 
4951 Returns:      PR_OK or PR_ABEND
4952 */
4953 
4954 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4955 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4956 {
4957 char *endf;
4958 char *filename = (char *)buffptr;
4959 while (isspace(*filename)) filename++;
4960 endf = filename + strlen8(filename);
4961 while (endf > filename && isspace(endf[-1])) endf--;
4962 
4963 if (endf == filename)
4964   {
4965   fprintf(outfile, "** File name expected after %s\n", name);
4966   return PR_ABEND;
4967   }
4968 
4969 *endf = 0;
4970 *fptr = fopen((const char *)filename, mode);
4971 if (*fptr == NULL)
4972   {
4973   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4974   return PR_ABEND;
4975   }
4976 
4977 return PR_OK;
4978 }
4979 
4980 
4981 
4982 /*************************************************
4983 *               Process command line             *
4984 *************************************************/
4985 
4986 /* This function is called for lines beginning with # and a character that is
4987 not ! or whitespace, when encountered between tests, which means that there is
4988 no compiled pattern (compiled_code is NULL). The line is in buffer.
4989 
4990 Arguments:  none
4991 
4992 Returns:    PR_OK     continue processing next line
4993             PR_SKIP   skip to a blank line
4994             PR_ABEND  abort the pcre2test run
4995 */
4996 
4997 static int
process_command(void)4998 process_command(void)
4999 {
5000 FILE *f;
5001 PCRE2_SIZE serial_size;
5002 size_t i;
5003 int rc, cmd, cmdlen, yield;
5004 uint16_t first_listed_newline;
5005 const char *cmdname;
5006 uint8_t *argptr, *serial;
5007 
5008 yield = PR_OK;
5009 cmd = CMD_UNKNOWN;
5010 cmdlen = 0;
5011 
5012 for (i = 0; i < cmdlistcount; i++)
5013   {
5014   cmdname = cmdlist[i].name;
5015   cmdlen = strlen(cmdname);
5016   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
5017       isspace(buffer[cmdlen+1]))
5018     {
5019     cmd = cmdlist[i].value;
5020     break;
5021     }
5022   }
5023 
5024 argptr = buffer + cmdlen + 1;
5025 
5026 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
5027   {
5028   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
5029   return PR_ABEND;
5030   }
5031 
5032 switch(cmd)
5033   {
5034   case CMD_UNKNOWN:
5035   fprintf(outfile, "** Unknown command: %s", buffer);
5036   break;
5037 
5038   case CMD_FORBID_UTF:
5039   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
5040   break;
5041 
5042   case CMD_PERLTEST:
5043   restrict_for_perl_test = TRUE;
5044   break;
5045 
5046   /* Set default pattern modifiers */
5047 
5048   case CMD_PATTERN:
5049   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
5050   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
5051     def_patctl.jit = JIT_DEFAULT;
5052   break;
5053 
5054   /* Set default subject modifiers */
5055 
5056   case CMD_SUBJECT:
5057   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
5058   break;
5059 
5060   /* Check the default newline, and if not one of those listed, set up the
5061   first one to be forced. An empty list unsets. */
5062 
5063   case CMD_NEWLINE_DEFAULT:
5064   local_newline_default = 0;   /* Unset */
5065   first_listed_newline = 0;
5066   for (;;)
5067     {
5068     while (isspace(*argptr)) argptr++;
5069     if (*argptr == 0) break;
5070     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
5071       {
5072       size_t nlen = strlen(newlines[i]);
5073       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
5074           isspace(argptr[nlen]))
5075         {
5076         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
5077         if (first_listed_newline == 0) first_listed_newline = i;
5078         }
5079       }
5080     while (*argptr != 0 && !isspace(*argptr)) argptr++;
5081     }
5082   local_newline_default = first_listed_newline;
5083   break;
5084 
5085   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
5086   the compiled pattern (e.g. to give information) are permitted. The default
5087   pattern modifiers are ignored. */
5088 
5089   case CMD_POP:
5090   case CMD_POPCOPY:
5091   if (patstacknext <= 0)
5092     {
5093     fprintf(outfile, "** Can't pop off an empty stack\n");
5094     return PR_SKIP;
5095     }
5096   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
5097   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
5098     return PR_SKIP;
5099 
5100   if (cmd == CMD_POP)
5101     {
5102     SET(compiled_code, patstack[--patstacknext]);
5103     }
5104   else
5105     {
5106     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
5107     }
5108 
5109   if (pat_patctl.jit != 0)
5110     {
5111     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5112     }
5113   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5114   if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
5115   if ((pat_patctl.control & CTL_ANYINFO) != 0)
5116     {
5117     rc = show_pattern_info();
5118     if (rc != PR_OK) return rc;
5119     }
5120   break;
5121 
5122   /* Save the stack of compiled patterns to a file, then empty the stack. */
5123 
5124   case CMD_SAVE:
5125   if (patstacknext <= 0)
5126     {
5127     fprintf(outfile, "** No stacked patterns to save\n");
5128     return PR_OK;
5129     }
5130 
5131   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
5132   if (rc != PR_OK) return rc;
5133 
5134   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
5135     general_context);
5136   if (rc < 0)
5137     {
5138     fclose(f);
5139     if (!serial_error(rc, "Serialization")) return PR_ABEND;
5140     break;
5141     }
5142 
5143   /* Write the length at the start of the file to make it straightforward to
5144   get the right memory when re-loading. This saves having to read the file size
5145   in different operating systems. To allow for different endianness (even
5146   though reloading with the opposite endianness does not work), write the
5147   length byte-by-byte. */
5148 
5149   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5150   if (fwrite(serial, 1, serial_size, f) != serial_size)
5151     {
5152     fprintf(outfile, "** Wrong return from fwrite()\n");
5153     fclose(f);
5154     return PR_ABEND;
5155     }
5156 
5157   fclose(f);
5158   PCRE2_SERIALIZE_FREE(serial);
5159   while(patstacknext > 0)
5160     {
5161     SET(compiled_code, patstack[--patstacknext]);
5162     SUB1(pcre2_code_free, compiled_code);
5163     }
5164   SET(compiled_code, NULL);
5165   break;
5166 
5167   /* Load a set of compiled patterns from a file onto the stack */
5168 
5169   case CMD_LOAD:
5170   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5171   if (rc != PR_OK) return rc;
5172 
5173   serial_size = 0;
5174   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5175 
5176   serial = malloc(serial_size);
5177   if (serial == NULL)
5178     {
5179     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5180       serial_size);
5181     fclose(f);
5182     return PR_ABEND;
5183     }
5184 
5185   i = fread(serial, 1, serial_size, f);
5186   fclose(f);
5187 
5188   if (i != serial_size)
5189     {
5190     fprintf(outfile, "** Wrong return from fread()\n");
5191     yield = PR_ABEND;
5192     }
5193   else
5194     {
5195     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5196     if (rc < 0)
5197       {
5198       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5199       }
5200     else
5201       {
5202       if (rc + patstacknext > PATSTACKSIZE)
5203         {
5204         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5205           rc, (rc == 1)? "" : "s");
5206         rc = PATSTACKSIZE - patstacknext;
5207         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5208           (rc == 1)? "" : "s");
5209         }
5210       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5211         general_context);
5212       if (rc < 0)
5213         {
5214         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5215         }
5216       else patstacknext += rc;
5217       }
5218     }
5219 
5220   free(serial);
5221   break;
5222 
5223   /* Load a set of binary tables into tables3. */
5224 
5225   case CMD_LOADTABLES:
5226   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5227   if (rc != PR_OK) return rc;
5228 
5229   if (tables3 == NULL)
5230     {
5231     (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5232     tables3 = malloc(loadtables_length);
5233     }
5234 
5235   if (tables3 == NULL)
5236     {
5237     fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5238     yield = PR_ABEND;
5239     }
5240   else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5241     {
5242     fprintf(outfile, "** Wrong return from fread()\n");
5243     yield = PR_ABEND;
5244     }
5245 
5246   fclose(f);
5247   break;
5248   }
5249 
5250 return yield;
5251 }
5252 
5253 
5254 
5255 /*************************************************
5256 *               Process pattern line             *
5257 *************************************************/
5258 
5259 /* This function is called when the input buffer contains the start of a
5260 pattern. The first character is known to be a valid delimiter. The pattern is
5261 read, modifiers are interpreted, and a suitable local context is set up for
5262 this test. The pattern is then compiled.
5263 
5264 Arguments:  none
5265 
5266 Returns:    PR_OK     continue processing next line
5267             PR_SKIP   skip to a blank line
5268             PR_ABEND  abort the pcre2test run
5269 */
5270 
5271 static int
process_pattern(void)5272 process_pattern(void)
5273 {
5274 BOOL utf;
5275 uint32_t k;
5276 uint8_t *p = buffer;
5277 unsigned int delimiter = *p++;
5278 int errorcode;
5279 void *use_pat_context;
5280 void *use_pbuffer = NULL;
5281 uint32_t use_forbid_utf = forbid_utf;
5282 PCRE2_SIZE patlen;
5283 PCRE2_SIZE valgrind_access_length;
5284 PCRE2_SIZE erroroffset;
5285 
5286 /* The perltest.sh script supports only / as a delimiter. */
5287 
5288 if (restrict_for_perl_test && delimiter != '/')
5289   {
5290   fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5291   return PR_ABEND;
5292   }
5293 
5294 /* Initialize the context and pattern/data controls for this test from the
5295 defaults. */
5296 
5297 PATCTXCPY(pat_context, default_pat_context);
5298 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5299 
5300 /* Find the end of the pattern, reading more lines if necessary. */
5301 
5302 for(;;)
5303   {
5304   while (*p != 0)
5305     {
5306     if (*p == '\\' && p[1] != 0) p++;
5307       else if (*p == delimiter) break;
5308     p++;
5309     }
5310   if (*p != 0) break;
5311   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
5312     {
5313     fprintf(outfile, "** Unexpected EOF\n");
5314     return PR_ABEND;
5315     }
5316   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5317   }
5318 
5319 /* If the first character after the delimiter is backslash, make the pattern
5320 end with backslash. This is purely to provide a way of testing for the error
5321 message when a pattern ends with backslash. */
5322 
5323 if (p[1] == '\\') *p++ = '\\';
5324 
5325 /* Terminate the pattern at the delimiter, and compute the length. */
5326 
5327 *p++ = 0;
5328 patlen = p - buffer - 2;
5329 
5330 /* Look for modifiers and options after the final delimiter. */
5331 
5332 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5333 
5334 /* Note that the match_invalid_utf option also sets utf when passed to
5335 pcre2_compile(). */
5336 
5337 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5338 
5339 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5340 exclusive with the utf modifier. */
5341 
5342 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5343   {
5344   if (test_mode == PCRE8_MODE)
5345     {
5346     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5347     return PR_SKIP;
5348     }
5349   if (utf)
5350     {
5351     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5352     return PR_SKIP;
5353     }
5354   }
5355 
5356 /* The convert and posix modifiers are mutually exclusive. */
5357 
5358 if (pat_patctl.convert_type != CONVERT_UNSET &&
5359     (pat_patctl.control & CTL_POSIX) != 0)
5360   {
5361   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5362   return PR_SKIP;
5363   }
5364 
5365 /* Check for mutually exclusive control modifiers. At present, these are all in
5366 the first control word. */
5367 
5368 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5369   {
5370   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5371   if (c != 0 && c != (c & (~c+1)))
5372     {
5373     show_controls(c, 0, "** Not allowed together:");
5374     fprintf(outfile, "\n");
5375     return PR_SKIP;
5376     }
5377   }
5378 
5379 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5380 specified. */
5381 
5382 if (pat_patctl.jit == 0 &&
5383     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5384   pat_patctl.jit = JIT_DEFAULT;
5385 
5386 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5387 in callouts. Convert from hex if requested (literal strings in quotes may be
5388 present within the hexadecimal pairs). The result must necessarily be fewer
5389 characters so will always fit in pbuffer8. */
5390 
5391 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5392   {
5393   uint8_t *pp, *pt;
5394   uint32_t c, d;
5395 
5396   pt = pbuffer8;
5397   for (pp = buffer + 1; *pp != 0; pp++)
5398     {
5399     if (isspace(*pp)) continue;
5400     c = *pp++;
5401 
5402     /* Handle a literal substring */
5403 
5404     if (c == '\'' || c == '"')
5405       {
5406       uint8_t *pq = pp;
5407       for (;; pp++)
5408         {
5409         d = *pp;
5410         if (d == 0)
5411           {
5412           fprintf(outfile, "** Missing closing quote in hex pattern: "
5413             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5414           return PR_SKIP;
5415           }
5416         if (d == c) break;
5417         *pt++ = d;
5418         }
5419       }
5420 
5421     /* Expect a hex pair */
5422 
5423     else
5424       {
5425       if (!isxdigit(c))
5426         {
5427         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5428           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5429         return PR_SKIP;
5430         }
5431       if (*pp == 0)
5432         {
5433         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5434         return PR_SKIP;
5435         }
5436       d = *pp;
5437       if (!isxdigit(d))
5438         {
5439         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5440           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5441         return PR_SKIP;
5442         }
5443       c = toupper(c);
5444       d = toupper(d);
5445       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5446                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5447       }
5448     }
5449   *pt = 0;
5450   patlen = pt - pbuffer8;
5451   }
5452 
5453 /* If not a hex string, process for repetition expansion if requested. */
5454 
5455 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5456   {
5457   uint8_t *pp, *pt;
5458 
5459   pt = pbuffer8;
5460   for (pp = buffer + 1; *pp != 0; pp++)
5461     {
5462     uint8_t *pc = pp;
5463     uint32_t count = 1;
5464     size_t length = 1;
5465 
5466     /* Check for replication syntax; if not found, the defaults just set will
5467     prevail and one character will be copied. */
5468 
5469     if (pp[0] == '\\' && pp[1] == '[')
5470       {
5471       uint8_t *pe;
5472       for (pe = pp + 2; *pe != 0; pe++)
5473         {
5474         if (pe[0] == ']' && pe[1] == '{')
5475           {
5476           uint32_t clen = pe - pc - 2;
5477           uint32_t i = 0;
5478           unsigned long uli;
5479           char *endptr;
5480 
5481           pe += 2;
5482           uli = strtoul((const char *)pe, &endptr, 10);
5483           if (U32OVERFLOW(uli))
5484             {
5485             fprintf(outfile, "** Pattern repeat count too large\n");
5486             return PR_SKIP;
5487             }
5488 
5489           i = (uint32_t)uli;
5490           pe = (uint8_t *)endptr;
5491           if (*pe == '}')
5492             {
5493             if (i == 0)
5494               {
5495               fprintf(outfile, "** Zero repeat not allowed\n");
5496               return PR_SKIP;
5497               }
5498             pc += 2;
5499             count = i;
5500             length = clen;
5501             pp = pe;
5502             break;
5503             }
5504           }
5505         }
5506       }
5507 
5508     /* Add to output. If the buffer is too small expand it. The function for
5509     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5510     size goes. */
5511 
5512     while (pt + count * length > pbuffer8 + pbuffer8_size)
5513       {
5514       size_t pc_offset = pc - buffer;
5515       size_t pp_offset = pp - buffer;
5516       size_t pt_offset = pt - pbuffer8;
5517       expand_input_buffers();
5518       pc = buffer + pc_offset;
5519       pp = buffer + pp_offset;
5520       pt = pbuffer8 + pt_offset;
5521       }
5522 
5523     for (; count > 0; count--)
5524       {
5525       memcpy(pt, pc, length);
5526       pt += length;
5527       }
5528     }
5529 
5530   *pt = 0;
5531   patlen = pt - pbuffer8;
5532 
5533   if ((pat_patctl.control & CTL_INFO) != 0)
5534     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5535   }
5536 
5537 /* Neither hex nor expanded, just copy the input verbatim. */
5538 
5539 else
5540   {
5541   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5542   }
5543 
5544 /* Sort out character tables */
5545 
5546 if (pat_patctl.locale[0] != 0)
5547   {
5548   if (pat_patctl.tables_id != 0)
5549     {
5550     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5551     return PR_SKIP;
5552     }
5553   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5554     {
5555     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5556     return PR_SKIP;
5557     }
5558   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5559     {
5560     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5561     if (locale_tables != NULL)
5562       {
5563       PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
5564       }
5565     PCRE2_MAKETABLES(locale_tables, general_context);
5566     }
5567   use_tables = locale_tables;
5568   }
5569 
5570 else switch (pat_patctl.tables_id)
5571   {
5572   case 0: use_tables = NULL; break;
5573   case 1: use_tables = tables1; break;
5574   case 2: use_tables = tables2; break;
5575 
5576   case 3:
5577   if (tables3 == NULL)
5578     {
5579     fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5580       "been loaded\n");
5581     return PR_SKIP;
5582     }
5583   use_tables = tables3;
5584   break;
5585 
5586   default:
5587   fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5588   return PR_SKIP;
5589   }
5590 
5591 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5592 
5593 /* Set up for the stackguard test. */
5594 
5595 if (pat_patctl.stackguard_test != 0)
5596   {
5597   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5598   }
5599 
5600 /* Handle compiling via the POSIX interface, which doesn't support the
5601 timing, showing, or debugging options, nor the ability to pass over
5602 local character tables. Neither does it have 16-bit or 32-bit support. */
5603 
5604 if ((pat_patctl.control & CTL_POSIX) != 0)
5605   {
5606 #ifdef SUPPORT_PCRE2_8
5607   int rc;
5608   int cflags = 0;
5609   const char *msg = "** Ignored with POSIX interface:";
5610 #endif
5611 
5612   if (test_mode != PCRE8_MODE)
5613     {
5614     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5615     return PR_SKIP;
5616     }
5617 
5618 #ifdef SUPPORT_PCRE2_8
5619   /* Check for features that the POSIX interface does not support. */
5620 
5621   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5622   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5623   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5624   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5625   if (timeit > 0) prmsg(&msg, "timing");
5626   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5627 
5628   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5629     {
5630     show_compile_options(
5631       pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
5632         msg, "");
5633     msg = "";
5634     }
5635 
5636   if ((FLD(pat_context, extra_options) &
5637        (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
5638     {
5639     show_compile_extra_options(
5640       FLD(pat_context, extra_options) &
5641         (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
5642     msg = "";
5643     }
5644 
5645   if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
5646       (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
5647     {
5648     show_controls(
5649       pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
5650       pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
5651       msg);
5652     msg = "";
5653 
5654     /* Remove ignored options so as not to get a repeated message for those
5655     that are actually subject controls. */
5656 
5657     pat_patctl.control &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS);
5658     pat_patctl.control2 &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS2);
5659     }
5660 
5661   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5662   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5663     prmsg(&msg, "max_pattern_length");
5664   if (FLD(pat_context, max_pattern_compiled_length) != PCRE2_UNSET)
5665     prmsg(&msg, "max_pattern_compiled_length");
5666   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5667     prmsg(&msg, "parens_nest_limit");
5668 
5669   if (msg[0] == 0) fprintf(outfile, "\n");
5670 
5671   /* Translate PCRE2 options to POSIX options and then compile. */
5672 
5673   if (utf) cflags |= REG_UTF;
5674   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5675   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5676   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5677   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5678   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5679   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5680   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5681 
5682   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5683     {
5684     preg.re_endp = (char *)pbuffer8 + patlen;
5685     cflags |= REG_PEND;
5686     }
5687 
5688   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5689 
5690   /* Compiling failed */
5691 
5692   if (rc != 0)
5693     {
5694     size_t bsize, usize;
5695     int psize;
5696 
5697     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5698     preg.re_match_data = NULL;
5699 
5700     bsize = (pat_patctl.regerror_buffsize != 0)?
5701       pat_patctl.regerror_buffsize : pbuffer8_size;
5702     if (bsize + 8 < pbuffer8_size)
5703       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5704     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5705 
5706     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5707     versions of snprintf() put a zero byte at the end, but others do not.
5708     Therefore, we print a maximum of one less than the size of the buffer. */
5709 
5710     psize = (int)bsize - 1;
5711     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5712     if (usize > bsize)
5713       {
5714       fprintf(outfile, "** regerror() message truncated\n");
5715       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5716         fprintf(outfile, "** regerror() buffer overflow\n");
5717       }
5718     return PR_SKIP;
5719     }
5720 
5721   /* Compiling succeeded. Check that the values in the preg block are sensible.
5722   It can happen that pcre2test is accidentally linked with a different POSIX
5723   library which succeeds, but of course puts different things into preg. In
5724   this situation, calling regfree() may cause a segfault (or invalid free() in
5725   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5726   calling of regfree() on exit. */
5727 
5728   if (preg.re_pcre2_code == NULL ||
5729       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5730       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5731       preg.re_match_data == NULL ||
5732       preg.re_cflags != cflags)
5733     {
5734     fprintf(outfile,
5735       "** The regcomp() function returned zero (success), but the values set\n"
5736       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5737       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5738       "** some other POSIX regex library.\n**\n");
5739     preg.re_pcre2_code = NULL;
5740     return PR_ABEND;
5741     }
5742 
5743   return PR_OK;
5744 #endif  /* SUPPORT_PCRE2_8 */
5745   }
5746 
5747 /* Handle compiling via the native interface. Controls that act later are
5748 ignored with "push". Replacements are locked out. */
5749 
5750 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5751   {
5752   if (pat_patctl.replacement[0] != 0)
5753     {
5754     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5755     return PR_OK;
5756     }
5757   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5758       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5759     {
5760     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5761                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5762       "** Ignored when compiled pattern is stacked with 'push':");
5763     fprintf(outfile, "\n");
5764     }
5765   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5766       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5767     {
5768     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5769                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5770       "** Applies only to compile when pattern is stacked with 'push':");
5771     fprintf(outfile, "\n");
5772     }
5773   }
5774 
5775 /* Convert the input in non-8-bit modes. */
5776 
5777 errorcode = 0;
5778 
5779 #ifdef SUPPORT_PCRE2_16
5780 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5781 #endif
5782 
5783 #ifdef SUPPORT_PCRE2_32
5784 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5785 #endif
5786 
5787 switch(errorcode)
5788   {
5789   case -1:
5790   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5791     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5792   return PR_SKIP;
5793 
5794   case -2:
5795   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5796     "cannot be converted to UTF\n");
5797   return PR_SKIP;
5798 
5799   case -3:
5800   fprintf(outfile, "** Failed: character value greater than 0xffff "
5801     "cannot be converted to 16-bit in non-UTF mode\n");
5802   return PR_SKIP;
5803 
5804   default:
5805   break;
5806   }
5807 
5808 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5809 patlen. If it is to be converted, copy the result back afterwards so that it
5810 ends up back in the usual place. */
5811 
5812 if (pat_patctl.convert_type != CONVERT_UNSET)
5813   {
5814   int rc;
5815   int convert_return = PR_OK;
5816   uint32_t convert_options = pat_patctl.convert_type;
5817   void *converted_pattern;
5818   PCRE2_SIZE converted_length;
5819 
5820   if (pat_patctl.convert_length != 0)
5821     {
5822     converted_length = pat_patctl.convert_length;
5823     converted_pattern = malloc(converted_length * code_unit_size);
5824     if (converted_pattern == NULL)
5825       {
5826       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5827       return PR_SKIP;
5828       }
5829     }
5830   else converted_pattern = NULL;  /* Let the library allocate */
5831 
5832   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5833   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5834     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5835 
5836   CONCTXCPY(con_context, default_con_context);
5837 
5838   if (pat_patctl.convert_glob_escape != 0)
5839     {
5840     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5841       pat_patctl.convert_glob_escape;
5842     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5843     if (rc != 0)
5844       {
5845       fprintf(outfile, "** Invalid glob escape '%c'\n",
5846         pat_patctl.convert_glob_escape);
5847       convert_return = PR_SKIP;
5848       goto CONVERT_FINISH;
5849       }
5850     }
5851 
5852   if (pat_patctl.convert_glob_separator != 0)
5853     {
5854     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5855     if (rc != 0)
5856       {
5857       fprintf(outfile, "** Invalid glob separator '%c'\n",
5858         pat_patctl.convert_glob_separator);
5859       convert_return = PR_SKIP;
5860       goto CONVERT_FINISH;
5861       }
5862     }
5863 
5864   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5865     &converted_pattern, &converted_length, con_context);
5866 
5867   if (rc != 0)
5868     {
5869     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5870       converted_length);
5871     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5872     }
5873 
5874   /* Output the converted pattern, then copy it. */
5875 
5876   else
5877     {
5878     BOOL toolong;
5879     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5880     fprintf(outfile, "\n");
5881 
5882     if (test_mode == PCRE8_MODE)
5883       toolong = (converted_length + 1 > pbuffer8_size);
5884     else if (test_mode == PCRE16_MODE)
5885       toolong = (2*(converted_length + 1) > pbuffer8_size);
5886     else  /* 32-bit */
5887       toolong = (4*(converted_length + 1) > pbuffer8_size);
5888 
5889     if (toolong)
5890       {
5891       fprintf(outfile, "** Pattern conversion is too long for the buffer\n");
5892       convert_return = PR_SKIP;
5893       }
5894     else
5895       {
5896       CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5897       patlen = converted_length;
5898       }
5899     }
5900 
5901   /* Free the converted pattern. */
5902 
5903   CONVERT_FINISH:
5904   if (pat_patctl.convert_length != 0)
5905     free(converted_pattern);
5906   else
5907     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5908 
5909   /* Return if conversion was unsuccessful. */
5910 
5911   if (convert_return != PR_OK) return convert_return;
5912   }
5913 
5914 /* By default we pass a zero-terminated pattern, but a length is passed if
5915 "use_length" was specified or this is a hex pattern (which might contain binary
5916 zeros). When valgrind is supported, arrange for the unused part of the buffer
5917 to be marked as no access. */
5918 
5919 valgrind_access_length = patlen;
5920 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5921   {
5922   patlen = PCRE2_ZERO_TERMINATED;
5923   valgrind_access_length += 1;  /* For the terminating zero */
5924   }
5925 
5926 #ifdef SUPPORT_VALGRIND
5927 #ifdef SUPPORT_PCRE2_8
5928 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5929   {
5930   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5931     pbuffer8_size - valgrind_access_length);
5932   }
5933 #endif
5934 #ifdef SUPPORT_PCRE2_16
5935 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5936   {
5937   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5938     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5939   }
5940 #endif
5941 #ifdef SUPPORT_PCRE2_32
5942 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5943   {
5944   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5945     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5946   }
5947 #endif
5948 #else  /* Valgrind not supported */
5949 (void)valgrind_access_length;  /* Avoid compiler warning */
5950 #endif
5951 
5952 /* If #newline_default has been used and the library was not compiled with an
5953 appropriate default newline setting, local_newline_default will be non-zero. We
5954 use this if there is no explicit newline modifier. */
5955 
5956 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5957   {
5958   SETFLD(pat_context, newline_convention, local_newline_default);
5959   }
5960 
5961 /* The null_context modifier is used to test calling pcre2_compile() with a
5962 NULL context. */
5963 
5964 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5965   NULL : PTR(pat_context);
5966 
5967 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5968 and PCRE2_NEVER_UCP are invalid with it. */
5969 
5970 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5971 
5972 /* Set use_pbuffer to the input buffer, or leave it as NULL if requested. */
5973 
5974 if ((pat_patctl.control2 & CTL2_NULL_PATTERN) == 0)
5975   {
5976 #ifdef SUPPORT_PCRE2_8
5977   if (test_mode == PCRE8_MODE) use_pbuffer = pbuffer8;
5978 #endif
5979 #ifdef SUPPORT_PCRE2_16
5980   if (test_mode == PCRE16_MODE) use_pbuffer = pbuffer16;
5981 #endif
5982 #ifdef SUPPORT_PCRE2_32
5983   if (test_mode == PCRE32_MODE) use_pbuffer = pbuffer32;
5984 #endif
5985   }
5986 
5987 /* Compile many times when timing. */
5988 
5989 if (timeit > 0)
5990   {
5991   int i;
5992   clock_t time_taken = 0;
5993   for (i = 0; i < timeit; i++)
5994     {
5995     clock_t start_time = clock();
5996     PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
5997       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5998         use_pat_context);
5999     time_taken += clock() - start_time;
6000     if (TEST(compiled_code, !=, NULL))
6001       { SUB1(pcre2_code_free, compiled_code); }
6002     }
6003   total_compile_time += time_taken;
6004   fprintf(outfile, "Compile time %8.4f microseconds\n",
6005     ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
6006   }
6007 
6008 /* A final compile that is used "for real". */
6009 
6010 PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
6011   pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, use_pat_context);
6012 
6013 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
6014 and 32-bit buffers can be marked completely undefined, but we must leave the
6015 pattern in the 8-bit buffer defined because it may be read from a callout
6016 during matching. */
6017 
6018 #ifdef SUPPORT_VALGRIND
6019 #ifdef SUPPORT_PCRE2_8
6020 if (test_mode == PCRE8_MODE)
6021   {
6022   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
6023     pbuffer8_size - valgrind_access_length);
6024   }
6025 #endif
6026 #ifdef SUPPORT_PCRE2_16
6027 if (test_mode == PCRE16_MODE)
6028   {
6029   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
6030   }
6031 #endif
6032 #ifdef SUPPORT_PCRE2_32
6033 if (test_mode == PCRE32_MODE)
6034   {
6035   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
6036   }
6037 #endif
6038 #endif
6039 
6040 /* Call the JIT compiler if requested. When timing, we must free and recompile
6041 the pattern each time because that is the only way to free the JIT compiled
6042 code. We know that compilation will always succeed. */
6043 
6044 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
6045   {
6046   if (timeit > 0)
6047     {
6048     int i;
6049     clock_t time_taken = 0;
6050 
6051     for (i = 0; i < timeit; i++)
6052       {
6053       clock_t start_time;
6054       SUB1(pcre2_code_free, compiled_code);
6055       PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
6056         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
6057         use_pat_context);
6058       start_time = clock();
6059       PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
6060       time_taken += clock() - start_time;
6061       if (jitrc != 0)
6062         {
6063         fprintf(outfile, "JIT compilation was not successful");
6064         if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
6065         break;
6066         }
6067       }
6068     total_jit_compile_time += time_taken;
6069     if (jitrc == 0)
6070       fprintf(outfile, "JIT compile  %8.4f microseconds\n",
6071         ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
6072     }
6073   else
6074     {
6075     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
6076     if (jitrc != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
6077       {
6078       fprintf(outfile, "JIT compilation was not successful");
6079       if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
6080       }
6081     }
6082   }
6083 
6084 /* Compilation failed; go back for another re, skipping to blank line
6085 if non-interactive. */
6086 
6087 if (TEST(compiled_code, ==, NULL))
6088   {
6089   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
6090     (int)erroroffset);
6091   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
6092   return PR_SKIP;
6093   }
6094 
6095 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
6096 locked out at compile time, but we must also check for occurrences of \P, \p,
6097 and \X, which are only supported when Unicode is supported. */
6098 
6099 if (forbid_utf != 0)
6100   {
6101   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
6102     {
6103     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
6104       "#forbid_utf command\n");
6105     return PR_SKIP;
6106     }
6107   }
6108 
6109 /* Remember the maximum lookbehind, for partial matching. */
6110 
6111 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
6112   return PR_ABEND;
6113 
6114 /* Remember the number of captures. */
6115 
6116 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
6117   return PR_ABEND;
6118 
6119 /* If an explicit newline modifier was given, set the information flag in the
6120 pattern so that it is preserved over push/pop. */
6121 
6122 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
6123   {
6124   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
6125   }
6126 
6127 /* Output code size and other information if requested. */
6128 
6129 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
6130 if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
6131 if ((pat_patctl.control & CTL_ANYINFO) != 0)
6132   {
6133   int rc = show_pattern_info();
6134   if (rc != PR_OK) return rc;
6135   }
6136 
6137 /* The "push" control requests that the compiled pattern be remembered on a
6138 stack. This is mainly for testing the serialization functionality. */
6139 
6140 if ((pat_patctl.control & CTL_PUSH) != 0)
6141   {
6142   if (patstacknext >= PATSTACKSIZE)
6143     {
6144     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
6145     return PR_ABEND;
6146     }
6147   patstack[patstacknext++] = PTR(compiled_code);
6148   SET(compiled_code, NULL);
6149   }
6150 
6151 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
6152 copy of the pattern, the latter with a copy of its character tables. This tests
6153 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
6154 
6155 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
6156   {
6157   if (patstacknext >= PATSTACKSIZE)
6158     {
6159     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
6160     return PR_ABEND;
6161     }
6162   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
6163     {
6164     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
6165     }
6166   else
6167     {
6168     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
6169       compiled_code); }
6170   }
6171 
6172 return PR_OK;
6173 }
6174 
6175 
6176 
6177 /*************************************************
6178 *          Check heap, match or depth limit      *
6179 *************************************************/
6180 
6181 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
6182 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
6183 
6184 Arguments:
6185   pp        the subject string
6186   ulen      length of subject or PCRE2_ZERO_TERMINATED
6187   errnumber defines which limit to test
6188   msg       string to include in final message
6189 
6190 Returns:    the return from the final match function call
6191 */
6192 
6193 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)6194 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
6195 {
6196 int capcount;
6197 uint32_t min = 0;
6198 uint32_t mid = 64;
6199 uint32_t max = UINT32_MAX;
6200 
6201 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6202 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6203 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6204 
6205 for (;;)
6206   {
6207   uint32_t stack_start = 0;
6208 
6209   /* If we are checking the heap limit, free any frames vector that is cached
6210   in the match_data so we always start without one. */
6211 
6212   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6213     {
6214     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6215 
6216 #ifdef SUPPORT_PCRE2_8
6217     if (code_unit_size == 1)
6218       {
6219       match_data8->memctl.free(match_data8->heapframes,
6220         match_data8->memctl.memory_data);
6221       match_data8->heapframes = NULL;
6222       match_data8->heapframes_size = 0;
6223       }
6224 #endif
6225 
6226 #ifdef SUPPORT_PCRE2_16
6227     if (code_unit_size == 2)
6228       {
6229       match_data16->memctl.free(match_data16->heapframes,
6230         match_data16->memctl.memory_data);
6231       match_data16->heapframes = NULL;
6232       match_data16->heapframes_size = 0;
6233       }
6234 #endif
6235 
6236 #ifdef SUPPORT_PCRE2_32
6237     if (code_unit_size == 4)
6238       {
6239       match_data32->memctl.free(match_data32->heapframes,
6240         match_data32->memctl.memory_data);
6241       match_data32->heapframes = NULL;
6242       match_data32->heapframes_size = 0;
6243       }
6244 #endif
6245     }
6246 
6247   /* No need to mess with the frames vector for match or depth limits. */
6248 
6249   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6250     {
6251     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6252     }
6253   else
6254     {
6255     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6256     }
6257 
6258   /* Do the appropriate match */
6259 
6260   if ((dat_datctl.control & CTL_DFA) != 0)
6261     {
6262     stack_start = DFA_START_RWS_SIZE/1024;
6263     if (dfa_workspace == NULL)
6264       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6265     if (dfa_matched++ == 0)
6266       dfa_workspace[0] = -1;  /* To catch bad restart */
6267     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6268       dat_datctl.options, match_data,
6269       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6270     }
6271 
6272   else if ((pat_patctl.control & CTL_JITFAST) != 0)
6273     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6274       dat_datctl.options, match_data, PTR(dat_context));
6275 
6276   else
6277     {
6278     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6279       dat_datctl.options, match_data, PTR(dat_context));
6280     }
6281 
6282   if (capcount == errnumber)
6283     {
6284     if ((mid & 0x80000000u) != 0)
6285       {
6286       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6287         "restriction\n", msg);
6288       break;
6289       }
6290 
6291     min = mid;
6292     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6293     }
6294   else if (capcount >= 0 ||
6295            capcount == PCRE2_ERROR_NOMATCH ||
6296            capcount == PCRE2_ERROR_PARTIAL)
6297     {
6298     /* If we've not hit the error with a heap limit less than the size of the
6299     initial stack frame vector (for pcre2_match()) or the initial stack
6300     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6301     the minimum limit is zero; there's no need to go on. The other limits are
6302     always greater than zero. */
6303 
6304     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6305       {
6306       fprintf(outfile, "Minimum %s limit = 0\n", msg);
6307       break;
6308       }
6309     if (mid == min + 1)
6310       {
6311       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6312       break;
6313       }
6314     max = mid;
6315     mid = (min + max)/2;
6316     }
6317   else break;    /* Some other error */
6318   }
6319 
6320 return capcount;
6321 }
6322 
6323 
6324 
6325 /*************************************************
6326 *        Substitute callout function             *
6327 *************************************************/
6328 
6329 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6330 Print out the data that is passed back. The substitute callout block is
6331 identical for all code unit widths, so we just pick one.
6332 
6333 Arguments:
6334   scb         pointer to substitute callout block
6335   data_ptr    callout data
6336 
6337 Returns:      nothing
6338 */
6339 
6340 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6341 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6342   void *data_ptr)
6343 {
6344 int yield = 0;
6345 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6346 (void)data_ptr;   /* Not used */
6347 
6348 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6349   scb->subscount, scb->oveccount,
6350   scb->ovector[0], scb->ovector[1]);
6351 
6352 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6353   utf, outfile);
6354 
6355 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6356   scb->output_offsets[0], scb->output_offsets[1]);
6357 
6358 PCHARSV(scb->output, scb->output_offsets[0],
6359   scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6360 
6361 if (scb->subscount == dat_datctl.substitute_stop)
6362   {
6363   yield = -1;
6364   fprintf(outfile, " STOPPED");
6365   }
6366 else if (scb->subscount == dat_datctl.substitute_skip)
6367   {
6368   yield = +1;
6369   fprintf(outfile, " SKIPPED");
6370   }
6371 
6372 fprintf(outfile, "\"\n");
6373 return yield;
6374 }
6375 
6376 
6377 /*************************************************
6378 *              Callout function                  *
6379 *************************************************/
6380 
6381 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6382 we are in the match (unless suppressed). Yield zero unless more callouts than
6383 the fail count, or the callout data is not zero. The only differences in the
6384 callout block for different code unit widths are that the pointers to the
6385 subject, the most recent MARK, and a callout argument string point to strings
6386 of the appropriate width. Casts can be used to deal with this.
6387 
6388 Arguments:
6389   cb                a pointer to a callout block
6390   callout_data_ptr  the provided callout data
6391 
6392 Returns:            0 or 1 or an error, as determined by settings
6393 */
6394 
6395 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6396 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6397 {
6398 FILE *f, *fdefault;
6399 uint32_t i, pre_start, post_start, subject_length;
6400 PCRE2_SIZE current_position;
6401 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6402 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6403 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6404 
6405 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6406 happens only once in simple cases, but we want to repeat after any additional
6407 output caused by CALLOUT_EXTRA. */
6408 
6409 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6410   NULL : outfile;
6411 
6412 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6413   {
6414   f = outfile;
6415   switch (cb->callout_flags)
6416     {
6417     case PCRE2_CALLOUT_BACKTRACK:
6418     fprintf(f, "Backtrack\n");
6419     break;
6420 
6421     case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6422     fprintf(f, "Backtrack\nNo other matching paths\n");
6423     /* Fall through */
6424 
6425     case PCRE2_CALLOUT_STARTMATCH:
6426     fprintf(f, "New match attempt\n");
6427     break;
6428 
6429     default:
6430     f = fdefault;
6431     break;
6432     }
6433   }
6434 else f = fdefault;
6435 
6436 /* For a callout with a string argument, show the string first because there
6437 isn't a tidy way to fit it in the rest of the data. */
6438 
6439 if (cb->callout_string != NULL)
6440   {
6441   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6442   fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6443     cb->callout_string_offset, delimiter);
6444   PCHARSV(cb->callout_string, 0,
6445     cb->callout_string_length, utf, outfile);
6446   for (i = 0; callout_start_delims[i] != 0; i++)
6447     if (delimiter == callout_start_delims[i])
6448       {
6449       delimiter = callout_end_delims[i];
6450       break;
6451       }
6452   fprintf(outfile, "%c", delimiter);
6453   if (!callout_capture) fprintf(outfile, "\n");
6454   }
6455 
6456 /* Show captured strings if required */
6457 
6458 if (callout_capture)
6459   {
6460   if (cb->callout_string == NULL)
6461     fprintf(outfile, "Callout %d:", cb->callout_number);
6462   fprintf(outfile, " last capture = %d\n", cb->capture_last);
6463   for (i = 2; i < cb->capture_top * 2; i += 2)
6464     {
6465     fprintf(outfile, "%2d: ", i/2);
6466     if (cb->offset_vector[i] == PCRE2_UNSET)
6467       fprintf(outfile, "<unset>");
6468     else
6469       {
6470       PCHARSV(cb->subject, cb->offset_vector[i],
6471         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6472       }
6473     fprintf(outfile, "\n");
6474     }
6475   }
6476 
6477 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6478 non-printing characters), the first time, or if giving full details. On
6479 subsequent calls in the same match, we use PCHARS() just to find the printed
6480 lengths of the substrings. */
6481 
6482 if (callout_where)
6483   {
6484   if (f != NULL) fprintf(f, "--->");
6485 
6486   /* The subject before the match start. */
6487 
6488   PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6489 
6490   /* If a lookbehind is involved, the current position may be earlier than the
6491   match start. If so, use the match start instead. */
6492 
6493   current_position = (cb->current_position >= cb->start_match)?
6494     cb->current_position : cb->start_match;
6495 
6496   /* The subject between the match start and the current position. */
6497 
6498   PCHARS(post_start, cb->subject, cb->start_match,
6499     current_position - cb->start_match, utf, f);
6500 
6501   /* Print from the current position to the end. */
6502 
6503   PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6504     utf, f);
6505 
6506   /* Calculate the total subject printed length (no print). */
6507 
6508   PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6509 
6510   if (f != NULL) fprintf(f, "\n");
6511 
6512   /* For automatic callouts, show the pattern offset. Otherwise, for a
6513   numerical callout whose number has not already been shown with captured
6514   strings, show the number here. A callout with a string argument has been
6515   displayed above. */
6516 
6517   if (cb->callout_number == 255)
6518     {
6519     fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6520     if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
6521     }
6522   else
6523     {
6524     if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
6525       else fprintf(outfile, "%3d ", cb->callout_number);
6526     }
6527 
6528   /* Now show position indicators */
6529 
6530   for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6531   fprintf(outfile, "^");
6532 
6533   if (post_start > 0)
6534     {
6535     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6536     fprintf(outfile, "^");
6537     }
6538 
6539   for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6540     fprintf(outfile, " ");
6541 
6542   if (cb->next_item_length != 0)
6543     fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6544       pbuffer8 + cb->pattern_position);
6545   else
6546     fprintf(outfile, "End of pattern");
6547 
6548   fprintf(outfile, "\n");
6549   }
6550 
6551 first_callout = FALSE;
6552 
6553 /* Show any mark info */
6554 
6555 if (cb->mark != last_callout_mark)
6556   {
6557   if (cb->mark == NULL)
6558     fprintf(outfile, "Latest Mark: <unset>\n");
6559   else
6560     {
6561     fprintf(outfile, "Latest Mark: ");
6562     PCHARSV(cb->mark, -1, -1, utf, outfile);
6563     putc('\n', outfile);
6564     }
6565   last_callout_mark = cb->mark;
6566   }
6567 
6568 /* Show callout data */
6569 
6570 if (callout_data_ptr != NULL)
6571   {
6572   int callout_data = *((int32_t *)callout_data_ptr);
6573   if (callout_data != 0)
6574     {
6575     fprintf(outfile, "Callout data = %d\n", callout_data);
6576     return callout_data;
6577     }
6578   }
6579 
6580 /* Keep count and give the appropriate return code */
6581 
6582 callout_count++;
6583 
6584 if (cb->callout_number == dat_datctl.cerror[0] &&
6585     callout_count >= dat_datctl.cerror[1])
6586   return PCRE2_ERROR_CALLOUT;
6587 
6588 if (cb->callout_number == dat_datctl.cfail[0] &&
6589     callout_count >= dat_datctl.cfail[1])
6590   return 1;
6591 
6592 return 0;
6593 }
6594 
6595 
6596 
6597 /*************************************************
6598 *       Handle *MARK and copy/get tests          *
6599 *************************************************/
6600 
6601 /* This function is called after complete and partial matches. It runs the
6602 tests for substring extraction.
6603 
6604 Arguments:
6605   utf       TRUE for utf
6606   capcount  return from pcre2_match()
6607 
6608 Returns:    FALSE if print_error_message() fails
6609 */
6610 
6611 static BOOL
copy_and_get(BOOL utf,int capcount)6612 copy_and_get(BOOL utf, int capcount)
6613 {
6614 int i;
6615 uint8_t *nptr;
6616 
6617 /* Test copy strings by number */
6618 
6619 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6620   {
6621   int rc;
6622   PCRE2_SIZE length, length2;
6623   uint32_t copybuffer[256];
6624   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6625   length = sizeof(copybuffer)/code_unit_size;
6626   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6627   if (rc < 0)
6628     {
6629     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6630     if (!print_error_message(rc, "", "\n")) return FALSE;
6631     }
6632   else
6633     {
6634     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6635     if (rc < 0)
6636       {
6637       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6638       if (!print_error_message(rc, "", "\n")) return FALSE;
6639       }
6640     else if (length2 != length)
6641       {
6642       fprintf(outfile, "Mismatched substring lengths: %"
6643         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6644       }
6645     fprintf(outfile, "%2dC ", n);
6646     PCHARSV(copybuffer, 0, length, utf, outfile);
6647     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6648     }
6649   }
6650 
6651 /* Test copy strings by name */
6652 
6653 nptr = dat_datctl.copy_names;
6654 for (;;)
6655   {
6656   int rc;
6657   int groupnumber;
6658   PCRE2_SIZE length, length2;
6659   uint32_t copybuffer[256];
6660   int namelen = strlen((const char *)nptr);
6661 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6662   PCRE2_SIZE cnl = namelen;
6663 #endif
6664   if (namelen == 0) break;
6665 
6666 #ifdef SUPPORT_PCRE2_8
6667   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6668 #endif
6669 #ifdef SUPPORT_PCRE2_16
6670   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6671 #endif
6672 #ifdef SUPPORT_PCRE2_32
6673   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6674 #endif
6675 
6676   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6677   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6678     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6679 
6680   length = sizeof(copybuffer)/code_unit_size;
6681   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6682   if (rc < 0)
6683     {
6684     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6685     if (!print_error_message(rc, "", "\n")) return FALSE;
6686     }
6687   else
6688     {
6689     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6690     if (rc < 0)
6691       {
6692       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6693       if (!print_error_message(rc, "", "\n")) return FALSE;
6694       }
6695     else if (length2 != length)
6696       {
6697       fprintf(outfile, "Mismatched substring lengths: %"
6698         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6699       }
6700     fprintf(outfile, "  C ");
6701     PCHARSV(copybuffer, 0, length, utf, outfile);
6702     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6703     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6704       else fprintf(outfile, " (non-unique)\n");
6705     }
6706   nptr += namelen + 1;
6707   }
6708 
6709 /* Test get strings by number */
6710 
6711 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6712   {
6713   int rc;
6714   PCRE2_SIZE length;
6715   void *gotbuffer;
6716   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6717   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6718   if (rc < 0)
6719     {
6720     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6721     if (!print_error_message(rc, "", "\n")) return FALSE;
6722     }
6723   else
6724     {
6725     fprintf(outfile, "%2dG ", n);
6726     PCHARSV(gotbuffer, 0, length, utf, outfile);
6727     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6728     PCRE2_SUBSTRING_FREE(gotbuffer);
6729     }
6730   }
6731 
6732 /* Test get strings by name */
6733 
6734 nptr = dat_datctl.get_names;
6735 for (;;)
6736   {
6737   PCRE2_SIZE length;
6738   void *gotbuffer;
6739   int rc;
6740   int groupnumber;
6741   int namelen = strlen((const char *)nptr);
6742 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6743   PCRE2_SIZE cnl = namelen;
6744 #endif
6745   if (namelen == 0) break;
6746 
6747 #ifdef SUPPORT_PCRE2_8
6748   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6749 #endif
6750 #ifdef SUPPORT_PCRE2_16
6751   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6752 #endif
6753 #ifdef SUPPORT_PCRE2_32
6754   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6755 #endif
6756 
6757   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6758   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6759     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6760 
6761   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6762   if (rc < 0)
6763     {
6764     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6765     if (!print_error_message(rc, "", "\n")) return FALSE;
6766     }
6767   else
6768     {
6769     fprintf(outfile, "  G ");
6770     PCHARSV(gotbuffer, 0, length, utf, outfile);
6771     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6772     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6773       else fprintf(outfile, " (non-unique)\n");
6774     PCRE2_SUBSTRING_FREE(gotbuffer);
6775     }
6776   nptr += namelen + 1;
6777   }
6778 
6779 /* Test getting the complete list of captured strings. */
6780 
6781 if ((dat_datctl.control & CTL_GETALL) != 0)
6782   {
6783   int rc;
6784   void **stringlist;
6785   PCRE2_SIZE *lengths;
6786   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6787   if (rc < 0)
6788     {
6789     fprintf(outfile, "get substring list failed (%d): ", rc);
6790     if (!print_error_message(rc, "", "\n")) return FALSE;
6791     }
6792   else
6793     {
6794     for (i = 0; i < capcount; i++)
6795       {
6796       fprintf(outfile, "%2dL ", i);
6797       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6798       putc('\n', outfile);
6799       }
6800     if (stringlist[i] != NULL)
6801       fprintf(outfile, "string list not terminated by NULL\n");
6802     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6803     }
6804   }
6805 
6806 return TRUE;
6807 }
6808 
6809 
6810 
6811 /*************************************************
6812 *            Show an entire ovector              *
6813 *************************************************/
6814 
6815 /* This function is called after partial matching or match failure, when the
6816 "allvector" modifier is set. It is a means of checking the contents of the
6817 entire ovector, to ensure no modification of fields that should be unchanged.
6818 
6819 Arguments:
6820   ovector      points to the ovector
6821   oveccount    number of pairs
6822 
6823 Returns:       nothing
6824 */
6825 
6826 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6827 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6828 {
6829 uint32_t i;
6830 for (i = 0; i < 2*oveccount; i += 2)
6831   {
6832   PCRE2_SIZE start = ovector[i];
6833   PCRE2_SIZE end = ovector[i+1];
6834 
6835   fprintf(outfile, "%2d: ", i/2);
6836   if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6837     fprintf(outfile, "<unset>\n");
6838   else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6839     fprintf(outfile, "<unchanged>\n");
6840   else
6841     fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6842       (unsigned long int)end);
6843   }
6844 }
6845 
6846 
6847 /*************************************************
6848 *               Process a data line              *
6849 *************************************************/
6850 
6851 /* The line is in buffer; it will not be empty.
6852 
6853 Arguments:  none
6854 
6855 Returns:    PR_OK     continue processing next line
6856             PR_SKIP   skip to a blank line
6857             PR_ABEND  abort the pcre2test run
6858 */
6859 
6860 static int
process_data(void)6861 process_data(void)
6862 {
6863 PCRE2_SIZE len, ulen, arg_ulen;
6864 uint32_t gmatched;
6865 uint32_t c, k;
6866 uint32_t g_notempty = 0;
6867 uint8_t *p, *pp, *start_rep;
6868 size_t needlen;
6869 void *use_dat_context;
6870 BOOL utf;
6871 BOOL subject_literal;
6872 
6873 PCRE2_SIZE *ovector;
6874 PCRE2_SIZE ovecsave[3];
6875 uint32_t oveccount;
6876 
6877 #ifdef SUPPORT_PCRE2_8
6878 uint8_t *q8 = NULL;
6879 #endif
6880 #ifdef SUPPORT_PCRE2_16
6881 uint16_t *q16 = NULL;
6882 #endif
6883 #ifdef SUPPORT_PCRE2_32
6884 uint32_t *q32 = NULL;
6885 #endif
6886 
6887 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6888 
6889 /* Copy the default context and data control blocks to the active ones. Then
6890 copy from the pattern the controls that can be set in either the pattern or the
6891 data. This allows them to be overridden in the data line. We do not do this for
6892 options because those that are common apply separately to compiling and
6893 matching. */
6894 
6895 DATCTXCPY(dat_context, default_dat_context);
6896 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6897 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6898 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6899 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6900 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6901 
6902 if (dat_datctl.substitute_skip == 0)
6903     dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6904 if (dat_datctl.substitute_stop == 0)
6905     dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6906 
6907 /* Initialize for scanning the data line. */
6908 
6909 #ifdef SUPPORT_PCRE2_8
6910 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6911   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6912   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6913 #else
6914 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6915 #endif
6916 
6917 start_rep = NULL;
6918 len = strlen((const char *)buffer);
6919 while (len > 0 && isspace(buffer[len-1])) len--;
6920 buffer[len] = 0;
6921 p = buffer;
6922 while (isspace(*p)) p++;
6923 
6924 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6925 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6926 
6927 if (utf)
6928   {
6929   uint8_t *q;
6930   uint32_t cc;
6931   int n = 1;
6932   uint8_t *q_end = p + len;
6933 
6934   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, q_end, &cc);
6935   if (n <= 0)
6936     {
6937     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6938       "in UTF mode\n");
6939     return PR_OK;
6940     }
6941   }
6942 
6943 #ifdef SUPPORT_VALGRIND
6944 /* Mark the dbuffer as addressable but undefined again. */
6945 if (dbuffer != NULL)
6946   {
6947   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6948   }
6949 #endif
6950 
6951 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6952 the number of code units that will be needed (though the buffer may have to be
6953 extended if replication is involved). */
6954 
6955 needlen = (len+1) * code_unit_size;
6956 if (dbuffer == NULL || needlen >= dbuffer_size)
6957   {
6958   while (needlen >= dbuffer_size)
6959     {
6960     if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
6961       else dbuffer_size = needlen + 1;
6962     }
6963   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6964   if (dbuffer == NULL)
6965     {
6966     fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size);
6967     exit(1);
6968     }
6969   }
6970 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6971 
6972 /* Scan the data line, interpreting data escapes, and put the result into a
6973 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6974 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6975 */
6976 
6977 while ((c = *p++) != 0)
6978   {
6979   int32_t i = 0;
6980   size_t replen;
6981 
6982   /* ] may mark the end of a replicated sequence */
6983 
6984   if (c == ']' && start_rep != NULL)
6985     {
6986     PCRE2_SIZE d;
6987     long li;
6988     char *endptr;
6989 
6990     if (*p++ != '{')
6991       {
6992       fprintf(outfile, "** Expected '{' after \\[....]\n");
6993       return PR_OK;
6994       }
6995 
6996     li = strtol((const char *)p, &endptr, 10);
6997     if (S32OVERFLOW(li))
6998       {
6999       fprintf(outfile, "** Repeat count too large\n");
7000       return PR_OK;
7001       }
7002 
7003     p = (uint8_t *)endptr;
7004     if (*p++ != '}')
7005       {
7006       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
7007       return PR_OK;
7008       }
7009 
7010     i = (int32_t)li;
7011     if (i-- <= 0)
7012       {
7013       fprintf(outfile, "** Zero or negative repeat not allowed\n");
7014       return PR_OK;
7015       }
7016 
7017     replen = CAST8VAR(q) - start_rep;
7018     if (PRIV(ckd_smul)(&d, replen, i))
7019       {
7020       fprintf(outfile, "** Expanded content too large\n");
7021       return PR_OK;
7022       }
7023     needlen += d;
7024 
7025     if (needlen >= dbuffer_size)
7026       {
7027       size_t qoffset = CAST8VAR(q) - dbuffer;
7028       size_t rep_offset = start_rep - dbuffer;
7029       while (needlen >= dbuffer_size)
7030         {
7031         if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
7032           else dbuffer_size = needlen + 1;
7033         }
7034       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
7035       if (dbuffer == NULL)
7036         {
7037         fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n",
7038           dbuffer_size);
7039         exit(1);
7040         }
7041       SETCASTPTR(q, dbuffer + qoffset);
7042       start_rep = dbuffer + rep_offset;
7043       }
7044 
7045     while (i-- > 0)
7046       {
7047       memcpy(CAST8VAR(q), start_rep, replen);
7048       SETPLUS(q, replen/code_unit_size);
7049       }
7050 
7051     start_rep = NULL;
7052     continue;
7053     }
7054 
7055   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
7056   set, do the fudge for setting the top bit. */
7057 
7058   if (c != '\\' || subject_literal)
7059     {
7060     uint32_t topbit = 0;
7061     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
7062       {
7063       topbit = 0x80000000;
7064       c = *p++;
7065       }
7066     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
7067       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
7068     c |= topbit;
7069     }
7070 
7071   /* Handle backslash escapes */
7072 
7073   else switch ((c = *p++))
7074     {
7075     case '\\': break;
7076     case 'a': c = CHAR_BEL; break;
7077     case 'b': c = '\b'; break;
7078     case 'e': c = CHAR_ESC; break;
7079     case 'f': c = '\f'; break;
7080     case 'n': c = '\n'; break;
7081     case 'r': c = '\r'; break;
7082     case 't': c = '\t'; break;
7083     case 'v': c = '\v'; break;
7084 
7085     case '0': case '1': case '2': case '3':
7086     case '4': case '5': case '6': case '7':
7087     c -= '0';
7088     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
7089       c = c * 8 + *p++ - '0';
7090     break;
7091 
7092     case 'o':
7093     if (*p == '{')
7094       {
7095       uint8_t *pt = p;
7096       c = 0;
7097       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
7098         {
7099         if (++i == 12)
7100           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
7101                            "using only the first twelve.\n");
7102         else c = c * 8 + *pt - '0';
7103         }
7104       if (*pt == '}') p = pt + 1;
7105         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
7106       }
7107     break;
7108 
7109     case 'x':
7110     if (*p == '{')
7111       {
7112       uint8_t *pt = p;
7113       c = 0;
7114 
7115       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
7116       when isxdigit() is a macro that refers to its argument more than
7117       once. This is banned by the C Standard, but apparently happens in at
7118       least one MacOS environment. */
7119 
7120       for (pt++; isxdigit(*pt); pt++)
7121         {
7122         if (++i == 9)
7123           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
7124                            "using only the first eight.\n");
7125         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
7126         }
7127       if (*pt == '}')
7128         {
7129         p = pt + 1;
7130         break;
7131         }
7132       /* Not correct form for \x{...}; fall through */
7133       }
7134 
7135     /* \x without {} always defines just one byte in 8-bit mode. This
7136     allows UTF-8 characters to be constructed byte by byte, and also allows
7137     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
7138     Otherwise, pass it down as data. */
7139 
7140     c = 0;
7141     while (i++ < 2 && isxdigit(*p))
7142       {
7143       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
7144       p++;
7145       }
7146 #if defined SUPPORT_PCRE2_8
7147     if (utf && (test_mode == PCRE8_MODE))
7148       {
7149       *q8++ = c;
7150       continue;
7151       }
7152 #endif
7153     break;
7154 
7155     case 0:     /* \ followed by EOF allows for an empty line */
7156     p--;
7157     continue;
7158 
7159     case '=':   /* \= terminates the data, starts modifiers */
7160     goto ENDSTRING;
7161 
7162     case '[':   /* \[ introduces a replicated character sequence */
7163     if (start_rep != NULL)
7164       {
7165       fprintf(outfile, "** Nested replication is not supported\n");
7166       return PR_OK;
7167       }
7168     start_rep = CAST8VAR(q);
7169     continue;
7170 
7171     default:
7172     if (isalnum(c))
7173       {
7174       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
7175       return PR_OK;
7176       }
7177     }
7178 
7179   /* We now have a character value in c that may be greater than 255.
7180   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
7181   than 127 in UTF mode must have come from \x{...} or octal constructs
7182   because values from \x.. get this far only in non-UTF mode. */
7183 
7184 #ifdef SUPPORT_PCRE2_8
7185   if (test_mode == PCRE8_MODE)
7186     {
7187     if (utf)
7188       {
7189       if (c > 0x7fffffff)
7190         {
7191         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
7192           "and so cannot be converted to UTF-8\n", c);
7193         return PR_OK;
7194         }
7195       q8 += ord2utf8(c, q8);
7196       }
7197     else
7198       {
7199       if (c > 0xffu)
7200         {
7201         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
7202           "and UTF-8 mode is not enabled.\n", c);
7203         fprintf(outfile, "** Truncation will probably give the wrong "
7204           "result.\n");
7205         }
7206       *q8++ = (uint8_t)c;
7207       }
7208     }
7209 #endif
7210 #ifdef SUPPORT_PCRE2_16
7211   if (test_mode == PCRE16_MODE)
7212     {
7213     if (utf)
7214       {
7215       if (c > 0x10ffffu)
7216         {
7217         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
7218           "0x10ffff and so cannot be converted to UTF-16\n", c);
7219         return PR_OK;
7220         }
7221       else if (c >= 0x10000u)
7222         {
7223         c-= 0x10000u;
7224         *q16++ = 0xD800 | (c >> 10);
7225         *q16++ = 0xDC00 | (c & 0x3ff);
7226         }
7227       else
7228         *q16++ = c;
7229       }
7230     else
7231       {
7232       if (c > 0xffffu)
7233         {
7234         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
7235           "and UTF-16 mode is not enabled.\n", c);
7236         fprintf(outfile, "** Truncation will probably give the wrong "
7237           "result.\n");
7238         }
7239 
7240       *q16++ = (uint16_t)c;
7241       }
7242     }
7243 #endif
7244 #ifdef SUPPORT_PCRE2_32
7245   if (test_mode == PCRE32_MODE)
7246     {
7247     *q32++ = c;
7248     }
7249 #endif
7250   }
7251 
7252 ENDSTRING:
7253 SET(*q, 0);
7254 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
7255 ulen = len/code_unit_size;                /* Length in code units */
7256 arg_ulen = ulen;                          /* Value to use in match arg */
7257 
7258 /* If the string was terminated by \= we must now interpret modifiers. */
7259 
7260 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7261   return PR_OK;
7262 
/* Setting substitute_{skip,stop} implies a substitute callout. */
7264 
7265 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7266   dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7267 
7268 /* Check for mutually exclusive modifiers. At present, these are all in the
7269 first control word. */
7270 
7271 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7272   {
7273   c = dat_datctl.control & exclusive_dat_controls[k];
7274   if (c != 0 && c != (c & (~c+1)))
7275     {
7276     show_controls(c, 0, "** Not allowed together:");
7277     fprintf(outfile, "\n");
7278     return PR_OK;
7279     }
7280   }
7281 
7282 if (pat_patctl.replacement[0] != 0)
7283   {
7284   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7285       (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7286     {
7287     fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7288     return PR_OK;
7289     }
7290 
7291   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7292     fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7293   }
7294 
7295 /* Warn for modifiers that are ignored for DFA. */
7296 
7297 if ((dat_datctl.control & CTL_DFA) != 0)
7298   {
7299   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7300     fprintf(outfile, "** Ignored for DFA matching: allcaptures\n");
7301   if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0)
7302     fprintf(outfile, "** Ignored for DFA matching: heapframes_size\n");
7303   }
7304 
7305 /* We now have the subject in dbuffer, with len containing the byte length, and
7306 ulen containing the code unit length, with a copy in arg_ulen for use in match
7307 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7308 zero_terminate modifier is present).
7309 
7310 Move the data to the end of the buffer so that a read over the end can be
7311 caught by valgrind or other means. If we have explicit valgrind support, mark
7312 the unused start of the buffer unaddressable. If we are using the POSIX
7313 interface, or testing zero-termination, we must include the terminating zero in
7314 the usable data. */
7315 
7316 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7317                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7318 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7319 #ifdef SUPPORT_VALGRIND
7320   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7321 #endif
7322 
7323 /* Now pp points to the subject string, but if null_subject was specified, set
7324 it to NULL to test PCRE2's behaviour. */
7325 
7326 if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
7327 
7328 /* POSIX matching is only possible in 8-bit mode, and it does not support
7329 timing or other fancy features. Some were checked at compile time, but we need
7330 to check the match-time settings here. */
7331 
7332 #ifdef SUPPORT_PCRE2_8
7333 if ((pat_patctl.control & CTL_POSIX) != 0)
7334   {
7335   int rc;
7336   int eflags = 0;
7337   regmatch_t *pmatch = NULL;
7338   const char *msg = "** Ignored with POSIX interface:";
7339 
7340   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7341     prmsg(&msg, "callout_error");
7342   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7343     prmsg(&msg, "callout_fail");
7344   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7345     prmsg(&msg, "copy");
7346   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7347     prmsg(&msg, "get");
7348   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7349   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7350 
7351   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7352     {
7353     fprintf(outfile, "%s", msg);
7354     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7355     msg = "";
7356     }
7357 
7358   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7359       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7360     {
7361     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7362                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7363     msg = "";
7364     }
7365 
7366   if (msg[0] == 0) fprintf(outfile, "\n");
7367 
7368   if (dat_datctl.oveccount > 0)
7369     {
7370     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7371     if (pmatch == NULL)
7372       {
7373       fprintf(outfile, "** Failed to get memory for recording matching "
7374         "information (size set = %du)\n", dat_datctl.oveccount);
7375       return PR_OK;
7376       }
7377     }
7378 
7379   if (dat_datctl.startend[0] != CFORE_UNSET)
7380     {
7381     pmatch[0].rm_so = dat_datctl.startend[0];
7382     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7383       dat_datctl.startend[1] : len;
7384     eflags |= REG_STARTEND;
7385     }
7386 
7387   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7388   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7389   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7390 
7391   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7392   if (rc != 0)
7393     {
7394     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7395     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7396     }
7397   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7398     fprintf(outfile, "Matched with REG_NOSUB\n");
7399   else if (dat_datctl.oveccount == 0)
7400     fprintf(outfile, "Matched without capture\n");
7401   else
7402     {
7403     size_t i, j;
7404     size_t last_printed = (size_t)dat_datctl.oveccount;
7405     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7406       {
7407       if (pmatch[i].rm_so >= 0)
7408         {
7409         PCRE2_SIZE start = pmatch[i].rm_so;
7410         PCRE2_SIZE end = pmatch[i].rm_eo;
7411         for (j = last_printed + 1; j < i; j++)
7412           fprintf(outfile, "%2d: <unset>\n", (int)j);
7413         last_printed = i;
7414         if (start > end)
7415           {
7416           start = pmatch[i].rm_eo;
7417           end = pmatch[i].rm_so;
7418           fprintf(outfile, "Start of matched string is beyond its end - "
7419             "displaying from end to start.\n");
7420           }
7421         fprintf(outfile, "%2d: ", (int)i);
7422         PCHARSV(pp, start, end - start, utf, outfile);
7423         fprintf(outfile, "\n");
7424 
7425         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7426             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7427           {
7428           fprintf(outfile, "%2d+ ", (int)i);
7429           /* Note: don't use the start/end variables here because we want to
7430           show the text from what is reported as the end. */
7431           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7432           fprintf(outfile, "\n"); }
7433         }
7434       }
7435     }
7436   free(pmatch);
7437   return PR_OK;
7438   }
7439 #endif  /* SUPPORT_PCRE2_8 */
7440 
7441  /* Handle matching via the native interface. Check for consistency of
7442 modifiers. */
7443 
7444 if (dat_datctl.startend[0] != CFORE_UNSET)
7445   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7446 
7447 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7448 matching, even if the JIT compiler was used. */
7449 
7450 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7451     FLD(compiled_code, executable_jit) != NULL)
7452   {
7453   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7454   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7455   }
7456 
7457 /* Handle passing the subject as zero-terminated. */
7458 
7459 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7460   arg_ulen = PCRE2_ZERO_TERMINATED;
7461 
7462 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7463 NULL context. */
7464 
7465 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7466   NULL : PTR(dat_context);
7467 
7468 /* Enable display of malloc/free if wanted. We can do this only if either the
7469 pattern or the subject is processed with a context. */
7470 
7471 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7472 
7473 if (show_memory &&
7474     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7475   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7476     "context: ignored\n");
7477 
7478 /* Create and assign a JIT stack if requested. */
7479 
7480 if (dat_datctl.jitstack != 0)
7481   {
7482   if (dat_datctl.jitstack != jit_stack_size)
7483     {
7484     PCRE2_JIT_STACK_FREE(jit_stack);
7485     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7486     jit_stack_size = dat_datctl.jitstack;
7487     }
7488   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7489   }
7490 
7491 /* Or de-assign */
7492 
7493 else if (jit_stack != NULL)
7494   {
7495   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7496   PCRE2_JIT_STACK_FREE(jit_stack);
7497   jit_stack = NULL;
7498   jit_stack_size = 0;
7499   }
7500 
7501 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7502 if we want to verify that JIT was actually used. */
7503 
7504 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7505    {
7506    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7507    }
7508 
7509 /* Adjust match_data according to size of offsets required. A size of zero
7510 causes a new match data block to be obtained that exactly fits the pattern. */
7511 
7512 if (dat_datctl.oveccount == 0)
7513   {
7514   PCRE2_MATCH_DATA_FREE(match_data);
7515   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code,
7516     general_context);
7517   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7518   }
7519 else if (dat_datctl.oveccount <= max_oveccount)
7520   {
7521   SETFLD(match_data, oveccount, dat_datctl.oveccount);
7522   }
7523 else
7524   {
7525   max_oveccount = dat_datctl.oveccount;
7526   PCRE2_MATCH_DATA_FREE(match_data);
7527   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, general_context);
7528   }
7529 
7530 if (CASTVAR(void *, match_data) == NULL)
7531   {
7532   fprintf(outfile, "** Failed to get memory for recording matching "
7533     "information (size requested: %d)\n", dat_datctl.oveccount);
7534   max_oveccount = 0;
7535   return PR_OK;
7536   }
7537 
7538 ovector = FLD(match_data, ovector);
7539 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7540 
7541 /* Replacement processing is ignored for DFA matching. */
7542 
7543 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7544   {
7545   fprintf(outfile, "** Ignored for DFA matching: replace\n");
7546   dat_datctl.replacement[0] = 0;
7547   }
7548 
7549 /* If a replacement string is provided, call pcre2_substitute() instead of or
7550 after one of the matching functions. First we have to convert the replacement
7551 string to the appropriate width. */
7552 
7553 if (dat_datctl.replacement[0] != 0)
7554   {
7555   int rc;
7556   uint8_t *pr;
7557   uint8_t rbuffer[REPLACE_BUFFSIZE];
7558   uint8_t nbuffer[REPLACE_BUFFSIZE];
7559   uint8_t *rbptr;
7560   uint32_t xoptions;
7561   uint32_t emoption;  /* External match option */
7562   PCRE2_SIZE j, rlen, nsize, erroroffset;
7563   BOOL badutf = FALSE;
7564 
7565 #ifdef SUPPORT_PCRE2_8
7566   uint8_t *r8 = NULL;
7567 #endif
7568 #ifdef SUPPORT_PCRE2_16
7569   uint16_t *r16 = NULL;
7570 #endif
7571 #ifdef SUPPORT_PCRE2_32
7572   uint32_t *r32 = NULL;
7573 #endif
7574 
7575   /* Fill the ovector with junk to detect elements that do not get set
7576   when they should be (relevant only when "allvector" is specified). */
7577 
7578   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7579 
7580   if (timeitm)
7581     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7582 
7583   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7584     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7585 
7586   /* Check for a test that does substitution after an initial external match.
7587   If this is set, we run the external match, but leave the interpretation of
7588   its output to pcre2_substitute(). */
7589 
7590   emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7591     PCRE2_SUBSTITUTE_MATCHED;
7592 
7593   if (emoption != 0)
7594     {
7595     if ((pat_patctl.control & CTL_JITFAST) != 0)
7596       {
7597       PCRE2_JIT_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7598         dat_datctl.options, match_data, use_dat_context);
7599       }
7600     else
7601       {
7602       PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7603         dat_datctl.options, match_data, use_dat_context);
7604       }
7605     }
7606 
7607   xoptions = emoption |
7608              (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7609                 PCRE2_SUBSTITUTE_GLOBAL) |
7610              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7611                 PCRE2_SUBSTITUTE_EXTENDED) |
7612              (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7613                 PCRE2_SUBSTITUTE_LITERAL) |
7614              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7615                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7616              (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7617                 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7618              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7619                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7620              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7621                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7622 
7623   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7624   pr = dat_datctl.replacement;
7625 
7626   /* If the replacement starts with '[<number>]' we interpret that as a length
7627   value for the replacement buffer. */
7628 
7629   nsize = REPLACE_BUFFSIZE/code_unit_size;
7630   if (*pr == '[')
7631     {
7632     PCRE2_SIZE n = 0;
7633     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7634     if (*pr++ != ']')
7635       {
7636       fprintf(outfile, "Bad buffer size in replacement string\n");
7637       return PR_OK;
7638       }
7639     if (n > nsize)
7640       {
7641       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7642         "large (max %" SIZ_FORM ")\n", n, nsize);
7643       return PR_OK;
7644       }
7645     nsize = n;
7646     }
7647 
7648   /* Now copy the replacement string to a buffer of the appropriate width. No
7649   escape processing is done for replacements. In UTF mode, check for an invalid
7650   UTF-8 input string, and if it is invalid, just copy its code units without
7651   UTF interpretation. This provides a means of checking that an invalid string
7652   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7653   replacement. */
7654 
7655   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7656 
7657   /* Not UTF or invalid UTF-8: just copy the code units. */
7658 
7659   if (!utf || badutf)
7660     {
7661     while ((c = *pr++) != 0)
7662       {
7663 #ifdef SUPPORT_PCRE2_8
7664       if (test_mode == PCRE8_MODE) *r8++ = c;
7665 #endif
7666 #ifdef SUPPORT_PCRE2_16
7667       if (test_mode == PCRE16_MODE) *r16++ = c;
7668 #endif
7669 #ifdef SUPPORT_PCRE2_32
7670       if (test_mode == PCRE32_MODE) *r32++ = c;
7671 #endif
7672       }
7673     }
7674 
7675   /* Valid UTF-8 replacement string */
7676 
7677   else while ((c = *pr++) != 0)
7678     {
7679     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7680 
7681 #ifdef SUPPORT_PCRE2_8
7682     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7683 #endif
7684 
7685 #ifdef SUPPORT_PCRE2_16
7686     if (test_mode == PCRE16_MODE)
7687       {
7688       if (c >= 0x10000u)
7689         {
7690         c-= 0x10000u;
7691         *r16++ = 0xD800 | (c >> 10);
7692         *r16++ = 0xDC00 | (c & 0x3ff);
7693         }
7694       else *r16++ = c;
7695       }
7696 #endif
7697 
7698 #ifdef SUPPORT_PCRE2_32
7699     if (test_mode == PCRE32_MODE) *r32++ = c;
7700 #endif
7701     }
7702 
7703   SET(*r, 0);
7704   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7705     rlen = PCRE2_ZERO_TERMINATED;
7706   else
7707     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7708 
7709   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7710     {
7711     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7712     }
7713   else
7714     {
7715     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7716     }
7717 
7718   /* There is a special option to set the replacement to NULL in order to test
7719   that case. */
7720 
7721   rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
7722 
7723   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7724     dat_datctl.options|xoptions, match_data, use_dat_context,
7725     rbptr, rlen, nbuffer, &nsize);
7726 
7727   if (rc < 0)
7728     {
7729     fprintf(outfile, "Failed: error %d", rc);
7730     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7731       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7732     fprintf(outfile, ": ");
7733     if (!print_error_message(rc, "", "")) return PR_ABEND;
7734     if (rc == PCRE2_ERROR_NOMEMORY &&
7735         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7736       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7737     }
7738   else
7739     {
7740     fprintf(outfile, "%2d: ", rc);
7741     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7742     }
7743 
7744   fprintf(outfile, "\n");
7745   show_memory = FALSE;
7746 
7747   /* Show final ovector contents and resulting heapframe size if requested. */
7748 
7749   if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7750     show_ovector(ovector, oveccount);
7751 
7752   if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
7753       (dat_datctl.control & CTL_DFA) == 0)
7754     show_heapframes_size();
7755 
7756   return PR_OK;
7757   }   /* End of substitution handling */
7758 
7759 /* When a replacement string is not provided, run a loop for global matching
7760 with one of the basic matching functions. For altglobal (or first time round
7761 the loop), set an "unset" value for the previous match info. */
7762 
7763 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7764 
7765 for (gmatched = 0;; gmatched++)
7766   {
7767   PCRE2_SIZE j;
7768   int capcount;
7769 
7770   /* Fill the ovector with junk to detect elements that do not get set
7771   when they should be. */
7772 
7773   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7774 
7775   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7776   stack callback function. */
7777 
7778   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7779 
7780   /* Do timing if required. */
7781 
7782   if (timeitm > 0)
7783     {
7784     int i;
7785     clock_t start_time, time_taken;
7786 
7787     if ((dat_datctl.control & CTL_DFA) != 0)
7788       {
7789       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7790         {
7791         fprintf(outfile, "Timing DFA restarts is not supported\n");
7792         return PR_OK;
7793         }
7794       if (dfa_workspace == NULL)
7795         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7796       start_time = clock();
7797       for (i = 0; i < timeitm; i++)
7798         {
7799         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7800           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7801           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7802         }
7803       }
7804 
7805     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7806       {
7807       start_time = clock();
7808       for (i = 0; i < timeitm; i++)
7809         {
7810         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7811           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7812           use_dat_context);
7813         }
7814       }
7815 
7816     else
7817       {
7818       start_time = clock();
7819       for (i = 0; i < timeitm; i++)
7820         {
7821         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7822           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7823           use_dat_context);
7824         }
7825       }
7826     total_match_time += (time_taken = clock() - start_time);
7827     fprintf(outfile, "Match time %7.4f microseconds\n",
7828       ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeitm);
7829     }
7830 
7831   /* Find the heap, match and depth limits if requested. The depth and heap
7832   limits are not relevant for JIT. The return from check_match_limit() is the
7833   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7834 
7835   if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0)
7836     {
7837     capcount = 0;  /* This stops compiler warnings */
7838 
7839     if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 &&
7840         (FLD(compiled_code, executable_jit) == NULL ||
7841           (dat_datctl.options & PCRE2_NO_JIT) != 0))
7842       {
7843       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7844       }
7845 
7846     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7847       "match");
7848 
7849     if (FLD(compiled_code, executable_jit) == NULL ||
7850         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7851         (dat_datctl.control & CTL_DFA) != 0)
7852       {
7853       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7854         "depth");
7855       }
7856 
7857     if (capcount == 0)
7858       {
7859       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7860       capcount = dat_datctl.oveccount;
7861       }
7862     }
7863 
7864   /* Otherwise just run a single match, setting up a callout if required (the
7865   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7866 
7867   else
7868     {
7869     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7870       {
7871       PCRE2_SET_CALLOUT(dat_context, callout_function,
7872         (void *)(&dat_datctl.callout_data));
7873       first_callout = TRUE;
7874       last_callout_mark = NULL;
7875       callout_count = 0;
7876       }
7877     else
7878       {
7879       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7880       }
7881 
7882     /* Run a single DFA or NFA match. */
7883 
7884     if ((dat_datctl.control & CTL_DFA) != 0)
7885       {
7886       if (dfa_workspace == NULL)
7887         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7888       if (dfa_matched++ == 0)
7889         dfa_workspace[0] = -1;  /* To catch bad restart */
7890       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7891         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7892         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7893       if (capcount == 0)
7894         {
7895         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7896         capcount = dat_datctl.oveccount;
7897         }
7898       }
7899     else
7900       {
7901       if ((pat_patctl.control & CTL_JITFAST) != 0)
7902         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7903           dat_datctl.options | g_notempty, match_data, use_dat_context);
7904       else
7905         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7906           dat_datctl.options | g_notempty, match_data, use_dat_context);
7907       if (capcount == 0)
7908         {
7909         fprintf(outfile, "Matched, but too many substrings\n");
7910         capcount = dat_datctl.oveccount;
7911         }
7912       }
7913     }
7914 
7915   /* The result of the match is now in capcount. First handle a successful
7916   match. If pp was forced to be NULL (to test NULL handling) it will have been
7917   treated as an empty string if the length was zero. So re-create that for
7918   outputting. */
7919 
7920   if (capcount >= 0)
7921     {
7922     int i;
7923 
7924     if (pp == NULL) pp = (uint8_t *)"";
7925 
7926     if (capcount > (int)oveccount)   /* Check for lunatic return value */
7927       {
7928       fprintf(outfile,
7929         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7930         capcount, oveccount);
7931       capcount = oveccount;
7932       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7933         {
7934         fprintf(outfile, "** Global loop abandoned\n");
7935         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7936         }
7937       }
7938 
7939     /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7940     should be, but not for fast JIT, where it isn't supported. */
7941 
7942     if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7943         (pat_patctl.control & CTL_JITFAST) == 0)
7944       {
7945       if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7946         fprintf(outfile,
7947           "** PCRE2 error: flag not set after copy_matched_subject\n");
7948 
7949       if (CASTFLD(void *, match_data, subject) == pp)
7950         fprintf(outfile,
7951           "** PCRE2 error: copy_matched_subject has not copied\n");
7952 
7953       if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7954         fprintf(outfile,
7955           "** PCRE2 error: copy_matched_subject mismatch\n");
7956       }
7957 
7958     /* If this is not the first time round a global loop, check that the
7959     returned string has changed. If it has not, check for an empty string match
7960     at different starting offset from the previous match. This is a failed test
7961     retry for null-matching patterns that don't match at their starting offset,
7962     for example /(?<=\G.)/. A repeated match at the same point is not such a
7963     pattern, and must be discarded, and we then proceed to seek a non-null
7964     match at the current point. For any other repeated match, there is a bug
7965     somewhere and we must break the loop because it will go on for ever. We
7966     know that there are always at least two elements in the ovector. */
7967 
7968     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7969       {
7970       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7971         {
7972         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7973         ovecsave[2] = dat_datctl.offset;
7974         continue;    /* Back to the top of the loop */
7975         }
7976       fprintf(outfile,
7977         "** PCRE2 error: global repeat returned the same string as previous\n");
7978       fprintf(outfile, "** Global loop abandoned\n");
7979       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7980       }
7981 
7982     /* "allcaptures" requests showing of all captures in the pattern, to check
7983     unset ones at the end. It may be set on the pattern or the data. Implement
7984     by setting capcount to the maximum. This is not relevant for DFA matching,
7985     so ignore it (warning given above). */
7986 
7987     if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7988       {
7989       capcount = maxcapcount + 1;   /* Allow for full match */
7990       if (capcount > (int)oveccount) capcount = oveccount;
7991       }
7992 
7993     /* "allvector" requests showing the entire ovector. */
7994 
7995     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7996 
7997     /* Output the captured substrings. Note that, for the matched string,
7998     the use of \K in an assertion can make the start later than the end. */
7999 
8000     for (i = 0; i < 2*capcount; i += 2)
8001       {
8002       PCRE2_SIZE lleft, lmiddle, lright;
8003       PCRE2_SIZE start = ovector[i];
8004       PCRE2_SIZE end = ovector[i+1];
8005 
8006       if (start > end)
8007         {
8008         start = ovector[i+1];
8009         end = ovector[i];
8010         fprintf(outfile, "Start of matched string is beyond its end - "
8011           "displaying from end to start.\n");
8012         }
8013 
8014       fprintf(outfile, "%2d: ", i/2);
8015 
8016       /* Check for an unset group */
8017 
8018       if (start == PCRE2_UNSET && end == PCRE2_UNSET)
8019         {
8020         fprintf(outfile, "<unset>\n");
8021         continue;
8022         }
8023 
8024       /* Check for silly offsets, in particular, values that have not been
8025       set when they should have been. However, if we are past the end of the
8026       captures for this pattern ("allvector" causes this), or if we are DFA
8027       matching, it isn't an error if the entry is unchanged. */
8028 
8029       if (start > ulen || end > ulen)
8030         {
8031         if (((dat_datctl.control & CTL_DFA) != 0 ||
8032               i >= (int)(2*maxcapcount + 2)) &&
8033             start == JUNK_OFFSET && end == JUNK_OFFSET)
8034           fprintf(outfile, "<unchanged>\n");
8035         else
8036           fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
8037             (unsigned long int)start, (unsigned long int)end);
8038         continue;
8039         }
8040 
8041       /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
8042       JIT, it is disabled above, with a comment.) When the match is done by the
8043       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
8044       set, and if the leftmost consulted character is before the start of the
8045       match or the rightmost consulted character is past the end of the match,
8046       we want to show all consulted characters for the main matched string, and
8047       indicate which were lookarounds. */
8048 
8049       if (i == 0)
8050         {
8051         BOOL showallused;
8052         PCRE2_SIZE leftchar, rightchar;
8053 
8054         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
8055           {
8056           leftchar = FLD(match_data, leftchar);
8057           rightchar = FLD(match_data, rightchar);
8058           showallused = i == 0 && (leftchar < start || rightchar > end);
8059           }
8060         else showallused = FALSE;
8061 
8062         if (showallused)
8063           {
8064           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
8065           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
8066           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
8067           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8068             fprintf(outfile, " (JIT)");
8069           fprintf(outfile, "\n    ");
8070           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
8071           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
8072           for (j = 0; j < lright; j++) fprintf(outfile, ">");
8073           }
8074 
8075         /* When a pattern contains \K, the start of match position may be
8076         different to the start of the matched string. When this is the case,
8077         show it when requested. */
8078 
8079         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
8080           {
8081           PCRE2_SIZE startchar;
8082           PCRE2_GET_STARTCHAR(startchar, match_data);
8083           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
8084           PCHARSV(pp, start, end - start, utf, outfile);
8085           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8086             fprintf(outfile, " (JIT)");
8087           if (startchar != start)
8088             {
8089             fprintf(outfile, "\n    ");
8090             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
8091             }
8092           }
8093 
8094         /* Otherwise, just show the matched string. */
8095 
8096         else
8097           {
8098           PCHARSV(pp, start, end - start, utf, outfile);
8099           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8100             fprintf(outfile, " (JIT)");
8101           }
8102         }
8103 
8104       /* Not the main matched string. Just show it unadorned. */
8105 
8106       else
8107         {
8108         PCHARSV(pp, start, end - start, utf, outfile);
8109         }
8110 
8111       fprintf(outfile, "\n");
8112 
8113       /* Note: don't use the start/end variables here because we want to
8114       show the text from what is reported as the end. */
8115 
8116       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
8117           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
8118         {
8119         fprintf(outfile, "%2d+ ", i/2);
8120         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
8121         fprintf(outfile, "\n");
8122         }
8123       }
8124 
8125     /* Output (*MARK) data if requested */
8126 
8127     if ((dat_datctl.control & CTL_MARK) != 0 &&
8128          TESTFLD(match_data, mark, !=, NULL))
8129       {
8130       fprintf(outfile, "MK: ");
8131       PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8132       fprintf(outfile, "\n");
8133       }
8134 
8135     /* Process copy/get strings */
8136 
8137     if (!copy_and_get(utf, capcount)) return PR_ABEND;
8138 
8139     }    /* End of handling a successful match */
8140 
8141   /* There was a partial match. The value of ovector[0] is the bumpalong point,
8142   that is, startchar, not any \K point that might have been passed. When JIT is
8143   not in use, "allusedtext" may be set, in which case we indicate the leftmost
8144   consulted character. */
8145 
8146   else if (capcount == PCRE2_ERROR_PARTIAL)
8147     {
8148     PCRE2_SIZE leftchar;
8149     int backlength;
8150     int rubriclength = 0;
8151 
8152     if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
8153       {
8154       leftchar = FLD(match_data, leftchar);
8155       }
8156     else leftchar = ovector[0];
8157 
8158     fprintf(outfile, "Partial match");
8159     if ((dat_datctl.control & CTL_MARK) != 0 &&
8160          TESTFLD(match_data, mark, !=, NULL))
8161       {
8162       fprintf(outfile, ", mark=");
8163       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
8164         outfile);
8165       rubriclength += 7;
8166       }
8167     fprintf(outfile, ": ");
8168     rubriclength += 15;
8169 
8170     PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
8171     PCHARSV(pp, ovector[0], ovector[1] - ovector[0], utf, outfile);
8172 
8173     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8174       fprintf(outfile, " (JIT)");
8175     fprintf(outfile, "\n");
8176 
8177     if (backlength != 0)
8178       {
8179       int i;
8180       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
8181       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
8182       fprintf(outfile, "\n");
8183       }
8184 
8185     if (ulen != ovector[1])
8186       fprintf(outfile, "** ovector[1] is not equal to the subject length: "
8187         "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
8188 
8189     /* Process copy/get strings */
8190 
8191     if (!copy_and_get(utf, 1)) return PR_ABEND;
8192 
8193     /* "allvector" outputs the entire vector */
8194 
8195     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8196       show_ovector(ovector, oveccount);
8197 
8198     break;  /* Out of the /g loop */
8199     }       /* End of handling partial match */
8200 
8201   /* Failed to match. If this is a /g or /G loop, we might previously have
8202   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
8203   If that is the case, this is not necessarily the end. We want to advance the
8204   start offset, and continue. We won't be at the end of the string - that was
8205   checked before setting g_notempty. We achieve the effect by pretending that a
8206   single character was matched.
8207 
8208   Complication arises in the case when the newline convention is "any", "crlf",
8209   or "anycrlf". If the previous match was at the end of a line terminated by
8210   CRLF, an advance of one character just passes the CR, whereas we should
8211   prefer the longer newline sequence, as does the code in pcre2_match().
8212 
8213   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
8214   character, not one byte. */
8215 
8216   else if (g_notempty != 0)   /* There was a previous null match */
8217     {
8218     uint16_t nl = FLD(compiled_code, newline_convention);
8219     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
8220     PCRE2_SIZE end_offset = start_offset + 1;
8221 
8222     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
8223          nl == PCRE2_NEWLINE_ANYCRLF) &&
8224         start_offset < ulen - 1 &&
8225         CODE_UNIT(pp, start_offset) == '\r' &&
8226         CODE_UNIT(pp, end_offset) == '\n')
8227       end_offset++;
8228 
8229     else if (utf && test_mode != PCRE32_MODE)
8230       {
8231       if (test_mode == PCRE8_MODE)
8232         {
8233         for (; end_offset < ulen; end_offset++)
8234           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8235         }
8236       else  /* 16-bit mode */
8237         {
8238         for (; end_offset < ulen; end_offset++)
8239           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8240         }
8241       }
8242 
8243     SETFLDVEC(match_data, ovector, 0, start_offset);
8244     SETFLDVEC(match_data, ovector, 1, end_offset);
8245     }  /* End of handling null match in a global loop */
8246 
8247   /* A "normal" match failure. There will be a negative error number in
8248   capcount. */
8249 
8250   else
8251     {
8252     switch(capcount)
8253       {
8254       case PCRE2_ERROR_NOMATCH:
8255       if (gmatched == 0)
8256         {
8257         fprintf(outfile, "No match");
8258         if ((dat_datctl.control & CTL_MARK) != 0 &&
8259              TESTFLD(match_data, mark, !=, NULL))
8260           {
8261           fprintf(outfile, ", mark = ");
8262           PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8263           }
8264         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8265           fprintf(outfile, " (JIT)");
8266         fprintf(outfile, "\n");
8267 
8268         /* "allvector" outputs the entire vector */
8269 
8270         if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8271           show_ovector(ovector, oveccount);
8272         }
8273       break;
8274 
8275       case PCRE2_ERROR_BADUTFOFFSET:
8276       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
8277       break;
8278 
8279       default:
8280       fprintf(outfile, "Failed: error %d: ", capcount);
8281       if (!print_error_message(capcount, "", "")) return PR_ABEND;
8282       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
8283           capcount >= PCRE2_ERROR_UTF32_ERR2)
8284         {
8285         PCRE2_SIZE startchar;
8286         PCRE2_GET_STARTCHAR(startchar, match_data);
8287         fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8288         }
8289       fprintf(outfile, "\n");
8290       break;
8291       }
8292 
8293     break;  /* Out of the /g loop */
8294     }       /* End of failed match handling */
8295 
8296   /* Control reaches here in two circumstances: (a) after a match, and (b)
8297   after a non-match that immediately followed a match on an empty string when
8298   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8299   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8300   of one character. So effectively we get here only after a match. If we
8301   are not doing a global search, we are done. */
8302 
8303   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8304     {
8305     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8306     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8307 
8308     /* We must now set up for the next iteration of a global search. If we have
8309     matched an empty string, first check to see if we are at the end of the
8310     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8311     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8312     at the same point. If this fails it will be picked up above, where a fake
8313     match is set up so that at this point we advance to the next character.
8314 
8315     However, in order to cope with patterns that never match at their starting
8316     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8317     than the starting offset. This means there will be a retry with the
8318     starting offset at the match offset. If this returns the same match again,
8319     it is picked up above and ignored, and the special action is then taken. */
8320 
8321     if (match_offset == end_offset)
8322       {
8323       if (end_offset == ulen) break;           /* End of subject */
8324       if (match_offset <= dat_datctl.offset)
8325         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8326       }
8327 
8328     /* However, even after matching a non-empty string, there is still one
8329     tricky case. If a pattern contains \K within a lookbehind assertion at the
8330     start, the end of the matched string can be at the offset where the match
8331     started. In the case of a normal /g iteration without special action, this
8332     leads to a loop that keeps on returning the same substring. The loop would
8333     be caught above, but we really want to move on to the next match. */
8334 
8335     else
8336       {
8337       g_notempty = 0;   /* Set for a "normal" repeat */
8338       if ((dat_datctl.control & CTL_GLOBAL) != 0)
8339         {
8340         PCRE2_SIZE startchar;
8341         PCRE2_GET_STARTCHAR(startchar, match_data);
8342         if (end_offset <= startchar)
8343           {
8344           if (startchar >= ulen) break;       /* End of subject */
8345           end_offset = startchar + 1;
8346           if (utf && test_mode != PCRE32_MODE)
8347             {
8348             if (test_mode == PCRE8_MODE)
8349               {
8350               for (; end_offset < ulen; end_offset++)
8351                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8352               }
8353             else  /* 16-bit mode */
8354               {
8355               for (; end_offset < ulen; end_offset++)
8356                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8357               }
8358             }
8359           }
8360         }
8361       }
8362 
8363     /* For a normal global (/g) iteration, save the current ovector[0,1] and
8364     the starting offset so that we can check that they do change each time.
8365     Otherwise a matching bug that returns the same string causes an infinite
8366     loop. It has happened! Then update the start offset, leaving other
8367     parameters alone. */
8368 
8369     if ((dat_datctl.control & CTL_GLOBAL) != 0)
8370       {
8371       ovecsave[0] = ovector[0];
8372       ovecsave[1] = ovector[1];
8373       ovecsave[2] = dat_datctl.offset;
8374       dat_datctl.offset = end_offset;
8375       }
8376 
8377     /* For altglobal, just update the pointer and length. */
8378 
8379     else
8380       {
8381       pp += end_offset * code_unit_size;
8382       len -= end_offset * code_unit_size;
8383       ulen -= end_offset;
8384       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8385       }
8386     }
8387   }  /* End of global loop */
8388 
8389 /* All matching is done; show the resulting heapframe size if requested. */
8390 
8391 if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
8392     (dat_datctl.control & CTL_DFA) == 0)
8393   show_heapframes_size();
8394 
8395 show_memory = FALSE;
8396 return PR_OK;
8397 }
8398 
8399 
8400 
8401 
8402 /*************************************************
8403 *               Print PCRE2 version              *
8404 *************************************************/
8405 
8406 static void
print_version(FILE * f,BOOL include_mode)8407 print_version(FILE *f, BOOL include_mode)
8408 {
8409 char buf[16];
8410 VERSION_TYPE *vp;
8411 fprintf(f, "PCRE2 version ");
8412 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8413 if (include_mode)
8414   {
8415   sprintf(buf, "%d-bit", test_mode);
8416   fprintf(f, " (%s)", buf);
8417   }
8418 fprintf(f, "\n");
8419 }
8420 
8421 
8422 
8423 /*************************************************
8424 *               Print Unicode version            *
8425 *************************************************/
8426 
8427 static void
print_unicode_version(FILE * f)8428 print_unicode_version(FILE *f)
8429 {
8430 VERSION_TYPE *vp;
8431 fprintf(f, "Unicode version ");
8432 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8433 }
8434 
8435 
8436 
8437 /*************************************************
8438 *               Print JIT target                 *
8439 *************************************************/
8440 
8441 static void
print_jit_target(FILE * f)8442 print_jit_target(FILE *f)
8443 {
8444 VERSION_TYPE *vp;
8445 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8446 }
8447 
8448 
8449 
8450 /*************************************************
8451 *       Print newline configuration              *
8452 *************************************************/
8453 
8454 /* Output is always to stdout.
8455 
8456 Arguments:
8457   rc         the return code from PCRE2_CONFIG_NEWLINE
8458   isc        TRUE if called from "-C newline"
8459 Returns:     nothing
8460 */
8461 
8462 static void
print_newline_config(uint32_t optval,BOOL isc)8463 print_newline_config(uint32_t optval, BOOL isc)
8464 {
8465 if (!isc) printf("  Default newline sequence is ");
8466 if (optval < sizeof(newlines)/sizeof(char *))
8467   printf("%s\n", newlines[optval]);
8468 else
8469   printf("a non-standard value: %d\n", optval);
8470 }
8471 
8472 
8473 
8474 /*************************************************
8475 *             Usage function                     *
8476 *************************************************/
8477 
/* Write the command line usage summary to stdout. The text is fixed apart from
the lines that depend on which libraries (readline/libedit, and the 8-, 16-,
and 32-bit PCRE2 libraries) were configured at build time.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs(
  "Usage:     pcre2test [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE2_8
fputs("  -8            use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs("  -16           use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs("  -32           use the 32-bit library\n", stdout);
#endif

fputs(
  "  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n"
  "  -AC           as -ac, but also set subject 'callout_extra' modifier\n"
  "  -b            set default pattern modifier 'fullbincode'\n"
  "  -C            show PCRE2 compile-time options and exit\n"
  "  -C arg        show a specific compile-time option and exit with its\n"
  "                  value if numeric (else 0). The arg can be:\n",
  stdout);

/* The arguments that may follow -C */

fputs(
  "     backslash-C    use of \\C is enabled [0, 1]\n"
  "     bsr            \\R type [ANYCRLF, ANY]\n"
  "     ebcdic         compiled for EBCDIC character code [0,1]\n"
  "     ebcdic-nl      NL code if compiled for EBCDIC\n"
  "     jit            just-in-time compiler supported [0, 1]\n"
  "     linksize       internal link size [2, 3, 4]\n"
  "     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
  "     pcre2-8        8 bit library support enabled [0, 1]\n"
  "     pcre2-16       16 bit library support enabled [0, 1]\n"
  "     pcre2-32       32 bit library support enabled [0, 1]\n"
  "     unicode        Unicode and UTF support enabled [0, 1]\n",
  stdout);

fputs(
  "  -d            set default pattern modifier 'debug'\n"
  "  -dfa          set default subject modifier 'dfa'\n"
  "  -error <n,m,..>  show messages for error numbers, then exit\n"
  "  -help         show usage information\n"
  "  -i            set default pattern modifier 'info'\n"
  "  -jit          set default pattern modifier 'jit'\n"
  "  -jitfast      set default pattern modifier 'jitfast'\n"
  "  -jitverify    set default pattern modifier 'jitverify'\n"
  "  -LM           list pattern and subject modifiers, then exit\n"
  "  -LP           list non-script properties, then exit\n"
  "  -LS           list supported scripts, then exit\n",
  stdout);

fputs(
  "  -q            quiet: do not output PCRE2 version number at start\n"
  "  -pattern <s>  set default pattern modifier fields\n"
  "  -subject <s>  set default subject modifier fields\n"
  "  -S <n>        set stack size to <n> mebibytes\n"
  "  -t [<n>]      time compilation and execution, repeating <n> times\n"
  "  -tm [<n>]     time execution (matching) only, repeating <n> times\n"
  "  -T            same as -t, but show total times at the end\n"
  "  -TM           same as -tm, but show total time at the end\n"
  "  -v|--version  show PCRE2 version and exit\n",
  stdout);
}
8536 
8537 
8538 
8539 /*************************************************
8540 *             Handle -C option                   *
8541 *************************************************/
8542 
8543 /* This option outputs configuration options and sets an appropriate return
8544 code when asked for a single option. The code is abstracted into a separate
8545 function because of its size. Use whichever pcre2_config() function is
8546 available.
8547 
8548 Argument:   an option name or NULL
8549 Returns:    the return code
8550 */
8551 
8552 static int
c_option(const char * arg)8553 c_option(const char *arg)
8554 {
8555 uint32_t optval;
8556 unsigned int i = COPTLISTCOUNT;
8557 int yield = 0;
8558 
8559 if (arg != NULL && arg[0] != CHAR_MINUS)
8560   {
8561   for (i = 0; i < COPTLISTCOUNT; i++)
8562     if (strcmp(arg, coptlist[i].name) == 0) break;
8563 
8564   if (i >= COPTLISTCOUNT)
8565     {
8566     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8567     return 0;
8568     }
8569 
8570   switch (coptlist[i].type)
8571     {
8572     case CONF_BSR:
8573     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8574     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8575     break;
8576 
8577     case CONF_FIX:
8578     yield = coptlist[i].value;
8579     printf("%d\n", yield);
8580     break;
8581 
8582     case CONF_FIZ:
8583     optval = coptlist[i].value;
8584     printf("%d\n", optval);
8585     break;
8586 
8587     case CONF_INT:
8588     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8589     printf("%d\n", yield);
8590     break;
8591 
8592     case CONF_NL:
8593     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8594     print_newline_config(optval, TRUE);
8595     break;
8596     }
8597 
8598 /* For VMS, return the value by setting a symbol, for certain values only. This
8599 is contributed code which the PCRE2 developers have no means of testing. */
8600 
8601 #ifdef __VMS
8602 
8603 /* This is the original code provided by the first VMS contributor. */
8604 #ifdef NEVER
8605   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8606     {
8607     char ucname[16];
8608     strcpy(ucname, coptlist[i].name);
8609     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8610     vms_setsymbol(ucname, 0, optval);
8611     }
8612 #endif
8613 
8614 /* This is the new code, provided by a second VMS contributor. */
8615 
8616   if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8617     {
8618     char nam_buf[22], val_buf[4];
8619     $DESCRIPTOR(nam, nam_buf);
8620     $DESCRIPTOR(val, val_buf);
8621 
8622     strcpy(nam_buf, coptlist[i].name);
8623     nam.dsc$w_length = strlen(nam_buf);
8624     sprintf(val_buf, "%d", yield);
8625     val.dsc$w_length = strlen(val_buf);
8626     lib$set_symbol(&nam, &val);
8627     }
8628 #endif  /* __VMS */
8629 
8630   return yield;
8631   }
8632 
8633 /* No argument for -C: output all configuration information. */
8634 
8635 print_version(stdout, FALSE);
8636 printf("Compiled with\n");
8637 
8638 #ifdef EBCDIC
8639 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8640 #if defined NATIVE_ZOS
8641 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
8642 #endif
8643 #endif
8644 
8645 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8646 if (optval & 1) printf("  8-bit support\n");
8647 if (optval & 2) printf("  16-bit support\n");
8648 if (optval & 4) printf("  32-bit support\n");
8649 
8650 #ifdef SUPPORT_VALGRIND
8651 printf("  Valgrind support\n");
8652 #endif
8653 
8654 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8655 if (optval != 0)
8656   {
8657   printf("  UTF and UCP support (");
8658   print_unicode_version(stdout);
8659   printf(")\n");
8660   }
8661 else printf("  No Unicode support\n");
8662 
8663 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8664 if (optval != 0)
8665   {
8666   printf("  Just-in-time compiler support: ");
8667   print_jit_target(stdout);
8668   printf("\n");
8669   }
8670 else
8671   {
8672   printf("  No just-in-time compiler support\n");
8673   }
8674 
8675 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8676 print_newline_config(optval, FALSE);
8677 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8678 printf("  \\R matches %s\n",
8679   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8680                                  "all Unicode newlines");
8681 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8682 printf("  \\C is %ssupported\n", optval? "not ":"");
8683 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8684 printf("  Internal link size = %d\n", optval);
8685 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8686 printf("  Parentheses nest limit = %d\n", optval);
8687 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8688 printf("  Default heap limit = %d kibibytes\n", optval);
8689 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8690 printf("  Default match limit = %d\n", optval);
8691 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8692 printf("  Default depth limit = %d\n", optval);
8693 
8694 #if defined SUPPORT_LIBREADLINE
8695 printf("  pcre2test has libreadline support\n");
8696 #elif defined SUPPORT_LIBEDIT
8697 printf("  pcre2test has libedit support\n");
8698 #else
8699 printf("  pcre2test has neither libreadline nor libedit support\n");
8700 #endif
8701 
8702 return 0;
8703 }
8704 
8705 
8706 /*************************************************
8707 *      Format one property/script list item      *
8708 *************************************************/
8709 
#ifdef SUPPORT_UNICODE

/* Build the text for one entry in the script or property list. The entry is
given as a list of offsets into the PRIV(utt_names) string table, terminated by
a negative value. One name is written first; if there is more than one name,
the others follow in parentheses, separated by commas, in list order.

The name put first is chosen as follows: for scripts, the first 3-character
name; for non-scripts, or if there is no 3-character name, the longest name
(the earliest one when several are equally long).

Arguments:
  ff         list of name offsets, terminated by a negative value
  buff       where to put the zero-terminated result; no length check is
               made, so the caller must supply a big enough buffer
  isscript   TRUE if the entry is a script

Returns:     nothing
*/

static void
format_list_item(int16_t *ff, char *buff, BOOL isscript)
{
int count = 0;
int first = 0;
size_t firstlen = 0;
const char *firstname = "";
const char *sep = " (";
BOOL chosen = FALSE;

/* A single pass both counts the names and picks the one to put first. Once a
3-character script name has been found the choice is final, but counting
continues to the end of the list. */

while (ff[count] >= 0)
  {
  if (!chosen)
    {
    const char *name = PRIV(utt_names) + ff[count];
    size_t len = strlen(name);
    BOOL is3 = isscript && len == 3;
    if (is3 || len > firstlen)
      {
      first = count;
      firstlen = len;
      firstname = name;
      }
    if (is3) chosen = TRUE;
    }
  count++;
  }

memcpy(buff, firstname, firstlen + 1);   /* Includes the terminating zero */
buff += firstlen;

if (count <= 1) return;

/* Append the remaining names as a parenthesized, comma-separated list. */

for (int i = 0; i < count; i++)
  {
  if (i != first)
    {
    buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]);
    sep = ", ";
    }
  }

buff[0] = ')';
buff[1] = 0;
}
#endif  /* SUPPORT_UNICODE */
8759 
8760 
8761 
8762 /*************************************************
8763 *        Display scripts or properties           *
8764 *************************************************/
8765 
8766 #define MAX_SYNONYMS 5
8767 
8768 static void
display_properties(BOOL wantscripts)8769 display_properties(BOOL wantscripts)
8770 {
8771 #ifndef SUPPORT_UNICODE
8772 (void)wantscripts;
8773 printf("** This version of PCRE2 was compiled without Unicode support.\n");
8774 #else
8775 
8776 uint16_t seentypes[1024];
8777 uint16_t seenvalues[1024];
8778 int seencount = 0;
8779 int16_t found[256][MAX_SYNONYMS + 1];
8780 int fc = 0;
8781 int colwidth = 40;
8782 int n = wantscripts? ucp_Script_Count : ucp_Bprop_Count;
8783 
8784 for (size_t i = 0; i < PRIV(utt_size); i++)
8785   {
8786   int k;
8787   int m = 0;
8788   int16_t *fv;
8789   const ucp_type_table *t = PRIV(utt) + i;
8790   unsigned int value = t->value;
8791 
8792   if (wantscripts)
8793     {
8794     if (t->type != PT_SC && t->type != PT_SCX) continue;
8795     }
8796   else
8797     {
8798     if (t->type != PT_BOOL) continue;
8799     }
8800 
8801   for (k = 0; k < seencount; k++)
8802     {
8803     if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
8804     }
8805   if (k < seencount) continue;
8806 
8807   seentypes[seencount] = t->type;
8808   seenvalues[seencount++] = t->value;
8809 
8810   fv = found[fc++];
8811   fv[m++] = t->name_offset;
8812 
8813   for (size_t j = i + 1; j < PRIV(utt_size); j++)
8814     {
8815     const ucp_type_table *tt = PRIV(utt) + j;
8816     if (tt->type != t->type || tt->value != value) continue;
8817     if (m >= MAX_SYNONYMS)
8818       printf("** Too many synonyms: %s ignored\n",
8819         PRIV(utt_names) + tt->name_offset);
8820     else fv[m++] = tt->name_offset;
8821     }
8822 
8823   fv[m] = -1;
8824   }
8825 
8826 printf("-------------------------- SUPPORTED %s --------------------------\n\n",
8827   wantscripts? "SCRIPTS" : "PROPERTIES");
8828 
8829 if (!wantscripts) printf(
8830 "This release of PCRE2 supports Unicode's general category properties such\n"
8831 "as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
8832 "and the following binary (yes/no) properties:\n\n");
8833 
8834 
8835 for (int k = 0; k < (n+1)/2; k++)
8836   {
8837   int x;
8838   char buff1[128];
8839   char buff2[128];
8840 
8841   format_list_item(found[k], buff1, wantscripts);
8842   x = k + (n+1)/2;
8843   if (x < n) format_list_item(found[x], buff2, wantscripts);
8844     else buff2[0] = 0;
8845 
8846   x = printf("%s", buff1);
8847   while (x++ < colwidth) printf(" ");
8848   printf("%s\n", buff2);
8849   }
8850 
8851 #endif  /* SUPPORT_UNICODE */
8852 }
8853 
8854 
8855 
8856 /*************************************************
8857 *              Display one modifier              *
8858 *************************************************/
8859 
8860 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8861 display_one_modifier(modstruct *m, BOOL for_pattern)
8862 {
8863 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8864   '*' : ' ';
8865 printf("%c%s", c, m->name);
8866 for (size_t i = 0; i < C1MODLISTCOUNT; i++)
8867   {
8868   if (strcmp(m->name, c1modlist[i].fullname) == 0)
8869     printf(" (%c)", c1modlist[i].onechar);
8870   }
8871 }
8872 
8873 
8874 
8875 /*************************************************
8876 *       Display pattern or subject modifiers     *
8877 *************************************************/
8878 
8879 /* In order to print in two columns, first scan without printing to get a list
8880 of the modifiers that are required.
8881 
8882 Arguments:
8883   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8884   title         string to be used in title
8885 
8886 Returns:        nothing
8887 */
8888 
8889 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8890 display_selected_modifiers(BOOL for_pattern, const char *title)
8891 {
8892 uint32_t i, j;
8893 uint32_t n = 0;
8894 uint32_t list[MODLISTCOUNT];
8895 uint32_t extra[MODLISTCOUNT];
8896 
8897 for (i = 0; i < MODLISTCOUNT; i++)
8898   {
8899   BOOL is_pattern = TRUE;
8900   modstruct *m = modlist + i;
8901 
8902   switch (m->which)
8903     {
8904     case MOD_CTC:       /* Compile context */
8905     case MOD_PAT:       /* Pattern */
8906     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8907     break;
8908 
8909     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8910     subjects, but can be given with a pattern. We list them as subject
8911     modifiers, but marked with an asterisk.*/
8912 
8913     case MOD_CTM:       /* Match context */
8914     case MOD_DAT:       /* Subject line */
8915     case MOD_DATP:      /* Subject line, OK for Perl-compatible test */
8916     case MOD_PND:       /* As PD, but not default pattern */
8917     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8918     is_pattern = FALSE;
8919     break;
8920 
8921     default: printf("** Unknown type for modifier '%s'\n", m->name);
8922     /* Fall through */
8923     case MOD_PD:        /* Pattern or subject */
8924     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8925     is_pattern = for_pattern;
8926     break;
8927     }
8928 
8929   if (for_pattern == is_pattern)
8930     {
8931     extra[n] = 0;
8932     for (size_t k = 0; k < C1MODLISTCOUNT; k++)
8933       {
8934       if (strcmp(m->name, c1modlist[k].fullname) == 0)
8935         {
8936         extra[n] += 4;
8937         break;
8938         }
8939       }
8940     list[n++] = i;
8941     }
8942   }
8943 
8944 /* Now print from the list in two columns. */
8945 
8946 printf("-------------- %s MODIFIERS --------------\n", title);
8947 
8948 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8949   {
8950   modstruct *m = modlist + list[i];
8951   display_one_modifier(m, for_pattern);
8952   if (j < n)
8953     {
8954     uint32_t k = 27 - strlen(m->name) - extra[i];
8955     while (k-- > 0) printf(" ");
8956     display_one_modifier(modlist + list[j], for_pattern);
8957     }
8958   printf("\n");
8959   }
8960 }
8961 
8962 
8963 
8964 /*************************************************
8965 *          Display the list of modifiers         *
8966 *************************************************/
8967 
8968 static void
display_modifiers(void)8969 display_modifiers(void)
8970 {
8971 printf(
8972   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8973   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8974   "that are listed for both patterns and subjects have different effects in\n"
8975   "each case.\n\n");
8976 display_selected_modifiers(TRUE, "PATTERN");
8977 printf("\n");
8978 display_selected_modifiers(FALSE, "SUBJECT");
8979 }
8980 
8981 
8982 
8983 /*************************************************
8984 *                Main Program                    *
8985 *************************************************/
8986 
8987 int
main(int argc,char ** argv)8988 main(int argc, char **argv)
8989 {
8990 uint32_t temp;
8991 uint32_t yield = 0;
8992 uint32_t op = 1;
8993 BOOL notdone = TRUE;
8994 BOOL quiet = FALSE;
8995 BOOL showtotaltimes = FALSE;
8996 BOOL skipping = FALSE;
8997 char *arg_subject = NULL;
8998 char *arg_pattern = NULL;
8999 char *arg_error = NULL;
9000 
9001 /* The offsets to the options and control bits fields of the pattern and data
9002 control blocks must be the same so that common options and controls such as
9003 "anchored" or "memory" can work for either of them from a single table entry.
9004 We cannot test this till runtime because "offsetof" does not work in the
9005 preprocessor. */
9006 
9007 if (PO(options) != DO(options) || PO(control) != DO(control) ||
9008     PO(control2) != DO(control2))
9009   {
9010   fprintf(stderr, "** Coding error: "
9011     "options and control offsets for pattern and data must be the same.\n");
9012   return 1;
9013   }
9014 
9015 /* Get the PCRE2 and Unicode version number and JIT target information, at the
9016 same time checking that a request for the length gives the same answer. Also
9017 check lengths for non-string items. */
9018 
9019 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
9020     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
9021 
9022     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
9023     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
9024 
9025     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
9026     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
9027 
9028     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
9029     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
9030   {
9031   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
9032   return 1;
9033   }
9034 
9035 /* Check that bad options are diagnosed. */
9036 
9037 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
9038     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
9039   {
9040   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
9041   return 1;
9042   }
9043 
9044 /* This configuration option is now obsolete, but running a quick check ensures
9045 that its code is covered. */
9046 
9047 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
9048 
9049 /* Get buffers from malloc() so that valgrind will check their misuse when
9050 debugging. They grow automatically when very long lines are read. The 16-
9051 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
9052 
9053 buffer = (uint8_t *)malloc(pbuffer8_size);
9054 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
9055 
9056 /* The following  _setmode() stuff is some Windows magic that tells its runtime
9057 library to translate CRLF into a single LF character. At least, that's what
9058 I've been told: never having used Windows I take this all on trust. Originally
9059 it set 0x8000, but then I was advised that _O_BINARY was better. */
9060 
9061 #if defined(_WIN32) || defined(WIN32)
9062 _setmode( _fileno( stdout ), _O_BINARY );
9063 #endif
9064 
9065 /* Initialization that does not depend on the running mode. */
9066 
9067 locale_name[0] = 0;
9068 
9069 memset(&def_patctl, 0, sizeof(patctl));
9070 def_patctl.convert_type = CONVERT_UNSET;
9071 
9072 memset(&def_datctl, 0, sizeof(datctl));
9073 def_datctl.oveccount = DEFAULT_OVECCOUNT;
9074 def_datctl.copy_numbers[0] = -1;
9075 def_datctl.get_numbers[0] = -1;
9076 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
9077 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
9078 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
9079 
9080 /* Scan command line options. */
9081 
9082 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
9083   {
9084   char *endptr;
9085   char *arg = argv[op];
9086   unsigned long uli;
9087 
9088   /* List modifiers and exit. */
9089 
9090   if (strcmp(arg, "-LM") == 0)
9091     {
9092     display_modifiers();
9093     goto EXIT;
9094     }
9095 
9096   /* List properties and exit */
9097 
9098   if (strcmp(arg, "-LP") == 0)
9099     {
9100     display_properties(FALSE);
9101     goto EXIT;
9102     }
9103 
9104   /* List scripts and exit */
9105 
9106   if (strcmp(arg, "-LS") == 0)
9107     {
9108     display_properties(TRUE);
9109     goto EXIT;
9110     }
9111 
9112   /* Display and/or set return code for configuration options. */
9113 
9114   if (strcmp(arg, "-C") == 0)
9115     {
9116     yield = c_option(argv[op + 1]);
9117     goto EXIT;
9118     }
9119 
9120   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
9121   and 32-bit modes because that won't happen naturally when 8-bit is also
9122   configured. Also call some other functions that are not otherwise used. This
9123   means that a coverage report won't claim there are uncalled functions. */
9124 
9125   if (strcmp(arg, "-8") == 0)
9126     {
9127 #ifdef SUPPORT_PCRE2_8
9128     test_mode = PCRE8_MODE;
9129     (void)pcre2_set_bsr_8(pat_context8, 999);
9130     (void)pcre2_set_newline_8(pat_context8, 999);
9131 #else
9132     fprintf(stderr,
9133       "** This version of PCRE2 was built without 8-bit support\n");
9134     exit(1);
9135 #endif
9136     }
9137 
9138   else if (strcmp(arg, "-16") == 0)
9139     {
9140 #ifdef SUPPORT_PCRE2_16
9141     test_mode = PCRE16_MODE;
9142     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
9143     (void)pcre2_set_bsr_16(pat_context16, 999);
9144     (void)pcre2_set_newline_16(pat_context16, 999);
9145 #else
9146     fprintf(stderr,
9147       "** This version of PCRE2 was built without 16-bit support\n");
9148     exit(1);
9149 #endif
9150     }
9151 
9152   else if (strcmp(arg, "-32") == 0)
9153     {
9154 #ifdef SUPPORT_PCRE2_32
9155     test_mode = PCRE32_MODE;
9156     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
9157     (void)pcre2_set_bsr_32(pat_context32, 999);
9158     (void)pcre2_set_newline_32(pat_context32, 999);
9159 #else
9160     fprintf(stderr,
9161       "** This version of PCRE2 was built without 32-bit support\n");
9162     exit(1);
9163 #endif
9164     }
9165 
9166   /* Set quiet (do not output the version heading line) */
9167 
9168   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
9169 
9170   /* Set system stack size */
9171 
9172   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
9173       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
9174     {
9175 #if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
9176     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
9177     exit(1);
9178 #else
9179     int rc;
9180     uint32_t stack_size;
9181     struct rlimit rlim;
9182     if (U32OVERFLOW(uli))
9183       {
9184       fprintf(stderr, "** Argument for -S is too big\n");
9185       exit(1);
9186       }
9187     stack_size = (uint32_t)uli;
9188     getrlimit(RLIMIT_STACK, &rlim);
9189     rlim.rlim_cur = stack_size * 1024 * 1024;
9190     if (rlim.rlim_cur > rlim.rlim_max)
9191       {
9192       fprintf(stderr,
9193         "pcre2test: requested stack size %luMiB is greater than hard limit ",
9194           (unsigned long int)stack_size);
9195       if (rlim.rlim_max % (1024*1024) == 0) fprintf(stderr, "%luMiB\n",
9196         (unsigned long int)(rlim.rlim_max/(1024 * 1024)));
9197       else if (rlim.rlim_max % 1024 == 0) fprintf(stderr, "%luKiB\n",
9198         (unsigned long int)(rlim.rlim_max/1024));
9199       else fprintf(stderr, "%lu bytes\n", (unsigned long int)(rlim.rlim_max));
9200       exit(1);
9201       }
9202     rc = setrlimit(RLIMIT_STACK, &rlim);
9203     if (rc != 0)
9204       {
9205       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
9206         (unsigned long int)stack_size, strerror(errno));
9207       exit(1);
9208       }
9209     op++;
9210     argc--;
9211 #endif
9212     }
9213 
9214   /* Set some common pattern and subject controls */
9215 
9216   else if (strcmp(arg, "-AC") == 0)
9217     {
9218     def_patctl.options |= PCRE2_AUTO_CALLOUT;
9219     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
9220     }
9221   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
9222   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
9223   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
9224   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
9225   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
9226   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
9227            strcmp(arg, "-jitfast") == 0)
9228     {
9229     if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
9230       else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
9231     def_patctl.jit = JIT_DEFAULT;  /* full & partial */
9232 #ifndef SUPPORT_JIT
9233     fprintf(stderr, "** Warning: JIT support is not available: "
9234                     "-jit[fast|verify] calls functions that do nothing.\n");
9235 #endif
9236     }
9237 
9238   /* Set timing parameters */
9239 
9240   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
9241            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
9242     {
9243     int both = arg[2] == 0;
9244     showtotaltimes = arg[1] == 'T';
9245     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
9246       {
9247       if (uli == 0)
9248         {
9249         fprintf(stderr, "** Argument for %s must not be zero\n", arg);
9250         exit(1);
9251         }
9252       if (U32OVERFLOW(uli))
9253         {
9254         fprintf(stderr, "** Argument for %s is too big\n", arg);
9255         exit(1);
9256         }
9257       timeitm = (int)uli;
9258       op++;
9259       argc--;
9260       }
9261     else timeitm = LOOPREPEAT;
9262     if (both) timeit = timeitm;
9263     }
9264 
9265   /* Give help */
9266 
9267   else if (strcmp(arg, "-help") == 0 ||
9268            strcmp(arg, "--help") == 0)
9269     {
9270     usage();
9271     goto EXIT;
9272     }
9273 
9274   /* Show version */
9275 
9276   else if (memcmp(arg, "-v", 2) == 0 ||
9277            strcmp(arg, "--version") == 0)
9278     {
9279     print_version(stdout, FALSE);
9280     goto EXIT;
9281     }
9282 
9283   /* The following options save their data for processing once we know what
9284   the running mode is. */
9285 
9286   else if (strcmp(arg, "-error") == 0)
9287     {
9288     arg_error = argv[op+1];
9289     goto CHECK_VALUE_EXISTS;
9290     }
9291 
9292   else if (strcmp(arg, "-subject") == 0)
9293     {
9294     arg_subject = argv[op+1];
9295     goto CHECK_VALUE_EXISTS;
9296     }
9297 
9298   else if (strcmp(arg, "-pattern") == 0)
9299     {
9300     arg_pattern = argv[op+1];
9301     CHECK_VALUE_EXISTS:
9302     if (argc <= 2)
9303       {
9304       fprintf(stderr, "** Missing value for %s\n", arg);
9305       yield = 1;
9306       goto EXIT;
9307       }
9308     op++;
9309     argc--;
9310     }
9311 
9312   /* Unrecognized option */
9313 
9314   else
9315     {
9316     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
9317     usage();
9318     yield = 1;
9319     goto EXIT;
9320     }
9321   op++;
9322   argc--;
9323   }
9324 
9325 /* If -error was present, get the error numbers, show the messages, and exit.
9326 We wait to do this until we know which mode we are in. */
9327 
9328 if (arg_error != NULL)
9329   {
9330   int len;
9331   int errcode;
9332   char *endptr;
9333 
9334 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
9335 least 128 code units, because it is used for retrieving error messages. */
9336 
9337 #ifdef SUPPORT_PCRE2_16
9338   if (test_mode == PCRE16_MODE)
9339     {
9340     pbuffer16_size = 256;
9341     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
9342     if (pbuffer16 == NULL)
9343       {
9344       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
9345         pbuffer16_size);
9346       yield = 1;
9347       goto EXIT;
9348       }
9349     }
9350 #endif
9351 
9352 #ifdef SUPPORT_PCRE2_32
9353   if (test_mode == PCRE32_MODE)
9354     {
9355     pbuffer32_size = 512;
9356     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
9357     if (pbuffer32 == NULL)
9358       {
9359       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
9360         pbuffer32_size);
9361       yield = 1;
9362       goto EXIT;
9363       }
9364     }
9365 #endif
9366 
9367   /* Loop along a list of error numbers. */
9368 
9369   for (;;)
9370     {
9371     errcode = strtol(arg_error, &endptr, 10);
9372     if (*endptr != 0 && *endptr != CHAR_COMMA)
9373       {
9374       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
9375       yield = 1;
9376       goto EXIT;
9377       }
9378     printf("Error %d: ", errcode);
9379     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
9380     if (len < 0)
9381       {
9382       switch (len)
9383         {
9384         case PCRE2_ERROR_BADDATA:
9385         printf("PCRE2_ERROR_BADDATA (unknown error number)");
9386         break;
9387 
9388         case PCRE2_ERROR_NOMEMORY:
9389         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
9390         break;
9391 
9392         default:
9393         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
9394         break;
9395         }
9396       }
9397     else
9398       {
9399       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
9400       }
9401     printf("\n");
9402     if (*endptr == 0) goto EXIT;
9403     arg_error = endptr + 1;
9404     }
9405   /* Control never reaches here */
9406   }  /* End of -error handling */
9407 
9408 /* Initialize things that cannot be done until we know which test mode we are
9409 running in. Exercise the general context copying and match data size functions,
9410 which are not otherwise used. */
9411 
9412 code_unit_size = test_mode/8;
9413 max_oveccount = DEFAULT_OVECCOUNT;
9414 
9415 /* Use macros to save a lot of duplication. */
9416 
/* CREATECONTEXTS: allocate the full set of PCRE2 contexts, plus the match data
block, for the code unit width currently selected by BITS. Every variable and
function name is formed by G() from a stem and BITS (G is defined outside this
section; it is assumed to paste the two together - confirm). The statements are
order-dependent: the general context is created first, using my_malloc and
my_free with NULL as the third argument, and is then passed to each of the
later create calls; each "copy" call duplicates the object created on the line
just before it. The match data block is created with max_oveccount. None of the
results is checked here. The expansion has no trailing semicolon, so the macro
is invoked as "CREATECONTEXTS;". */
9417 #define CREATECONTEXTS \
9418   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
9419   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
9420   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
9421   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
9422   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
9423   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
9424   G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
9425   G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
9426   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
9427 
/* CONTEXTTESTS: call a handful of context-setting functions, and the match
data size function, for the code unit width selected by BITS. Each setter is
given the value 0 and every return value is explicitly discarded with a (void)
cast, so the calls are made only for the sake of calling them. The macro
operates on the pat_context, dat_context and match_data variables for the
selected width; in this file it is always invoked straight after
CREATECONTEXTS, which assigns them. The expansion has no trailing semicolon, so
the macro is invoked as "CONTEXTTESTS;". */
9428 #define CONTEXTTESTS \
9429   (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
9430   (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
9431   (void)G(pcre2_set_max_pattern_compiled_length_,BITS)(G(pat_context,BITS), 0); \
9432   (void)G(pcre2_set_max_varlookbehind_,BITS)(G(pat_context,BITS), 0); \
9433   (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
9434   (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
9435 
9436 /* Call the appropriate functions for the current mode, and exercise some
9437 functions that are not otherwise called. */
9438 
9439 #ifdef SUPPORT_PCRE2_8
9440 #undef BITS
9441 #define BITS 8
9442 if (test_mode == PCRE8_MODE)
9443   {
9444   CREATECONTEXTS;
9445   CONTEXTTESTS;
9446   }
9447 #endif
9448 
9449 #ifdef SUPPORT_PCRE2_16
9450 #undef BITS
9451 #define BITS 16
9452 if (test_mode == PCRE16_MODE)
9453   {
9454   CREATECONTEXTS;
9455   CONTEXTTESTS;
9456   }
9457 #endif
9458 
9459 #ifdef SUPPORT_PCRE2_32
9460 #undef BITS
9461 #define BITS 32
9462 if (test_mode == PCRE32_MODE)
9463   {
9464   CREATECONTEXTS;
9465   CONTEXTTESTS;
9466   }
9467 #endif
9468 
9469 /* Set a default parentheses nest limit that is large enough to run the
9470 standard tests (this also exercises the function). */
9471 
9472 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
9473 
9474 /* Handle command line modifier settings, sending any error messages to
9475 stderr. We need to know the mode before modifying the context, and it is tidier
9476 to do them all in the same way. */
9477 
9478 outfile = stderr;
9479 if ((arg_pattern != NULL &&
9480     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
9481     (arg_subject != NULL &&
9482     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
9483   {
9484   yield = 1;
9485   goto EXIT;
9486   }
9487 
9488 /* Sort out the input and output files, defaulting to stdin/stdout. */
9489 
9490 infile = stdin;
9491 outfile = stdout;
9492 
9493 if (argc > 1 && strcmp(argv[op], "-") != 0)
9494   {
9495   infile = fopen(argv[op], INPUT_MODE);
9496   if (infile == NULL)
9497     {
9498     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9499     yield = 1;
9500     goto EXIT;
9501     }
9502   }
9503 
9504 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9505 if (INTERACTIVE(infile)) using_history();
9506 #endif
9507 
9508 if (argc > 2)
9509   {
9510   outfile = fopen(argv[op+1], OUTPUT_MODE);
9511   if (outfile == NULL)
9512     {
9513     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9514     yield = 1;
9515     goto EXIT;
9516     }
9517   }
9518 
9519 /* Output a heading line unless quiet, then process input lines. */
9520 
9521 if (!quiet) print_version(outfile, TRUE);
9522 
9523 SET(compiled_code, NULL);
9524 
9525 #ifdef SUPPORT_PCRE2_8
9526 preg.re_pcre2_code = NULL;
9527 preg.re_match_data = NULL;
9528 #endif
9529 
9530 while (notdone)
9531   {
9532   uint8_t *p;
9533   int rc = PR_OK;
9534   BOOL expectdata = TEST(compiled_code, !=, NULL);
9535 #ifdef SUPPORT_PCRE2_8
9536   expectdata |= preg.re_pcre2_code != NULL;
9537 #endif
9538 
9539   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
9540     break;
9541   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9542   fflush(outfile);
9543   p = buffer;
9544 
9545   /* If we have a pattern set up for testing, or we are skipping after a
9546   compile failure, a blank line terminates this test. */
9547 
9548   if (expectdata || skipping)
9549     {
9550     while (isspace(*p)) p++;
9551     if (*p == 0)
9552       {
9553 #ifdef SUPPORT_PCRE2_8
9554       if (preg.re_pcre2_code != NULL)
9555         {
9556         regfree(&preg);
9557         preg.re_pcre2_code = NULL;
9558         preg.re_match_data = NULL;
9559         }
9560 #endif  /* SUPPORT_PCRE2_8 */
9561       if (TEST(compiled_code, !=, NULL))
9562         {
9563         SUB1(pcre2_code_free, compiled_code);
9564         SET(compiled_code, NULL);
9565         }
9566       skipping = FALSE;
9567       setlocale(LC_CTYPE, "C");
9568       }
9569 
9570     /* Otherwise, if we are not skipping, and the line is not a data comment
9571     line starting with "\=", process a data line. */
9572 
9573     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9574       {
9575       rc = process_data();
9576       }
9577     }
9578 
9579   /* We do not have a pattern set up for testing. Lines starting with # are
9580   either comments or special commands. Blank lines are ignored. Otherwise, the
9581   line must start with a valid delimiter. It is then processed as a pattern
9582   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9583   valgrind, make the unused part of the buffer undefined, to catch overruns. */
9584 
9585   else if (*p == '#')
9586     {
9587     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9588     rc = process_command();
9589     }
9590 
9591   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9592     {
9593     rc = process_pattern();
9594     dfa_matched = 0;
9595     }
9596 
9597   else
9598     {
9599     while (isspace(*p)) p++;
9600     if (*p != 0)
9601       {
9602       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9603         *buffer);
9604       rc = PR_SKIP;
9605       }
9606     }
9607 
9608   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9609   else if (rc == PR_ABEND)
9610     {
9611     fprintf(outfile, "** pcre2test run abandoned\n");
9612     yield = 1;
9613     goto EXIT;
9614     }
9615   }
9616 
9617 /* Finish off a normal run. */
9618 
9619 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9620 
9621 if (showtotaltimes)
9622   {
9623   const char *pad = "";
9624   fprintf(outfile, "--------------------------------------\n");
9625   if (timeit > 0)
9626     {
9627     fprintf(outfile, "Total compile time %8.2f microseconds\n",
9628       ((1000000 / CLOCKS_PER_SEC) * (double)total_compile_time) / timeit);
9629     if (total_jit_compile_time > 0)
9630       fprintf(outfile, "Total JIT compile  %8.2f microseconds\n",
9631         ((1000000 / CLOCKS_PER_SEC) * (double)total_jit_compile_time) / \
9632         timeit);
9633     pad = "  ";
9634     }
9635   fprintf(outfile, "Total match time %s%8.2f microseconds\n", pad,
9636     ((1000000 / CLOCKS_PER_SEC) * (double)total_match_time) / timeitm);
9637   }
9638 
9639 
9640 EXIT:
9641 
9642 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9643 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9644 #endif
9645 
9646 if (infile != NULL && infile != stdin) fclose(infile);
9647 if (outfile != NULL && outfile != stdout) fclose(outfile);
9648 
9649 free(buffer);
9650 free(dbuffer);
9651 free(pbuffer8);
9652 free(dfa_workspace);
9653 free(tables3);
9654 PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
9655 PCRE2_MATCH_DATA_FREE(match_data);
9656 SUB1(pcre2_code_free, compiled_code);
9657 
9658 while(patstacknext-- > 0)
9659   {
9660   SET(compiled_code, patstack[patstacknext]);
9661   SUB1(pcre2_code_free, compiled_code);
9662   }
9663 
9664 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9665 if (jit_stack != NULL)
9666   {
9667   PCRE2_JIT_STACK_FREE(jit_stack);
9668   }
9669 
/* FREECONTEXTS: release the eight context objects (general context and its
copy, plus the default and working compile, match and convert contexts) for the
code unit width selected by BITS. The match data block is not freed here; that
is done separately by PCRE2_MATCH_DATA_FREE earlier in the exit path. Unlike
CREATECONTEXTS and CONTEXTTESTS, this expansion already ends with a semicolon,
so writing "FREECONTEXTS;" produces one extra, empty statement.
NOTE(review): the invocations that follow are conditional only on the
SUPPORT_PCRE2_xx macros, not on test_mode, so for widths that were not selected
the variables are passed as they stand - presumably still NULL, and presumably
accepted by the free functions; confirm. */
9670 #define FREECONTEXTS \
9671   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9672   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9673   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9674   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9675   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9676   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9677   G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9678   G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9679 
9680 #ifdef SUPPORT_PCRE2_8
9681 #undef BITS
9682 #define BITS 8
9683 if (preg.re_pcre2_code != NULL) regfree(&preg);
9684 FREECONTEXTS;
9685 #endif
9686 
9687 #ifdef SUPPORT_PCRE2_16
9688 #undef BITS
9689 #define BITS 16
9690 free(pbuffer16);
9691 FREECONTEXTS;
9692 #endif
9693 
9694 #ifdef SUPPORT_PCRE2_32
9695 #undef BITS
9696 #define BITS 32
9697 free(pbuffer32);
9698 FREECONTEXTS;
9699 #endif
9700 
9701 #if defined(__VMS)
9702   yield = SS$_NORMAL;  /* Return values via DCL symbols */
9703 #endif
9704 
9705 return yield;
9706 }
9707 
9708 /* End of pcre2test.c */
9709