1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2024 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported */
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #elif defined(HAVE_EDIT_READLINE_READLINE_H)
94 #include <edit/readline/readline.h>
95 #else
96 #include <readline.h>
97 /* GNU readline defines this macro but libedit doesn't, if that ever changes
98 this needs to be updated or the build could break */
99 #ifdef RL_VERSION_MAJOR
100 #include <history.h>
101 #endif
102 #endif
103 #endif
104 #endif
105
/* The check for interactive input is wrapped in a macro so that a different
test can be substituted for environments that need one. The argument is a
FILE pointer. */

#define INTERACTIVE(f) (isatty(fileno(f)))
110
111
112 /* ---------------------- System-specific definitions ---------------------- */
113
114 /* A number of things vary for Windows builds. Originally, pcretest opened its
115 input and output without "b"; then I was told that "b" was needed in some
116 environments, so it was added for release 5.0 to both the input and output. (It
117 makes no difference on Unix-like systems.) Later I was told that it is wrong
118 for the input on Windows. I've now abstracted the modes into macros that are
119 set here, to make it easier to fiddle with them, and removed "b" from the input
120 mode under Windows. The BINARY versions are used when saving/restoring compiled
121 patterns. */
122
#if defined(_WIN32) || defined(WIN32)

/* Windows: input is opened without "b", output with it. */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

/* Windows spells these functions with a leading underscore. Some environments
already provide the plain names, so they are mapped only when missing. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined(NATIVE_ZOS)        /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#endif  /* Windows / not Windows */

/* The BINARY modes, used when saving/restoring compiled patterns, are the same
in every environment. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
162
163 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
164 user [2] provided alternative code which worked better for him. I have
165 commented out the original, but kept it around just in case. */
166
167 #ifdef __VMS
168 #include <ssdef.h>
169 /* These two includes came from [2]. */
170 #include descrip
171 #include lib$routines
172 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
173 #endif
174
/* printf() conversions for ptrdiff_t (PTR_FORM) and size_t (SIZ_FORM) values.
%td and %zu are used when they can be relied on. Old VC and older compilers do
not support them, and even some that claim to be C99 do not (hence
DISABLE_PERCENT_ZT); in those cases fall back to long or long long forms. */

#if !defined(DISABLE_PERCENT_ZT) && \
    (!defined(_MSC_VER) || (_MSC_VER >= 1800)) && \
    (defined(_MSC_VER) || \
      (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)))
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#elif defined(_WIN64)
#define PTR_FORM "lld"
#define SIZ_FORM "llu"
#else
#define PTR_FORM "ld"
#define SIZ_FORM "lu"
#endif
191
192 /* ------------------End of system-specific definitions -------------------- */
193
/* Token-pasting helpers that are used in several places below. G() goes
through a second macro level so that its arguments are macro-expanded before
they are pasted together; glue() pastes them exactly as written. */

#define glue(a,b) a ## b
#define G(a,b)    glue(a,b)
198
/* Miscellaneous parameters and manifests */

/* Supply CLOCKS_PER_SEC when the system headers have not: use CLK_TCK if that
exists, otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

#define CFORE_UNSET          UINT32_MAX  /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET        UINT32_MAX  /* Unset value for convert_type field */
#define DFA_WS_DIMENSION     1000        /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT    15          /* Default ovector count */
#define JUNK_OFFSET          0xdeadbeef  /* For initializing ovector */
#define LOCALESIZE           32          /* Size of locale name */
#define LOOPREPEAT           500000      /* Default loop count for timing */
#define MALLOCLISTSIZE       20          /* For remembering mallocs */
#define PARENS_NEST_DEFAULT  220         /* Default parentheses nest limit */
#define PATSTACKSIZE         20          /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE      100         /* Field for reading 8-bit replacement */
#define VERSION_SIZE         64          /* Size of buffer for the version strings */
221
/* Default JIT compile options: complete matching plus both kinds of partial
matching. */

#define JIT_DEFAULT \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD)

/* The buffer into which replacement strings are copied must be big enough to
hold them as 32-bit code units. */

#define REPLACE_BUFFSIZE 1024   /* This is a byte value */
232
/* Execution modes; each value is the code unit width in bits. */

#define PCRE8_MODE   8
#define PCRE16_MODE  16
#define PCRE32_MODE  32

/* Processing returns */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
242
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK consults isprint() only when use_tables is not NULL and the
character is below 256; in every other case it falls back to PRINTABLE. The
argument is parenthesized so that an expression argument (for example a | b)
is compared as a whole. Both macros evaluate their argument more than once, so
it must not have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
255
256 /* We have to include some of the library source files because we need
257 to use some of the macros, internal structure definitions, and other internal
258 values - pcre2test has "inside information" compared to an application program
259 that strictly follows the PCRE2 API.
260
261 Before including pcre2_internal.h we define PRIV so that it does not get
262 defined therein. This ensures that PRIV names in the included files do not
263 clash with those in the libraries. Also, although pcre2_internal.h does itself
264 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
265 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
266 for building the library.
267
268 Setting PCRE2_CODE_UNIT_WIDTH to zero cuts out all the width-specific settings
269 in pcre2.h and pcre2_internal.h. Defining PCRE2_BUILDING_PCRE2TEST cuts out the
270 check in pcre2_internal.h that ensures PCRE2_CODE_UNIT_WIDTH is 8, 16, or 32
271 (which it needs to be when compiling one of the libraries). */
272
273 #define PRIV(name) name
274 #define PCRE2_CODE_UNIT_WIDTH 0
275 #define PCRE2_BUILDING_PCRE2TEST
276 #include "pcre2.h"
277 #include "pcre2posix.h"
278 #include "pcre2_internal.h"
279
280 /* We need access to some of the data tables that PCRE2 uses. Defining
281 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
282 of PRIV avoids name clashes. */
283
284 #define PCRE2_PCRE2TEST
285 #include "pcre2_tables.c"
286 #include "pcre2_ucd.c"
287
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. When long is wider than 32 bits an
out-of-range value shows up as a result beyond the 32-bit limits; when it is
not, the only available signal is a result equal to the limit itself.

The argument is parenthesized so that an expression argument is tested as a
whole. S32OVERFLOW evaluates its argument twice, so it must not have side
effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
303
304 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
305 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
306 defined. We can now include it for each supported code unit width. Because
307 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
308 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
309 while including these files, and then restore it to a no-op. Because LINK_SIZE
310 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
311 these inclusions should not be changed. */
312
313 #undef PCRE2_SUFFIX
314 #undef PCRE2_CODE_UNIT_WIDTH
315
316 #ifdef SUPPORT_PCRE2_8
317 #define PCRE2_CODE_UNIT_WIDTH 8
318 #define PCRE2_SUFFIX(a) G(a,8)
319 #include "pcre2_intmodedep.h"
320 #include "pcre2_printint.c"
321 #undef PCRE2_CODE_UNIT_WIDTH
322 #undef PCRE2_SUFFIX
323 #endif /* SUPPORT_PCRE2_8 */
324
325 #ifdef SUPPORT_PCRE2_16
326 #define PCRE2_CODE_UNIT_WIDTH 16
327 #define PCRE2_SUFFIX(a) G(a,16)
328 #include "pcre2_intmodedep.h"
329 #include "pcre2_printint.c"
330 #undef PCRE2_CODE_UNIT_WIDTH
331 #undef PCRE2_SUFFIX
332 #endif /* SUPPORT_PCRE2_16 */
333
334 #ifdef SUPPORT_PCRE2_32
335 #define PCRE2_CODE_UNIT_WIDTH 32
336 #define PCRE2_SUFFIX(a) G(a,32)
337 #include "pcre2_intmodedep.h"
338 #include "pcre2_printint.c"
339 #undef PCRE2_CODE_UNIT_WIDTH
340 #undef PCRE2_SUFFIX
341 #endif /* SUPPORT_PCRE2_32 */
342
343 #define PCRE2_SUFFIX(a) a
344
345 #include "pcre2_chkdint.c"
346
347 /* We need to be able to check input text for UTF-8 validity, whatever code
348 widths are actually available, because the input to pcre2test is always in
349 8-bit code units. So we include the UTF validity checking function for 8-bit
350 code units. */
351
352 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
353
354 #define PCRE2_CODE_UNIT_WIDTH 8
355 #undef PCRE2_SPTR
356 #define PCRE2_SPTR PCRE2_SPTR8
357 #include "pcre2_valid_utf.c"
358 #undef PCRE2_CODE_UNIT_WIDTH
359 #undef PCRE2_SPTR
360
/* Choose the default test mode. With 8-bit support, that is the default; if
there is also 16- or 32-bit support, it can be selected by a command-line
option. Without 8-bit support there must be 16-bit or 32-bit support, so one of
those becomes the default. The config function, JIT stack, contexts, and
version string are the same in all modes, so the names belonging to the first
available width are used for them. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE            PCRE8_MODE
#define VERSION_TYPE                 PCRE2_UCHAR8
#define PCRE2_CONFIG                 pcre2_config_8
#define PCRE2_JIT_STACK              pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE            PCRE16_MODE
#define VERSION_TYPE                 PCRE2_UCHAR16
#define PCRE2_CONFIG                 pcre2_config_16
#define PCRE2_JIT_STACK              pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE            PCRE32_MODE
#define VERSION_TYPE                 PCRE2_UCHAR32
#define PCRE2_CONFIG                 pcre2_config_32
#define PCRE2_JIT_STACK              pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_32

#endif
397
398 /* ------------- Structure and table for handling #-commands ------------- */
399
/* One #-command: its name and the CMD_xxx code that it maps to. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command codes. CMD_UNKNOWN is the only one without an entry in cmdlist. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "loadtables",      CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
422
423 /* ------------- Structures and tables for handling modifiers -------------- */
424
/* Names of the newline types, indexed by newline value. This table must be
kept in step with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
430
431 /* Structure and table for handling pattern conversion types. */
432
433 typedef struct convertstruct {
434 const char *name;
435 uint32_t option;
436 } convertstruct;
437
438 static convertstruct convertlist[] = {
439 { "glob", PCRE2_CONVERT_GLOB },
440 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
441 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
442 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
443 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
444 { "unset", CONVERT_UNSET }};
445
446 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
447
/* Modifier types and applicability. The first group says what a modifier
applies to; the second group says what kind of value it carries. The order of
the enumerators must not be changed. */

enum {
  /* Applicability */

  MOD_CTC,     /* Applies to a compile context */
  MOD_CTM,     /* Applies to a match context */
  MOD_PAT,     /* Applies to a pattern */
  MOD_PATP,    /* Ditto, OK for Perl test */
  MOD_DAT,     /* Applies to a data line */
  MOD_DATP,    /* Ditto, OK for Perl test */
  MOD_PD,      /* Applies to a pattern or a data line */
  MOD_PDP,     /* As MOD_PD, OK for Perl test */
  MOD_PND,     /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,    /* As MOD_PND, OK for Perl test */

  /* Value types */

  MOD_CHR,     /* Is a single character */
  MOD_CON,     /* Is a "convert" type/options list */
  MOD_CTL,     /* Is a control bit */
  MOD_BSR,     /* Is a BSR value */
  MOD_IN2,     /* Is one or two unsigned integers */
  MOD_INS,     /* Is a signed integer */
  MOD_INT,     /* Is an unsigned integer */
  MOD_IND,     /* Is an unsigned integer, but no value => default */
  MOD_NL,      /* Is a newline value */
  MOD_NN,      /* Is a number or a name; more than one may occur */
  MOD_OPT,     /* Is an option bit */
  MOD_SIZ,     /* Is a PCRE2_SIZE value */
  MOD_STR      /* Is a string */
};
473
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. This is the first field;
each control occupies one bit. */

#define CTL_AFTERTEXT          (1u << 0)
#define CTL_ALLAFTERTEXT       (1u << 1)
#define CTL_ALLCAPTURES        (1u << 2)
#define CTL_ALLUSEDTEXT        (1u << 3)
#define CTL_ALTGLOBAL          (1u << 4)
#define CTL_BINCODE            (1u << 5)
#define CTL_CALLOUT_CAPTURE    (1u << 6)
#define CTL_CALLOUT_INFO       (1u << 7)
#define CTL_CALLOUT_NONE       (1u << 8)
#define CTL_DFA                (1u << 9)
#define CTL_EXPAND             (1u << 10)
#define CTL_FINDLIMITS         (1u << 11)
#define CTL_FINDLIMITS_NOHEAP  (1u << 12)
#define CTL_FULLBINCODE        (1u << 13)
#define CTL_GETALL             (1u << 14)
#define CTL_GLOBAL             (1u << 15)
#define CTL_HEXPAT             (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO               (1u << 17)
#define CTL_JITFAST            (1u << 18)
#define CTL_JITVERIFY          (1u << 19)
#define CTL_MARK               (1u << 20)
#define CTL_MEMORY             (1u << 21)
#define CTL_NULLCONTEXT        (1u << 22)
#define CTL_POSIX              (1u << 23)
#define CTL_POSIX_NOSUB        (1u << 24)
#define CTL_PUSH               (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY           (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY     (1u << 27)  /*     word. */
#define CTL_STARTCHAR          (1u << 28)
#define CTL_USE_LENGTH         (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT         (1u << 30)
#define CTL_ZERO_TERMINATE     (1u << 31)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)   /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)
516
/* Second control word. As in the first word, each control occupies one bit. */

#define CTL2_SUBSTITUTE_CALLOUT           (1u << 0)
#define CTL2_SUBSTITUTE_EXTENDED          (1u << 1)
#define CTL2_SUBSTITUTE_LITERAL           (1u << 2)
#define CTL2_SUBSTITUTE_MATCHED           (1u << 3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH   (1u << 4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY  (1u << 5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET     (1u << 6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY       (1u << 7)
#define CTL2_SUBJECT_LITERAL              (1u << 8)
#define CTL2_CALLOUT_NO_WHERE             (1u << 9)
#define CTL2_CALLOUT_EXTRA                (1u << 10)
#define CTL2_ALLVECTOR                    (1u << 11)
#define CTL2_NULL_PATTERN                 (1u << 12)
#define CTL2_NULL_SUBJECT                 (1u << 13)
#define CTL2_NULL_REPLACEMENT             (1u << 14)
#define CTL2_FRAMESIZE                    (1u << 15)

/* The informational bits are allocated from the top of the word. */

#define CTL2_HEAPFRAMES_SIZE              (1u << 29)  /* Informational */
#define CTL2_NL_SET                       (1u << 30)  /* Informational */
#define CTL2_BSR_SET                      (1u << 31)  /* Informational */
539
/* The matching controls that may be set either on a pattern or on a data
line. They are copied from the pattern controls as initial settings for data
line controls. CTL_MEMORY is deliberately not included, because it does
different things in the two cases. There is one mask for each control word. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT    | \
   CTL_ALLAFTERTEXT | \
   CTL_ALLCAPTURES  | \
   CTL_ALLUSEDTEXT  | \
   CTL_ALTGLOBAL    | \
   CTL_GLOBAL       | \
   CTL_MARK         | \
   CTL_STARTCHAR    | \
   CTL_UTF8_INPUT)

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_CALLOUT          | \
   CTL2_SUBSTITUTE_EXTENDED         | \
   CTL2_SUBSTITUTE_LITERAL          | \
   CTL2_SUBSTITUTE_MATCHED          | \
   CTL2_SUBSTITUTE_OVERFLOW_LENGTH  | \
   CTL2_SUBSTITUTE_REPLACEMENT_ONLY | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET    | \
   CTL2_SUBSTITUTE_UNSET_EMPTY      | \
   CTL2_ALLVECTOR                   | \
   CTL2_HEAPFRAMES_SIZE)
565
566 /* Structures for holding modifier information for patterns and subject strings
567 (data). Fields containing modifiers that can be set either for a pattern or a
568 subject must be at the start and in the same order in both cases so that the
569 same offset in the big table below works for both. */
570
571 typedef struct patctl { /* Structure for pattern modifiers. */
572 uint32_t options; /* Must be in same position as datctl */
573 uint32_t control; /* Must be in same position as datctl */
574 uint32_t control2; /* Must be in same position as datctl */
575 uint32_t jitstack; /* Must be in same position as datctl */
576 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
577 uint32_t substitute_skip; /* Must be in same position as patctl */
578 uint32_t substitute_stop; /* Must be in same position as patctl */
579 uint32_t jit;
580 uint32_t stackguard_test;
581 uint32_t tables_id;
582 uint32_t convert_type;
583 uint32_t convert_length;
584 uint32_t convert_glob_escape;
585 uint32_t convert_glob_separator;
586 uint32_t regerror_buffsize;
587 uint8_t locale[LOCALESIZE];
588 } patctl;
589
590 #define MAXCPYGET 10
591 #define LENCPYGET 64
592
593 typedef struct datctl { /* Structure for data line modifiers. */
594 uint32_t options; /* Must be in same position as patctl */
595 uint32_t control; /* Must be in same position as patctl */
596 uint32_t control2; /* Must be in same position as patctl */
597 uint32_t jitstack; /* Must be in same position as patctl */
598 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
599 uint32_t substitute_skip; /* Must be in same position as patctl */
600 uint32_t substitute_stop; /* Must be in same position as patctl */
601 uint32_t startend[2];
602 uint32_t cerror[2];
603 uint32_t cfail[2];
604 int32_t callout_data;
605 int32_t copy_numbers[MAXCPYGET];
606 int32_t get_numbers[MAXCPYGET];
607 uint32_t oveccount;
608 uint32_t offset;
609 uint8_t copy_names[LENCPYGET];
610 uint8_t get_names[LENCPYGET];
611 } datctl;
612
/* Ids that say which context is to be modified. */

enum {
  CTX_PAT,       /* Active pattern context */
  CTX_POPPAT,    /* Ditto, for a popped pattern */
  CTX_DEFPAT,    /* Default pattern context */
  CTX_DAT,       /* Active data (match) context */
  CTX_DEFDAT     /* Default data (match) context */
};
620
/* Shorthand for the field offsets that appear in the big table below: CO and
MO give offsets in the compile and match contexts, PO and DO give offsets in
the pattern and data control blocks. PD is the same as PO; the table uses it
for fields that can be set on either a pattern or a data line. */

#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
#define PO(name) offsetof(patctl, name)
#define DO(name) offsetof(datctl, name)
#define PD(name) PO(name)
628
629 /* Table of all long-form modifiers. Must be in collating sequence of modifier
630 name because it is searched by binary chop. */
631
632 typedef struct modstruct {
633 const char *name;
634 uint16_t which;
635 uint16_t type;
636 uint32_t value;
637 PCRE2_SIZE offset;
638 } modstruct;
639
/* Combination of extra options that the "ascii_all" modifier sets in one go. */

#define PCRE2_EXTRA_ASCII_ALL \
  (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_BSS | \
   PCRE2_EXTRA_ASCII_BSW | PCRE2_EXTRA_ASCII_POSIX)
642
643 static modstruct modlist[] = {
644 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
645 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
646 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
647 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
648 { "allow_lookaround_bsk", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
649 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
650 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
651 { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
652 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
653 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
654 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
655 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
656 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
657 { "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) },
658 { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) },
659 { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) },
660 { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) },
661 { "ascii_digit", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT, CO(extra_options) },
662 { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) },
663 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
664 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
665 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
666 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
667 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
668 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
669 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
670 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
671 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
672 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
673 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
674 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
675 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
676 { "caseless_restrict", MOD_CTC, MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) },
677 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
678 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
679 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
680 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
681 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
682 { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
683 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
684 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
685 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
686 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
687 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
688 { "disable_recurseloop_check", MOD_DAT, MOD_OPT, PCRE2_DISABLE_RECURSELOOP_CHECK, DO(options) },
689 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
690 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
691 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
692 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
693 { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
694 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
695 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
696 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
697 { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
698 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
699 { "find_limits_noheap", MOD_DAT, MOD_CTL, CTL_FINDLIMITS_NOHEAP, DO(control) },
700 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
701 { "framesize", MOD_PAT, MOD_CTL, CTL2_FRAMESIZE, PO(control2) },
702 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
703 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
704 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
705 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
706 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
707 { "heapframes_size", MOD_PND, MOD_CTL, CTL2_HEAPFRAMES_SIZE, PO(control2) },
708 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
709 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
710 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
711 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
712 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
713 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
714 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
715 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
716 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
717 { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
718 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
719 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
720 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
721 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
722 { "max_pattern_compiled_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_compiled_length) },
723 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
724 { "max_varlookbehind", MOD_CTC, MOD_INT, 0, CO(max_varlookbehind) },
725 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
726 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
727 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
728 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
729 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
730 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
731 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
732 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
733 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
734 { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
735 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
736 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
737 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
738 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
739 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
740 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
741 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
742 { "null_pattern", MOD_PAT, MOD_CTL, CTL2_NULL_PATTERN, PO(control2) },
743 { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) },
744 { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) },
745 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
746 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
747 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
748 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
749 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
750 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
751 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
752 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
753 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
754 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
755 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
756 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
757 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
758 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
759 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
760 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
761 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
762 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
763 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
764 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
765 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
766 { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
767 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
768 { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
769 { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) },
770 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
771 { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
772 { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
773 { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
774 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
775 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
776 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
777 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
778 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
779 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
780 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
781 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
782 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
783 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
784 };
785
/* Number of entries in modlist. The replacement list is fully parenthesized so
that the macro acts as a single operand when used inside a larger expression
(for example as the right-hand side of a division). */
#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
787
788 /* Controls and options that are supported for use with the POSIX interface. */
789
/* Compile-time option bits accepted with the POSIX interface. */
#define POSIX_SUPPORTED_COMPILE_OPTIONS \
  (PCRE2_CASELESS  | \
   PCRE2_DOTALL    | \
   PCRE2_LITERAL   | \
   PCRE2_MULTILINE | \
   PCRE2_UCP       | \
   PCRE2_UTF       | \
   PCRE2_UNGREEDY)

/* No extra compile options are accepted with the POSIX interface. */
#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

/* Compile-time control bits (first control word) accepted with the POSIX
interface. */
#define POSIX_SUPPORTED_COMPILE_CONTROLS \
  (CTL_AFTERTEXT    | \
   CTL_ALLAFTERTEXT | \
   CTL_EXPAND       | \
   CTL_HEXPAT       | \
   CTL_POSIX        | \
   CTL_POSIX_NOSUB  | \
   CTL_USE_LENGTH)

/* No second-word compile controls are accepted with the POSIX interface. */
#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
801
/* Match-time option bits accepted with the POSIX interface. */
#define POSIX_SUPPORTED_MATCH_OPTIONS \
  (PCRE2_NOTBOL   | \
   PCRE2_NOTEMPTY | \
   PCRE2_NOTEOL)

/* Match-time control bits accepted with the POSIX interface, one mask per
control word. */
#define POSIX_SUPPORTED_MATCH_CONTROLS \
  (CTL_AFTERTEXT | \
   CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 \
  (CTL2_NULL_SUBJECT)
807
808 /* Control bits that are not ignored with 'push'. */
809
/* First-word control bits that remain effective together with 'push'. */
#define PUSH_SUPPORTED_COMPILE_CONTROLS \
  (CTL_BINCODE        | \
   CTL_CALLOUT_INFO   | \
   CTL_FULLBINCODE    | \
   CTL_HEXPAT         | \
   CTL_INFO           | \
   CTL_JITVERIFY      | \
   CTL_MEMORY         | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)

/* Second-word control bits that remain effective together with 'push'. */
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 \
  (CTL2_BSR_SET         | \
   CTL2_HEAPFRAMES_SIZE | \
   CTL2_FRAMESIZE       | \
   CTL2_NL_SET)
817
818 /* Controls that apply only at compile time with 'push'. */
819
/* With 'push', these bits take effect at compile time only; the second control
word contributes none. */
#define PUSH_COMPILE_ONLY_CONTROLS   (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)
822
823 /* Controls that are forbidden with #pop or #popcopy. */
824
/* First-word control bits rejected in combination with #pop or #popcopy. */
#define NOTPOP_CONTROLS \
  (CTL_HEXPAT         | \
   CTL_POSIX          | \
   CTL_POSIX_NOSUB    | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)
827
828 /* Pattern controls that are mutually exclusive. At present these are all in
829 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
830 CTL_POSIX, so it doesn't need its own entries. */
831
832 static uint32_t exclusive_pat_controls[] = {
833 CTL_POSIX | CTL_PUSH,
834 CTL_POSIX | CTL_PUSHCOPY,
835 CTL_POSIX | CTL_PUSHTABLESCOPY,
836 CTL_PUSH | CTL_PUSHCOPY,
837 CTL_PUSH | CTL_PUSHTABLESCOPY,
838 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
839 CTL_EXPAND | CTL_HEXPAT };
840
841 /* Data controls that are mutually exclusive. At present these are all in the
842 first control word. */
843
844 static uint32_t exclusive_dat_controls[] = {
845 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
846 CTL_FINDLIMITS | CTL_NULLCONTEXT,
847 CTL_FINDLIMITS_NOHEAP | CTL_NULLCONTEXT };
848
849 /* Table of single-character abbreviated modifiers. The index field is
850 initialized to -1, but the first time the modifier is encountered, it is filled
851 in with the index of the full entry in modlist, to save repeated searching when
852 processing multiple test items. This short list is searched serially, so its
853 order does not matter. */
854
/* One single-character modifier abbreviation. */
struct c1modstruct {
  const char *fullname;   /* Name of the full modifier */
  uint32_t    onechar;    /* The abbreviating character */
  int         index;      /* Position in modlist, or -1 until first looked up */
};

typedef struct c1modstruct c1modstruct;
860
861 static c1modstruct c1modlist[] = {
862 { "bincode", 'B', -1 },
863 { "info", 'I', -1 },
864 { "ascii_all", 'a', -1 },
865 { "global", 'g', -1 },
866 { "caseless", 'i', -1 },
867 { "multiline", 'm', -1 },
868 { "no_auto_capture", 'n', -1 },
869 { "caseless_restrict", 'r', -1 },
870 { "dotall", 's', -1 },
871 { "extended", 'x', -1 }
872 };
873
/* Number of entries in c1modlist. The replacement list is fully parenthesized
so that the macro acts as a single operand inside a larger expression. */
#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
875
876 /* Table of arguments for the -C command line option. Use macros to make the
877 table itself easier to read. */
878
/* SUPPORT_n is 1 when the corresponding SUPPORT_PCRE2_n is defined; otherwise
it is given the value 0 unless it already has a definition. */

#if defined SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#if defined SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#if defined SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif
898
/* EBCDIC settings for the -C table: both values are 0 unless EBCDIC is
defined, in which case the newline value is CHAR_LF. */
#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
906
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */
#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
912
/* One entry in the table of -C arguments. */
struct coptstruct {
  const char *name;    /* Argument text */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* Fixed value or PCRE2_CONFIG_xxx code, depending on type */
};

typedef struct coptstruct coptstruct;
918
/* Values for the type field of a coptstruct. */
enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
925
926 static coptstruct coptlist[] = {
927 { "backslash-C", CONF_FIX, BACKSLASH_C },
928 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
929 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
930 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
931 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
932 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
933 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
934 { "pcre2-16", CONF_FIX, SUPPORT_16 },
935 { "pcre2-32", CONF_FIX, SUPPORT_32 },
936 { "pcre2-8", CONF_FIX, SUPPORT_8 },
937 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
938 };
939
/* Number of entries in coptlist. The replacement list is fully parenthesized
so that the macro acts as a single operand inside a larger expression. */
#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
941
/* The table-building helper macros are dropped once coptlist is defined. */
#undef SUPPORT_EBCDIC
#undef SUPPORT_32
#undef SUPPORT_16
#undef SUPPORT_8
946
947
948 /* ----------------------- Static variables ------------------------ */
949
950 static FILE *infile;
951 static FILE *outfile;
952
953 static const void *last_callout_mark;
954 static PCRE2_JIT_STACK *jit_stack = NULL;
955 static size_t jit_stack_size = 0;
956
957 static BOOL first_callout;
958 static BOOL jit_was_used;
959 static BOOL restrict_for_perl_test = FALSE;
960 static BOOL show_memory = FALSE;
961
962 static int jitrc; /* Return from JIT compile */
963 static int test_mode = DEFAULT_TEST_MODE;
964 static int timeit = 0;
965 static int timeitm = 0;
966
/* Accumulated clock totals for compiling, JIT compiling and matching.
NOTE(review): unlike the neighbouring variables these are not declared static,
so they have external linkage; confirm whether that is intended. */
clock_t total_compile_time = 0,
        total_jit_compile_time = 0,
        total_match_time = 0;
970
/* Unsigned counters and limits. All start at zero. */
static uint32_t code_unit_size = 0;    /* Bytes */
static uint32_t dfa_matched = 0;
static uint32_t forbid_utf = 0;
static uint32_t maxlookbehind = 0;
static uint32_t max_oveccount = 0;
static uint32_t callout_count = 0;
static uint32_t maxcapcount = 0;

static uint16_t local_newline_default = 0;
980
981 static VERSION_TYPE jittarget[VERSION_SIZE];
982 static VERSION_TYPE version[VERSION_SIZE];
983 static VERSION_TYPE uversion[VERSION_SIZE];
984
985 static patctl def_patctl;
986 static patctl pat_patctl;
987 static datctl def_datctl;
988 static datctl dat_datctl;
989
990 static void *patstack[PATSTACKSIZE];
991 static int patstacknext = 0;
992
993 static void *malloclist[MALLOCLISTSIZE];
994 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
995 static uint32_t malloclistptr = 0;
996
/* The POSIX regex block exists only when the 8-bit library is supported. */
#if defined(SUPPORT_PCRE2_8)
static regex_t preg =
  {
  NULL, NULL, 0, 0, 0, 0
  };
#endif
1000
/* Workspace and character-table pointers, all initially NULL, plus the locale
name buffer. */
static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL, *use_tables = NULL;
static uint8_t locale_name[32];
static uint8_t *tables3 = NULL;            /* For binary-loaded tables */
static uint32_t loadtables_length = 0;
1007
1008 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
1009 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
1010 buffer is where all input lines are read. Its size is the same as pbuffer8.
1011 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
1012 are actually compiled from pbuffer16 or pbuffer32. */
1013
static size_t pbuffer8_size = 50000;        /* Initial size, bytes */
static uint8_t *pbuffer8 = NULL, *buffer = NULL;
1017
1018 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
1019 is cast as needed. For long data lines it grows as necessary. */
1020
static uint8_t *dbuffer = NULL;
static size_t dbuffer_size = 16384;         /* Initial size, bytes (1u << 14) */
1023
1024
1025 /* ---------------- Mode-dependent variables -------------------*/
1026
/* 8-bit variants of the mode-dependent variables. */
#if defined(SUPPORT_PCRE2_8)
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_convert_context_8 *con_context8;
static pcre2_convert_context_8 *default_con_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif
1035
/* 16-bit variants of the mode-dependent variables, plus the 16-bit pattern
buffer and its size. */
#if defined(SUPPORT_PCRE2_16)
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
static uint16_t *pbuffer16 = NULL;
#endif
1046
/* 32-bit variants of the mode-dependent variables, plus the 32-bit pattern
buffer and its size. */
#if defined(SUPPORT_PCRE2_32)
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
static uint32_t *pbuffer32 = NULL;
#endif
1057
1058
1059 /* ---------------- Macros that work in all modes ----------------- */
1060
/* CAST8VAR casts a mode-selected variable to (uint8_t *) via CASTVAR. SET and
SETPLUS apply "=" and "+=" respectively via SETOP. strlen8 is strlen() for a
string that is not declared as (char *); its argument is parenthesized before
the cast so that an argument containing an operator of lower precedence than a
cast (such as a conditional expression) is converted as a whole. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((char *)(x))
1065
1066
1067 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1068
1069 /* Define macros for variables and functions that must be selected dynamically
1070 depending on the mode setting (8, 16, 32). These are dependent on which modes
1071 are supported. */
1072
1073 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1074 defined (SUPPORT_PCRE2_32)) >= 2
1075
1076 /* ----- All three modes supported ----- */
1077
1078 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1079
/* Field b of the mode-specific variant of structure pointer a, cast to type t.
Any mode other than 8-bit or 16-bit selects the 32-bit variant. */
#define CASTFLD(t,a,b) \
  ( \
  (test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
    ((test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
      (t)(G(a,32)->b)) \
  )
1082
/* The mode-specific variant of variable x, cast to type t. Any mode other than
8-bit or 16-bit selects the 32-bit variant. */
#define CASTVAR(t,x) \
  ( \
  (test_mode == PCRE8_MODE)? (t)G(x,8) : \
    ((test_mode == PCRE16_MODE)? (t)G(x,16) : \
      (t)G(x,32)) \
  )
1086
/* Code unit number b of string a, read at the width of the current mode and
widened to uint32_t. Any mode other than 8-bit or 16-bit reads 32-bit units. */
#define CODE_UNIT(a,b) \
  ( \
  (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
    ((test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
      (uint32_t)(((PCRE2_SPTR32)(a))[b])) \
  )
1091
/* Copy the mode-specific convert context b over a with memcpy(). */
#define CONCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)); \
  } while (0)
1098
/* Copy c code units from b into the mode-specific variant of buffer a; the
byte count is c, c*2 or c*4 according to the mode. Nothing is copied when
test_mode is none of the three known modes.

Because the if-chain has no final "else", the expansion is wrapped in
do { ... } while (0): otherwise an "else" written by the caller after the macro
would attach to the macro's last "if" instead of the caller's own "if". */
#define CONVERT_COPY(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),(char *)b,c); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),(char *)b,(c)*2); \
    else if (test_mode == PCRE32_MODE) \
      memcpy(G(a,32),(char *)b,(c)*4); \
  } while (0)
1106
/* Copy the mode-specific match context b over a with memcpy(). */
#define DATCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)); \
  } while (0)
1113
/* Field b of the mode-specific variant of structure pointer a, without any
cast. Any mode other than 8-bit or 16-bit selects the 32-bit variant. */
#define FLD(a,b) \
  ( \
  (test_mode == PCRE8_MODE)? G(a,8)->b : \
    ((test_mode == PCRE16_MODE)? G(a,16)->b : \
      G(a,32)->b) \
  )
1116
/* Copy the mode-specific compile context b over a with memcpy(). */
#define PATCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)); \
  } while (0)
1123
/* Call pchars32(), pchars16() or pchars8() on p+offset (offset counted in code
units of the mode) and store the result in lv. Any mode other than 32-bit or
16-bit uses pchars8(). */
#define PCHARS(lv, p, offset, len, utf, f) \
  do { \
    if (test_mode == PCRE32_MODE) \
      lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
    else if (test_mode == PCRE16_MODE) \
      lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
    else \
      lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f); \
  } while (0)
1131
/* As PCHARS, but the value returned by the pchars function is discarded. */
#define PCHARSV(p, offset, len, utf, f) \
  do { \
    if (test_mode == PCRE32_MODE) \
      (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
    else if (test_mode == PCRE16_MODE) \
      (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
    else \
      (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f); \
  } while (0)
1139
/* Run pcre2_callout_enumerate on the current mode's compiled code, casting
callback b to that mode's function type; the result goes to a. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_callout_enumerate_8(compiled_code8, \
        (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_callout_enumerate_16(compiled_code16, \
        (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
    else \
      a = pcre2_callout_enumerate_32(compiled_code32, \
        (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c); \
  } while (0)
1150
/* Set the mode-specific variant of a to a pcre2_code_copy of b. */
#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_code_copy_8(b); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_code_copy_16(b); \
    else \
      G(a,32) = pcre2_code_copy_32(b); \
  } while (0)
1158
/* Set a to a pcre2_code_copy of the mode-specific variant of b, cast to
(void *). */
#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (void *)pcre2_code_copy_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = (void *)pcre2_code_copy_16(G(b,16)); \
    else \
      a = (void *)pcre2_code_copy_32(G(b,32)); \
  } while (0)
1166
/* Set a to a pcre2_code_copy_with_tables of the mode-specific variant of b,
cast to (void *). */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
    else \
      a = (void *)pcre2_code_copy_with_tables_32(G(b,32)); \
  } while (0)
1174
/* Call the current mode's pcre2_compile with arguments b..g and store the
result in the mode-specific variant of a. */
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_compile_8(b,c,d,e,f,g); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_compile_16(b,c,d,e,f,g); \
    else \
      G(a,32) = pcre2_compile_32(b,c,d,e,f,g); \
  } while (0)
1182
/* Pass a, cast to the current mode's code unit pointer type, to that mode's
pcre2_converted_pattern_free. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
    else \
      pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a); \
  } while (0)
1187
/* Call the current mode's pcre2_dfa_match; b and g name mode-specific
variables, c is cast to the mode's subject pointer type, and the result goes
to a. */
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
    else \
      a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j); \
  } while (0)
1195
/* Fetch the message for error a into the mode-specific variant of buffer b.
The length passed is <buffer>_size, <buffer>_size/2 or <buffer>_size/4 for the
8-, 16- and 32-bit buffers respectively. The result goes to r. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
    else \
      r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)); \
  } while (0)
1203
/* Store in r the heapframes size reported for the mode-specific variant of
match data a. */
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_get_match_data_heapframes_size_8(G(a,8)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_get_match_data_heapframes_size_16(G(a,16)); \
    else \
      r = pcre2_get_match_data_heapframes_size_32(G(a,32)); \
  } while (0)
1211
/* Store in a the ovector count of the mode-specific variant of match data b. */
#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_get_ovector_count_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_get_ovector_count_16(G(b,16)); \
    else \
      a = pcre2_get_ovector_count_32(G(b,32)); \
  } while (0)
1219
/* Store in a the startchar value of the mode-specific variant of match
data b. */
#define PCRE2_GET_STARTCHAR(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_get_startchar_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_get_startchar_16(G(b,16)); \
    else \
      a = pcre2_get_startchar_32(G(b,32)); \
  } while (0)
1227
/* JIT-compile the mode-specific variant of a with options b; the return code
goes to r. */
#define PCRE2_JIT_COMPILE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_jit_compile_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_jit_compile_16(G(a,16),b); \
    else \
      r = pcre2_jit_compile_32(G(a,32),b); \
  } while (0)
1232
/* Call the current mode's pcre2_jit_free_unused_memory on the mode-specific
variant of a. */
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_free_unused_memory_8(G(a,8)); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_free_unused_memory_16(G(a,16)); \
    else \
      pcre2_jit_free_unused_memory_32(G(a,32)); \
  } while (0)
1237
/* Call the current mode's pcre2_jit_match; b and g name mode-specific
variables, c is cast to the mode's subject pointer type, and the result goes
to a. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
    else \
      a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h); \
  } while (0)
1245
/* Create a JIT stack with the current mode's pcre2_jit_stack_create, passing
b, c and d, and store the result in a as a (PCRE2_JIT_STACK *).

The expansion carries no trailing semicolon and is wrapped in
do { ... } while (0), so the macro behaves as one statement: the caller
supplies the semicolon, and "if (x) PCRE2_JIT_STACK_CREATE(...); else ..."
compiles as written. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
    else if (test_mode == PCRE16_MODE) \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
    else \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); \
  } while (0)
1253
/* Call the current mode's pcre2_jit_stack_assign on the mode-specific variant
of context a, with callback b cast to that mode's callback type and data c.

The expansion carries no trailing semicolon and is wrapped in
do { ... } while (0), so the macro behaves as one statement: the caller
supplies the semicolon, and "if (x) PCRE2_JIT_STACK_ASSIGN(...); else ..."
compiles as written. */
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
    else \
      pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); \
  } while (0)
1261
/* Pass a, cast to the current mode's JIT stack pointer type, to that mode's
pcre2_jit_stack_free.

The expansion carries no trailing semicolon and is wrapped in
do { ... } while (0), so the macro behaves as one statement: the caller
supplies the semicolon, and "if (x) PCRE2_JIT_STACK_FREE(...); else ..."
compiles as written. */
#define PCRE2_JIT_STACK_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
    else \
      pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); \
  } while (0)
1269
/* Build character tables with the current mode's pcre2_maketables, passing the
mode-specific variant of c; the result goes to a. */
#define PCRE2_MAKETABLES(a,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_maketables_8(G(c,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_maketables_16(G(c,16)); \
    else \
      a = pcre2_maketables_32(G(c,32)); \
  } while (0)
1274
/* Release tables a with the current mode's pcre2_maketables_free, passing the
mode-specific variant of c. */
#define PCRE2_MAKETABLES_FREE(c,a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_maketables_free_8(G(c,8),a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_maketables_free_16(G(c,16),a); \
    else \
      pcre2_maketables_free_32(G(c,32),a); \
  } while (0)
1279
/* Call the current mode's pcre2_match; b and g name mode-specific variables,
c is cast to the mode's subject pointer type, and the result goes to a. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
    else \
      a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h); \
  } while (0)
1287
/* Create match data with argument b and the mode-specific variant of c, and
store it in the mode-specific variant of a. */
#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_match_data_create_8(b,G(c,8)); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_match_data_create_16(b,G(c,16)); \
    else \
      G(a,32) = pcre2_match_data_create_32(b,G(c,32)); \
  } while (0)
1295
/* Create match data from the mode-specific variants of b and c, and store it
in the mode-specific variant of a. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),G(c,8)); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),G(c,16)); \
    else \
      G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32)); \
  } while (0)
1303
/* Call the current mode's pcre2_match_data_free on the mode-specific variant
of a. */
#define PCRE2_MATCH_DATA_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_match_data_free_8(G(a,8)); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_match_data_free_16(G(a,16)); \
    else \
      pcre2_match_data_free_32(G(a,32)); \
  } while (0)
1311
/* Call the current mode's pcre2_pattern_convert; b and g name mode-specific
variables, e is cast to a pointer to the mode's code unit pointer type, and the
result goes to a. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
    else \
      a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)); \
  } while (0)
1319
/* Call the current mode's pcre2_pattern_info on the mode-specific variant of
b with arguments c and d; the result goes to a. */
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_pattern_info_8(G(b,8),c,d); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_pattern_info_16(G(b,16),c,d); \
    else \
      a = pcre2_pattern_info_32(G(b,32),c,d); \
  } while (0)
1327
/* Call the current mode's pcre2_printint on that mode's compiled code, with
outfile and argument a. */
#define PCRE2_PRINTINT(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_printint_8(compiled_code8,outfile,a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_printint_16(compiled_code16,outfile,a); \
    else \
      pcre2_printint_32(compiled_code32,outfile,a); \
  } while (0)
1335
/* Call the current mode's pcre2_serialize_decode; a is cast to a pointer to
the mode's code pointer type, d names a mode-specific variable, and the result
goes to r. */
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
    else \
      r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)); \
  } while (0)
1343
/* Call the current mode's pcre2_serialize_encode; a is cast to a pointer to
the mode's const code pointer type, e names a mode-specific variable, and the
result goes to r. */
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
    else \
      r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)); \
  } while (0)
1351
/* Pass a to the current mode's pcre2_serialize_free. */
#define PCRE2_SERIALIZE_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_serialize_free_8(a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_serialize_free_16(a); \
    else \
      pcre2_serialize_free_32(a); \
  } while (0)
1359
/* Store in r the value returned by the current mode's
pcre2_serialize_get_number_of_codes for a.

The expansion carries no trailing semicolon and no dangling line continuation,
and is wrapped in do { ... } while (0). The macro therefore behaves as one
statement, works in "if (x) MACRO(...); else ...", and cannot absorb the line
that follows its definition. */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_get_number_of_codes_8(a); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_get_number_of_codes_16(a); \
    else \
      r = pcre2_serialize_get_number_of_codes_32(a); \
  } while (0)
1367
/* Call the current mode's pcre2_set_callout on the mode-specific variant of
context a, with callback b cast to that mode's callout function type and
data c.

The expansion carries no trailing semicolon and is wrapped in
do { ... } while (0), so the macro behaves as one statement: the caller
supplies the semicolon, and "if (x) PCRE2_SET_CALLOUT(...); else ..." compiles
as written. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_callout_8(G(a,8), \
        (int (*)(pcre2_callout_block_8 *, void *))b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_callout_16(G(a,16), \
        (int (*)(pcre2_callout_block_16 *, void *))b,c); \
    else \
      pcre2_set_callout_32(G(a,32), \
        (int (*)(pcre2_callout_block_32 *, void *))b,c); \
  } while (0)
1375
/* Call the current mode's pcre2_set_character_tables on the mode-specific
variant of a with tables b. */
#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_character_tables_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_character_tables_16(G(a,16),b); \
    else \
      pcre2_set_character_tables_32(G(a,32),b); \
  } while (0)
1383
/* Call the current mode's pcre2_set_compile_recursion_guard on the
mode-specific variant of a with arguments b and c. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
    else \
      pcre2_set_compile_recursion_guard_32(G(a,32),b,c); \
  } while (0)
1391
/* Call the current mode's pcre2_set_depth_limit on the mode-specific variant
of a with value b. */
#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_depth_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_depth_limit_16(G(a,16),b); \
    else \
      pcre2_set_depth_limit_32(G(a,32),b); \
  } while (0)
1399
/* Call the current mode's pcre2_set_glob_separator on the mode-specific
variant of a with value b; the result goes to r. */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_set_glob_separator_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_set_glob_separator_16(G(a,16),b); \
    else \
      r = pcre2_set_glob_separator_32(G(a,32),b); \
  } while (0)
1407
/* Call the current mode's pcre2_set_glob_escape on the mode-specific variant
of a with value b; the result goes to r. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_set_glob_escape_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_set_glob_escape_16(G(a,16),b); \
    else \
      r = pcre2_set_glob_escape_32(G(a,32),b); \
  } while (0)
1415
/* Call the current mode's pcre2_set_heap_limit on the mode-specific variant of
a with value b. */
#define PCRE2_SET_HEAP_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_heap_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_heap_limit_16(G(a,16),b); \
    else \
      pcre2_set_heap_limit_32(G(a,32),b); \
  } while (0)
1423
/* Call the current mode's pcre2_set_match_limit on the mode-specific variant
of a with value b. */
#define PCRE2_SET_MATCH_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_match_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_match_limit_16(G(a,16),b); \
    else \
      pcre2_set_match_limit_32(G(a,32),b); \
  } while (0)
1431
/* Call the current mode's pcre2_set_max_pattern_compiled_length on the
mode-specific variant of a with value b. */
#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_max_pattern_compiled_length_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_max_pattern_compiled_length_16(G(a,16),b); \
    else \
      pcre2_set_max_pattern_compiled_length_32(G(a,32),b); \
  } while (0)
1439
/* Call the current mode's pcre2_set_max_pattern_length on the mode-specific
variant of a with value b. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_max_pattern_length_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_max_pattern_length_16(G(a,16),b); \
    else \
      pcre2_set_max_pattern_length_32(G(a,32),b); \
  } while (0)
1447
/* Call the current mode's pcre2_set_max_varlookbehind on the mode-specific
variant of a with value b. */
#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_max_varlookbehind_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_max_varlookbehind_16(G(a,16),b); \
    else \
      pcre2_set_max_varlookbehind_32(G(a,32),b); \
  } while (0)
1455
/* Call the current mode's pcre2_set_offset_limit on the mode-specific variant
of a with value b. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_offset_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_offset_limit_16(G(a,16),b); \
    else \
      pcre2_set_offset_limit_32(G(a,32),b); \
  } while (0)
1463
/* Call the current mode's pcre2_set_parens_nest_limit on the mode-specific
variant of a with value b. */
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_parens_nest_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_parens_nest_limit_16(G(a,16),b); \
    else \
      pcre2_set_parens_nest_limit_32(G(a,32),b); \
  } while (0)
1471
/* Call the current mode's pcre2_set_substitute_callout on the mode-specific
variant of a, with callback b cast to that mode's substitute-callout function
type and data c. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_substitute_callout_8(G(a,8), \
        (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_substitute_callout_16(G(a,16), \
        (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
    else \
      pcre2_set_substitute_callout_32(G(a,32), \
        (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c); \
  } while (0)
1482
/* Call the current mode's pcre2_substitute; b and g name mode-specific
variables, c and i are cast to the mode's subject pointer type, k is cast to
the mode's code unit pointer type, and the result goes to a. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
        (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
        (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
    else \
      a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
        (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l); \
  } while (0)
1493
/* Call the current mode's pcre2_substring_copy_byname on the mode-specific
variants of b and c, with buffer d cast to the mode's code unit pointer type
and argument e; the result goes to a. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
    else \
      a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e); \
  } while (0)
1501
/* Call the current mode's pcre2_substring_copy_bynumber on the mode-specific
variant of b with number c, buffer d cast to the mode's code unit pointer type
and argument e; the result goes to a. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
    else \
      a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e); \
  } while (0)
1509
/* Pass a, cast to the current mode's code unit pointer type, to that mode's
pcre2_substring_free. */
#define PCRE2_SUBSTRING_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
    else \
      pcre2_substring_free_32((PCRE2_UCHAR32 *)a); \
  } while (0)
1515
/* Call the current mode's pcre2_substring_get_byname on the mode-specific
variants of b and c, with d cast to a pointer to the mode's code unit pointer
type and argument e; the result goes to a. */
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
    else \
      a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e); \
  } while (0)
1523
/* Call the current mode's pcre2_substring_get_bynumber on the mode-specific
variant of b with number c, d cast to a pointer to the mode's code unit pointer
type and argument e; the result goes to a. */
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
    else \
      a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e); \
  } while (0)
1531
/* Call the current mode's pcre2_substring_length_byname on the mode-specific
variants of b and c with argument d; the result goes to a. */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
    else \
      a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d); \
  } while (0)
1539
1540 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1541 if (test_mode == PCRE8_MODE) \
1542 a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
1543 else if (test_mode == PCRE16_MODE) \
1544 a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
1545 else \
1546 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1547
/* Dispatch on test_mode to pcre2_substring_list_get_{8,16,32}. The width is
pasted onto the second argument, the third is cast to the matching
PCRE2_UCHAR *** type, and the result is assigned to the first. */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8( \
      G(mdata,8),(PCRE2_UCHAR8 ***)listptr,lensptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16( \
      G(mdata,16),(PCRE2_UCHAR16 ***)listptr,lensptr); \
  else \
    rc = pcre2_substring_list_get_32( \
      G(mdata,32),(PCRE2_UCHAR32 ***)listptr,lensptr)

/* Pass the argument, cast to the matching PCRE2_UCHAR ** type, to
pcre2_substring_list_free_{8,16,32}. */

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_UCHAR8 **)list); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_UCHAR16 **)list); \
  else \
    pcre2_substring_list_free_32((PCRE2_UCHAR32 **)list)
1563
/* Dispatch on test_mode to pcre2_substring_number_from_name_{8,16,32}. The
width is pasted onto the second and third arguments and the result is assigned
to the first. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_number_from_name_8( \
      G(code,8),G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_number_from_name_16( \
      G(code,16),G(name,16)); \
  else \
    rc = pcre2_substring_number_from_name_32( \
      G(code,32),G(name,32))
1571
/* Expression yielding, as a void pointer, whichever of the width-suffixed
variables var8, var16 or var32 corresponds to the current test_mode. */

#define PTR(var) \
  ((test_mode == PCRE8_MODE)? (void *)G(var,8) : \
    ((test_mode == PCRE16_MODE)? (void *)G(var,16) : (void *)G(var,32)))
1576
/* Assign value to the named field, reached through the width-suffixed
pointer variable selected by test_mode. */

#define SETFLD(var,field,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->field = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->field = value; \
  else \
    G(var,32)->field = value

/* As SETFLD, but the field is indexed and element [idx] is assigned. */

#define SETFLDVEC(var,field,idx,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->field[idx] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->field[idx] = value; \
  else \
    G(var,32)->field[idx] = value

/* Apply the operator token given as the third argument to the width-suffixed
variable, with the second argument as the right-hand operand. */

#define SETOP(var,operand,op) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) op operand; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) op operand; \
  else \
    G(var,32) op operand
1591
/* Store value in the width-suffixed variable selected by test_mode, cast to
the unsigned integer pointer type of that width. */

#define SETCASTPTR(var,value) \
  if (test_mode == PCRE8_MODE) G(var,8) = (uint8_t *)(value); \
  else if (test_mode == PCRE16_MODE) G(var,16) = (uint16_t *)(value); \
  else G(var,32) = (uint32_t *)(value)
1599
/* Expression giving, as an int, the result of strlen(), strlen16() or
strlen32() on the argument, according to test_mode. */

#define STRLEN(str) ( \
  (test_mode == PCRE8_MODE)? (int)strlen((char *)str) : \
  ((test_mode == PCRE16_MODE)? (int)strlen16((PCRE2_SPTR16)str) : \
    (int)strlen32((PCRE2_SPTR32)str)))
1603
/* Call the width-suffixed function fn8, fn16 or fn32 with one width-suffixed
argument, the width being chosen by test_mode. */

#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))

/* As SUB1, with two width-suffixed arguments. */

#define SUB2(fn,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg1,8),G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg1,16),G(arg2,16)); \
  else \
    G(fn,32)(G(arg1,32),G(arg2,32))
1613
/* Expression that is true when the width-suffixed variable for the current
test_mode stands in relation rel to (value). */

#define TEST(var,rel,value) \
  ((test_mode == PCRE8_MODE && G(var,8) rel (value)) || \
   (test_mode == PCRE16_MODE && G(var,16) rel (value)) || \
   (test_mode == PCRE32_MODE && G(var,32) rel (value)))

/* As TEST, but compares a field reached through the width-suffixed pointer
variable. */

#define TESTFLD(var,field,rel,value) \
  ((test_mode == PCRE8_MODE && G(var,8)->field rel (value)) || \
   (test_mode == PCRE16_MODE && G(var,16)->field rel (value)) || \
   (test_mode == PCRE32_MODE && G(var,32)->field rel (value)))
1623
1624
1625 /* ----- Two out of three modes are supported ----- */
1626
1627 #else
1628
1629 /* We can use some macro trickery to make a single set of definitions work in
1630 the three different cases. */
1631
1632 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1633
/* BITONE and BITTWO are the two code unit widths used by the macros that
follow. 32-bit paired with 16-bit is checked first, then 32-bit paired with
8-bit; every other combination yields 16-bit with 8-bit. */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#if defined(SUPPORT_PCRE2_16)
#define BITTWO 16   /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
#else
#define BITTWO 8    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
#endif

#else               /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
#define BITONE 16
#define BITTWO 8
#endif
1650
1651
1652 /* ----- Common macros for two-mode cases ----- */
1653
/* The two widths expressed as a number of bytes. */

#define BYTEONE (BITONE / 8)
#define BYTETWO (BITTWO / 8)

/* Expression: the named field, reached through the width-suffixed pointer
variable for the current test_mode, cast to the given type. */

#define CASTFLD(type,var,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(var,BITONE)->field) : \
    (type)(G(var,BITTWO)->field))

/* Expression: the width-suffixed variable itself, cast to the given type. */

#define CASTVAR(type,var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : \
    (type)G(var,BITTWO))
1664
/* Expression: element [idx] of ptr, read through the PCRE2_SPTR type of the
current test_mode and converted to uint32_t. */

#define CODE_UNIT(ptr,idx) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))
1669
/* memcpy() one convert context's worth of bytes between the width-suffixed
dst and src variables selected by test_mode. */

#define CONCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))

/* Expression (not a statement): memcpy() of `units` code units, scaled to
bytes for the current width, from src into the width-suffixed dst variable. */

#define CONVERT_COPY(dst,src,units) \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    memcpy(G(dst,BITONE),(char *)src,(units)*BYTEONE) : \
    memcpy(G(dst,BITTWO),(char *)src,(units)*BYTETWO)

/* As CONCTXCPY, sized by the match context type. */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Expression: the named field reached through the width-suffixed pointer
variable for the current test_mode. */

#define FLD(var,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(var,BITONE)->field : G(var,BITTWO)->field)

/* As CONCTXCPY, sized by the compile context type. */

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1695
/* Call the pchars function for the current width on ptr, cast to the
matching PCRE2_SPTR type and advanced by off, and assign the result to
count. */

#define PCHARS(count, ptr, off, len, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    count = G(pchars,BITONE)( \
      (G(PCRE2_SPTR,BITONE))(ptr)+off, len, utf, file); \
  else \
    count = G(pchars,BITTWO)( \
      (G(PCRE2_SPTR,BITTWO))(ptr)+off, len, utf, file)

/* The same call with the result explicitly discarded. */

#define PCHARSV(ptr, off, len, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)( \
      (G(PCRE2_SPTR,BITONE))(ptr)+off, len, utf, file); \
  else \
    (void)G(pchars,BITTWO)( \
      (G(PCRE2_SPTR,BITTWO))(ptr)+off, len, utf, file)
1707
/* Call pcre2_callout_enumerate_<width> on the compiled_code variable for the
current test_mode and assign the result to a. The callback b is cast to the
function pointer type taking the matching enumerate block; c is passed through
unchanged.

The function name stem must end in an underscore: G() pastes the width
directly onto it, and the single-width sections call
pcre2_callout_enumerate_8 / pcre2_callout_enumerate_16. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1715
/* Store the result of pcre2_code_copy_<width>(src) in the width-suffixed dst
variable; src is passed through as given. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* Assign to dst, as a void pointer, the result of pcre2_code_copy_<width> on
the width-suffixed src variable. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

/* As PCRE2_CODE_COPY_TO_VOID, calling pcre2_code_copy_with_tables_<width>. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))
1733
/* Call pcre2_compile_<width> with the last six arguments exactly as given and
store the result in the width-suffixed code variable. */

#define PCRE2_COMPILE(code,pattern,length,options,errptr,offptr,cctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)( \
      pattern,length,options,errptr,offptr,cctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)( \
      pattern,length,options,errptr,offptr,cctx)
1739
/* Pass the argument, cast to the PCRE2_UCHAR pointer type of the current
width, to pcre2_converted_pattern_free_<width>. */

#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)pattern); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)pattern)
1745
/* Call pcre2_dfa_match_<width> and assign the result to rc. The width is
pasted onto the code and mdata arguments, the subject is cast to the matching
PCRE2_SPTR type, and the remaining arguments are passed through unchanged. */

#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx,wspace,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx,wspace,wscount)
1753
/* Call pcre2_get_error_message_<width> and assign the result to rc. The
buffer is the width-suffixed buf variable; its length argument is the
companion <buf><width>_size variable divided by the code unit size in
bytes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)( \
      errcode,G(buf,BITONE),G(G(buf,BITONE),_size/BYTEONE)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)( \
      errcode,G(buf,BITTWO),G(G(buf,BITTWO),_size/BYTETWO))
1759
/* Three getters of the same shape: each calls the named function for the
current width on the width-suffixed mdata variable and assigns the result to
the first argument. */

#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(rc,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_match_data_heapframes_size_,BITONE)( \
      G(mdata,BITONE)); \
  else \
    rc = G(pcre2_get_match_data_heapframes_size_,BITTWO)( \
      G(mdata,BITTWO))

#define PCRE2_GET_OVECTOR_COUNT(rc,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_ovector_count_,BITONE)( \
      G(mdata,BITONE)); \
  else \
    rc = G(pcre2_get_ovector_count_,BITTWO)( \
      G(mdata,BITTWO))

#define PCRE2_GET_STARTCHAR(rc,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_startchar_,BITONE)( \
      G(mdata,BITONE)); \
  else \
    rc = G(pcre2_get_startchar_,BITTWO)( \
      G(mdata,BITTWO))
1777
/* Call pcre2_jit_compile_<width> on the width-suffixed code variable with the
given options and assign the result to rc. */

#define PCRE2_JIT_COMPILE(rc,code,options) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)( \
      G(code,BITONE),options); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)( \
      G(code,BITTWO),options)

/* Call pcre2_jit_free_unused_memory_<width> on the width-suffixed context
variable. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)( \
      G(gctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)( \
      G(gctx,BITTWO))
1789
/* Call pcre2_jit_match_<width> and assign the result to rc. The width is
pasted onto the code and mdata arguments, the subject is cast to the matching
PCRE2_SPTR type, and the other arguments are passed through unchanged. */

#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx)
1797
/* Call pcre2_jit_stack_create_<width>(b,c,d) for the current test_mode and
assign the result, cast to PCRE2_JIT_STACK *, to a.

The expansion ends with its own semicolon. The final line deliberately has no
line-continuation backslash: a trailing one makes the definition depend on
whatever line happens to follow it. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);
1803
/* Call pcre2_jit_stack_assign_<width> on the width-suffixed context variable,
with the callback cast to the matching pcre2_jit_callback type. The expansion
supplies its own terminating semicolon. */

#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(mctx,BITONE), \
      (G(pcre2_jit_callback_,BITONE))callback,data); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(mctx,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))callback,data);

/* Pass the argument, cast to the matching pcre2_jit_stack pointer type, to
pcre2_jit_stack_free_<width>. The expansion supplies its own terminating
semicolon. */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)( \
      (G(pcre2_jit_stack_,BITONE) *)stack); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)( \
      (G(pcre2_jit_stack_,BITTWO) *)stack);
1815
/* Assign to tables the result of pcre2_maketables_<width> called on the
width-suffixed context variable. */

#define PCRE2_MAKETABLES(tables,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)( \
      G(gctx,BITONE)); \
  else \
    tables = G(pcre2_maketables_,BITTWO)( \
      G(gctx,BITTWO))

/* Call pcre2_maketables_free_<width> with the width-suffixed context variable
and the tables argument as given. */

#define PCRE2_MAKETABLES_FREE(gctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_maketables_free_,BITONE)( \
      G(gctx,BITONE),tables); \
  else \
    G(pcre2_maketables_free_,BITTWO)( \
      G(gctx,BITTWO),tables)
1827
/* Call pcre2_match_<width> and assign the result to rc. The width is pasted
onto the code and mdata arguments, the subject is cast to the matching
PCRE2_SPTR type, and the other arguments are passed through unchanged. */

#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx)
1835
/* Store in the width-suffixed mdata variable the result of
pcre2_match_data_create_<width>(size, <gctx><width>). */

#define PCRE2_MATCH_DATA_CREATE(mdata,size,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)( \
      size,G(gctx,BITONE)); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)( \
      size,G(gctx,BITTWO))

/* As above, calling pcre2_match_data_create_from_pattern_<width> with the
width-suffixed code and context variables. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)( \
      G(code,BITONE),G(gctx,BITONE)); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)( \
      G(code,BITTWO),G(gctx,BITTWO))

/* Call pcre2_match_data_free_<width> on the width-suffixed mdata variable. */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)( \
      G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)( \
      G(mdata,BITTWO))
1853
/* Call pcre2_pattern_convert_<width> and assign the result to rc. The width
is pasted onto the pattern and context arguments, bufptr is cast to a pointer
to the matching PCRE2_UCHAR pointer type, and the rest are passed as given. */

#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,lenptr,cvctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pattern,BITONE),length,options, \
      (G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr,G(cvctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pattern,BITTWO),length,options, \
      (G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr,G(cvctx,BITTWO))
1859
/* Call pcre2_pattern_info_<width> on the width-suffixed code variable, with
the last two arguments passed as given, and assign the result to rc. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)( \
      G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)( \
      G(code,BITTWO),what,where)
1865
/* Call pcre2_printint_<width> with the compiled_code variable of the current
width, outfile, and the given argument. */

#define PCRE2_PRINTINT(flag) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)( \
      G(compiled_code,BITONE),outfile,flag); \
  else \
    G(pcre2_printint_,BITTWO)( \
      G(compiled_code,BITTWO),outfile,flag)
1871
/* Call pcre2_serialize_decode_<width> and assign the result to rc. The first
call argument is cast to a pointer to the matching pcre2_code pointer type and
the width is pasted onto the last one. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

/* Call pcre2_serialize_encode_<width> and assign the result to rc. The first
call argument is cast to a pointer to a pointer to const pcre2_code of the
matching width, and the width is pasted onto the last one. */

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (G(const pcre2_code_,BITONE) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (G(const pcre2_code_,BITTWO) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITTWO))

/* Call pcre2_serialize_free_<width> on the argument as given. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

/* Assign to rc the result of pcre2_serialize_get_number_of_codes_<width> on
the argument as given. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1895
/* Call pcre2_set_callout_<width> on the width-suffixed context variable a,
with callback b cast to the function pointer type taking the matching callout
block, and c passed through unchanged.

Like the 8-bit-only definition of this macro, the expansion has no trailing
semicolon; the caller supplies it. A semicolon inside the macro would turn
"if (x) PCRE2_SET_CALLOUT(...); else ..." into a syntax error. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1903
/* Call pcre2_set_character_tables_<width> on the width-suffixed context
variable with the tables argument as given. */

#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)( \
      G(cctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)( \
      G(cctx,BITTWO),tables)

/* Call pcre2_set_compile_recursion_guard_<width> on the width-suffixed
context variable with the last two arguments as given. */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)( \
      G(cctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)( \
      G(cctx,BITTWO),guard,data)
1915
/* Call pcre2_set_depth_limit_<width> on the width-suffixed context variable
with the given value. */

#define PCRE2_SET_DEPTH_LIMIT(mctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)( \
      G(mctx,BITONE),value); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)( \
      G(mctx,BITTWO),value)

/* The two glob setters also assign the function's result to rc. */

#define PCRE2_SET_GLOB_ESCAPE(rc,cvctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)( \
      G(cvctx,BITONE),value); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)( \
      G(cvctx,BITTWO),value)

#define PCRE2_SET_GLOB_SEPARATOR(rc,cvctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)( \
      G(cvctx,BITONE),value); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)( \
      G(cvctx,BITTWO),value)
1933
/* Seven setters of identical shape. Each calls the named pcre2_set_ function
for the current width on the width-suffixed context variable, passing the
second argument through unchanged; any return value is ignored. */

#define PCRE2_SET_HEAP_LIMIT(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_MATCH_LIMIT(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_match_limit_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_compiled_length_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_max_pattern_compiled_length_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_MAX_VARLOOKBEHIND(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_varlookbehind_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_max_varlookbehind_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_OFFSET_LIMIT(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)( \
      G(ctx,BITTWO),value)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)( \
      G(ctx,BITONE),value); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)( \
      G(ctx,BITTWO),value)
1975
/* Call pcre2_set_substitute_callout_<width> on the width-suffixed context
variable, with the callback cast to the function pointer type taking the
matching substitute callout block; data is passed through unchanged. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)( \
      G(mctx,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))callback, \
      data); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)( \
      G(mctx,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))callback, \
      data)
1983
/* Call pcre2_substitute_<width> and assign the result to rc. The width is
pasted onto the code and mdata arguments; subject and repl are cast to the
matching PCRE2_SPTR type and outbuf to the matching PCRE2_UCHAR pointer type;
the remaining arguments are passed through unchanged. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx,repl,rlength,outbuf,outlenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)( \
      G(code,BITONE),(G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx, \
      (G(PCRE2_SPTR,BITONE))repl,rlength, \
      (G(PCRE2_UCHAR,BITONE) *)outbuf,outlenptr); \
  else \
    rc = G(pcre2_substitute_,BITTWO)( \
      G(code,BITTWO),(G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx, \
      (G(PCRE2_SPTR,BITTWO))repl,rlength, \
      (G(PCRE2_UCHAR,BITTWO) *)outbuf,outlenptr)
1993
/* Call pcre2_substring_copy_byname_<width> and assign the result to rc. The
width is pasted onto mdata and name, and buf is cast to the matching
PCRE2_UCHAR pointer type. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)( \
      G(mdata,BITONE),G(name,BITONE), \
      (G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)( \
      G(mdata,BITTWO),G(name,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

/* The same for pcre2_substring_copy_bynumber_<width>; num is passed through
unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)( \
      G(mdata,BITONE),num, \
      (G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)( \
      G(mdata,BITTWO),num, \
      (G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)
2009
/* Pass the argument, cast to the matching PCRE2_UCHAR pointer type, to
pcre2_substring_free_<width>. */

#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)str); \
  else \
    G(pcre2_substring_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)str)
2014
/* Call pcre2_substring_get_byname_<width> and assign the result to rc. The
width is pasted onto mdata and name, and bufptr is cast to a pointer to the
matching PCRE2_UCHAR pointer type. */

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)( \
      G(mdata,BITONE),G(name,BITONE), \
      (G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)( \
      G(mdata,BITTWO),G(name,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

/* The same for pcre2_substring_get_bynumber_<width>; num is passed through
unchanged. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)( \
      G(mdata,BITONE),num, \
      (G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)( \
      G(mdata,BITTWO),num, \
      (G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)
2030
/* Call pcre2_substring_length_byname_<width> with the width-suffixed mdata
and name variables and assign the result to rc. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)( \
      G(mdata,BITONE),G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)( \
      G(mdata,BITTWO),G(name,BITTWO),lenptr)

/* The same for pcre2_substring_length_bynumber_<width>; num is passed through
unchanged. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)( \
      G(mdata,BITONE),num,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)( \
      G(mdata,BITTWO),num,lenptr)
2042
/* Call pcre2_substring_list_get_<width> on the width-suffixed mdata variable,
with listptr cast to the matching PCRE2_UCHAR *** type, and assign the result
to rc. */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)( \
      G(mdata,BITONE),(G(PCRE2_UCHAR,BITONE) ***)listptr,lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)( \
      G(mdata,BITTWO),(G(PCRE2_UCHAR,BITTWO) ***)listptr,lensptr)

/* Pass the argument, cast to the matching PCRE2_UCHAR ** type, to
pcre2_substring_list_free_<width>. */

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) **)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) **)list)
2056
/* Call pcre2_substring_number_from_name_<width> with the width-suffixed code
and name variables and assign the result to rc. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)( \
      G(code,BITONE),G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)( \
      G(code,BITTWO),G(name,BITTWO))
2062
/* Expression yielding, as a void pointer, the width-suffixed variable that
corresponds to the current test_mode. */

#define PTR(var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(var,BITONE) : \
    (void *)G(var,BITTWO))
2066
/* Assign value to the named field, reached through the width-suffixed
pointer variable selected by test_mode. */

#define SETFLD(var,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field = value; \
  else \
    G(var,BITTWO)->field = value

/* As SETFLD, but the field is indexed and element [idx] is assigned. */

#define SETFLDVEC(var,field,idx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field[idx] = value; \
  else \
    G(var,BITTWO)->field[idx] = value

/* Apply the operator token given as the third argument to the width-suffixed
variable, with the second argument as the right-hand operand. */

#define SETOP(var,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op operand; \
  else \
    G(var,BITTWO) op operand
2078
/* Store value in the width-suffixed variable selected by test_mode, cast to a
pointer to the uint<width>_t type built by token pasting. */

#define SETCASTPTR(var,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(var,BITONE) = \
    (G(G(uint,BITONE),_t) *)(value); \
  else G(var,BITTWO) = \
    (G(G(uint,BITTWO),_t) *)(value)
2084
/* Expression: the result of the strlen<width> function for the current
test_mode, applied to the argument cast to the matching PCRE2_SPTR type. */

#define STRLEN(str) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) : \
    G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))
2088
/* Call the width-suffixed function fn<width> with the width-suffixed argument
arg<width>, the width being chosen by test_mode. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(fn,BITONE)(G(arg,BITONE)); \
  else G(fn,BITTWO)(G(arg,BITTWO))
2094
/* Call the width-suffixed function a<width> with the two width-suffixed
arguments b<width> and c<width>, the width being chosen by test_mode. The
function name is formed by G(a,<width>) and is followed directly by the
argument list, in the same way as SUB1. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2100
/* Expression that is true when the width-suffixed variable for the current
test_mode stands in relation rel to (value). */

#define TEST(var,rel,value) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) rel (value)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) rel (value)))

/* As TEST, but compares a field reached through the width-suffixed pointer
variable. */

#define TESTFLD(var,field,rel,value) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && \
      G(var,BITONE)->field rel (value)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && \
      G(var,BITTWO)->field rel (value)))
2108
2109
2110 #endif /* Two out of three modes */
2111
2112 /* ----- End of cases where more than one mode is supported ----- */
2113
2114
2115 /* ----- Only 8-bit mode is supported ----- */
2116
2117 #elif defined SUPPORT_PCRE2_8
/* 8-bit only: casts, code unit access, context copies and field access. Each
macro pastes 8 onto the variable names it is given. */

#define CASTFLD(type,var,field) (type)(G(var,8)->field)
#define CASTVAR(type,var) (type)G(var,8)
#define CODE_UNIT(ptr,idx) (uint32_t)(((PCRE2_SPTR8)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,units) memcpy(G(dst,8),(char *)src, units)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(var,field) G(var,8)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
/* 8-bit only: call pchars8 on ptr, cast to PCRE2_SPTR8 and advanced by off.
PCHARS assigns the result to count; PCHARSV discards it. */

#define PCHARS(count, ptr, off, len, utf, file) \
  count = pchars8((PCRE2_SPTR8)(ptr)+off, len, utf, file)
#define PCHARSV(ptr, off, len, utf, file) \
  (void)pchars8((PCRE2_SPTR8)(ptr)+off, len, utf, file)
/* 8-bit only: callout enumeration, code copying, compiling, and freeing a
converted pattern. Arguments that name variables have 8 pasted on; pointer
arguments are cast to the 8-bit types shown. */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))callback, \
    data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pattern,length,options,errptr,offptr,cctx) \
  G(code,8) = pcre2_compile_8(pattern,length,options,errptr,offptr,cctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pattern)
/* 8-bit only: DFA matching, error message retrieval, match data getters and
the JIT compile/match calls. Results are assigned to the first argument,
except for PCRE2_JIT_FREE_UNUSED_MEMORY, which has no result argument. */

#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(rc,mdata) \
  rc = pcre2_get_match_data_heapframes_size_8(G(mdata,8))
#define PCRE2_GET_OVECTOR_COUNT(rc,mdata) \
  rc = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(rc,mdata) \
  rc = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_8(G(code,8),options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_8(G(gctx,8))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),mctx)
/* 8-bit only: JIT stack creation, assignment and release. Each of these three
expansions supplies its own terminating semicolon. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8( \
    startsize,maxsize,gctx);
#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  pcre2_jit_stack_assign_8( \
    G(mctx,8),(pcre2_jit_callback_8)callback,data);
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack);
/* 8-bit only: character tables, matching, match data management, pattern
conversion and information, and printing the compiled code. */

#define PCRE2_MAKETABLES(tables,gctx) \
  tables = pcre2_maketables_8(G(gctx,8))
#define PCRE2_MAKETABLES_FREE(gctx,tables) \
  pcre2_maketables_free_8(G(gctx,8),tables)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,size,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(size,G(gctx,8))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),G(gctx,8))
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,lenptr,cvctx) \
  rc = pcre2_pattern_convert_8(G(pattern,8),length,options, \
    (PCRE2_UCHAR8 **)bufptr,lenptr,G(cvctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(flag) \
  pcre2_printint_8(compiled_code8,outfile,flag)
/* 8-bit only: serialization. The code list argument is cast to the 8-bit
pcre2_code pointer-to-pointer type; the context argument has 8 pasted on. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8( \
    (pcre2_code_8 **)codes,count,bytes,G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_8( \
    (const pcre2_code_8 **)codes,count,bytesptr,sizeptr,G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
/* 8-bit only: context setters. Each calls the pcre2_set_..._8 function on the
context variable with 8 pasted on. The two glob setters assign the function
result to their first argument; the others ignore any result. */

#define PCRE2_SET_CALLOUT(mctx,callback,data) \
  pcre2_set_callout_8(G(mctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))callback,data)
#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  pcre2_set_character_tables_8(G(cctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(cctx,8),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,value) \
  pcre2_set_depth_limit_8(G(ctx,8),value)
#define PCRE2_SET_GLOB_ESCAPE(rc,cvctx,value) \
  rc = pcre2_set_glob_escape_8(G(cvctx,8),value)
#define PCRE2_SET_GLOB_SEPARATOR(rc,cvctx,value) \
  rc = pcre2_set_glob_separator_8(G(cvctx,8),value)
#define PCRE2_SET_HEAP_LIMIT(ctx,value) \
  pcre2_set_heap_limit_8(G(ctx,8),value)
#define PCRE2_SET_MATCH_LIMIT(ctx,value) \
  pcre2_set_match_limit_8(G(ctx,8),value)
#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(ctx,value) \
  pcre2_set_max_pattern_compiled_length_8(G(ctx,8),value)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,value) \
  pcre2_set_max_pattern_length_8(G(ctx,8),value)
#define PCRE2_SET_MAX_VARLOOKBEHIND(ctx,value) \
  pcre2_set_max_varlookbehind_8(G(ctx,8),value)
#define PCRE2_SET_OFFSET_LIMIT(ctx,value) \
  pcre2_set_offset_limit_8(G(ctx,8),value)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,value) \
  pcre2_set_parens_nest_limit_8(G(ctx,8),value)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,callback,data) \
  pcre2_set_substitute_callout_8(G(mctx,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))callback,data)
/* 8-bit only: call pcre2_substitute_8 and assign the result to rc. code and
mdata have 8 pasted on; subject and repl are cast to PCRE2_SPTR8 and outbuf to
PCRE2_UCHAR8 *. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx,repl,rlength,outbuf,outlenptr) \
  rc = pcre2_substitute_8( \
    G(code,8),(PCRE2_SPTR8)subject,length,start,options, \
    G(mdata,8),mctx, \
    (PCRE2_SPTR8)repl,rlength, \
    (PCRE2_UCHAR8 *)outbuf,outlenptr)
/* 8-bit only: substring extraction. Where a macro has a result argument it
comes first; mdata and name arguments have 8 pasted on, and buffer arguments
are cast to the PCRE2_UCHAR8 pointer type of the indirection shown. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_8( \
    G(mdata,8),G(name,8),(PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_8( \
    G(mdata,8),num,(PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(str) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8( \
    G(mdata,8),G(name,8),(PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8( \
    G(mdata,8),num,(PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8( \
    G(mdata,8),G(name,8),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_8( \
    G(mdata,8),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_8( \
    G(mdata,8),(PCRE2_UCHAR8 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8( \
    (PCRE2_UCHAR8 **)list)
/* 8-bit only: assign to a the result of pcre2_substring_number_from_name_8
called with the b8 and c8 variables.

As in the two- and three-width definitions of this macro, the expansion has no
trailing semicolon; the caller supplies it. A semicolon inside the macro would
make "if (x) PCRE2_SUBSTRING_NUMBER_FROM_NAME(...); else ..." a syntax error
in 8-bit-only builds alone. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit only: generic variable and field helpers. Each pastes 8 onto the
variable (or function) name it is given. */

#define PTR(var) (void *)G(var,8)
#define SETFLD(var,field,value) G(var,8)->field = value
#define SETFLDVEC(var,field,idx,value) G(var,8)->field[idx] = value
#define SETOP(var,operand,op) G(var,8) op operand
#define SETCASTPTR(var,value) G(var,8) = (uint8_t *)(value)
#define STRLEN(str) (int)strlen((char *)str)
#define SUB1(fn,arg) G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,rel,value) (G(var,8) rel (value))
#define TESTFLD(var,field,rel,value) (G(var,8)->field rel (value))
2224
2225
2226 /* ----- Only 16-bit mode is supported ----- */
2227
2228 #elif defined SUPPORT_PCRE2_16
/* 16-bit only: casts, code unit access, context copies and field access. Each
macro pastes 16 onto the variable names it is given; CONVERT_COPY doubles its
count argument to obtain the memcpy() length. */

#define CASTFLD(type,var,field) (type)(G(var,16)->field)
#define CASTVAR(type,var) (type)G(var,16)
#define CODE_UNIT(ptr,idx) (uint32_t)(((PCRE2_SPTR16)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(dst,src,units) memcpy(G(dst,16),(char *)src, (units)*2)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16))
#define FLD(var,field) G(var,16)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16))
/* 16-bit only: call pchars16 on ptr, cast to PCRE2_SPTR16 and advanced by
off. PCHARS assigns the result to count; PCHARSV discards it. */

#define PCHARS(count, ptr, off, len, utf, file) \
  count = pchars16((PCRE2_SPTR16)(ptr)+off, len, utf, file)
#define PCHARSV(ptr, off, len, utf, file) \
  (void)pchars16((PCRE2_SPTR16)(ptr)+off, len, utf, file)
/* 16-bit only: callout enumeration, code copying, compiling, and freeing a
converted pattern. Arguments that name variables have 16 pasted on; pointer
arguments are cast to the 16-bit types shown. */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))callback, \
    data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_16(G(src,16))
#define PCRE2_COMPILE(code,pattern,length,options,errptr,offptr,cctx) \
  G(code,16) = pcre2_compile_16(pattern,length,options,errptr,offptr,cctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pattern)
/* 16-bit only: DFA matching, error message retrieval, match data getters and
the JIT compile/match calls. Results are assigned to the first argument,
except for PCRE2_JIT_FREE_UNUSED_MEMORY, which has no result argument. The
error message length argument is the <buf>16_size variable halved. */

#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(rc,mdata) \
  rc = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(rc,mdata) \
  rc = pcre2_get_match_data_heapframes_size_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(rc,mdata) \
  rc = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_16(G(code,16),options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_16(G(gctx,16))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),mctx)
/* 16-bit only: JIT stack creation, assignment and release. Each of these
three expansions supplies its own terminating semicolon. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16( \
    startsize,maxsize,gctx);
#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  pcre2_jit_stack_assign_16( \
    G(mctx,16),(pcre2_jit_callback_16)callback,data);
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack);
/* 16-bit only: character tables, matching, match data management, pattern
conversion and information, and printing the compiled code. */

#define PCRE2_MAKETABLES(tables,gctx) \
  tables = pcre2_maketables_16(G(gctx,16))
#define PCRE2_MAKETABLES_FREE(gctx,tables) \
  pcre2_maketables_free_16(G(gctx,16),tables)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,size,gctx) \
  G(mdata,16) = pcre2_match_data_create_16(size,G(gctx,16))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),G(gctx,16))
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,lenptr,cvctx) \
  rc = pcre2_pattern_convert_16(G(pattern,16),length,options, \
    (PCRE2_UCHAR16 **)bufptr,lenptr,G(cvctx,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16),what,where)
#define PCRE2_PRINTINT(flag) \
  pcre2_printint_16(compiled_code16,outfile,flag)
2278 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2279 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
2280 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2281 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
2282 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
2283 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2284 r = pcre2_serialize_get_number_of_codes_16(a)
2285 #define PCRE2_SET_CALLOUT(a,b,c) \
2286 pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c);
2287 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
2288 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2289 pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
2290 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
2291 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
2292 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
2293 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
2294 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
2295 #define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_16(G(a,16),b)
2296 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
2297 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
2298 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2299 pcre2_set_substitute_callout_16(G(a,16), \
2300 (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
2301 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2302 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
2303 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
2304 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2305 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
2306 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2307 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
2308 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
2309 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2310 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
2311 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2312 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
2313 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2314 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
2315 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2316 a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
2317 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2318 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
2319 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2320 pcre2_substring_list_free_16((PCRE2_UCHAR16 **)a)
2321 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2322 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16));
2323 #define PTR(x) (void *)G(x,16)
2324 #define SETFLD(x,y,z) G(x,16)->y = z
2325 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
2326 #define SETOP(x,y,z) G(x,16) z y
2327 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
2328 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
2329 #define SUB1(a,b) G(a,16)(G(b,16))
2330 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
2331 #define TEST(x,r,y) (G(x,16) r (y))
2332 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2333
2334
2335 /* ----- Only 32-bit mode is supported ----- */
2336
2337 #elif defined SUPPORT_PCRE2_32
/* Wrappers used when only the 32-bit library is supported. Every macro below
forwards to the _32 variant of a library function or of a local helper. G() is
defined outside this section; presumably it attaches the mode suffix to its
first argument - confirm at its definition.

NOTE(review): the expansions of PCRE2_JIT_STACK_CREATE, PCRE2_JIT_STACK_ASSIGN,
PCRE2_JIT_STACK_FREE, PCRE2_SUBSTRING_COPY_BYNUMBER and
PCRE2_SUBSTRING_NUMBER_FROM_NAME already end in a semicolon, unlike the other
macros here, so they are not safe as the sole body of an unbraced if/else. */

/* Casts, code unit access and context copying. CONVERT_COPY copies c code
units, i.e. (c)*4 bytes. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* Character printing: PCHARS stores the value returned by pchars32() in lv,
PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)

/* Compiling, copying and converting patterns. PCRE2_CALLOUT_ENUMERATE always
operates on compiled_code32 rather than on a macro argument. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,32) = pcre2_compile_32(b,c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* The buffer length passed to the library is the buffer's _size variable
divided by 4. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \
  r = pcre2_get_match_data_heapframes_size_32(G(a,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))

/* JIT support. The three JIT stack macros carry their own trailing semicolon
(see the note at the top of this section). */

#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);

/* Character tables, matching and match data. */

#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_32(G(c,32))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_32(G(c,32),a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,G(c,32))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)

/* PCRE2_PRINTINT always prints compiled_code32 to outfile. */

#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)

/* Serialization. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)

/* Context setters. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_32(G(a,32), \
    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)

/* Substitution and substring extraction. PCRE2_SUBSTRING_COPY_BYNUMBER and
PCRE2_SUBSTRING_NUMBER_FROM_NAME carry their own trailing semicolon. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_UCHAR32 **)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));

/* Generic accessors for mode-specific variables: take a pointer, set a field
or vector element, apply an operator (SETOP expands to "variable z y"), set a
pointer with a cast, measure a string, call a mode-specific function (SUB1,
SUB2) and compare a variable or one of its fields (TEST, TESTFLD). STRLEN
narrows the result of strlen32() to int. */

#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2442
2443 #endif
2444
2445 /* ----- End of mode-specific function call macros ----- */
2446
2447
2448
2449
2450 /*************************************************
2451 * Alternate character tables *
2452 *************************************************/
2453
2454 /* By default, the "tables" pointer in the compile context when calling
2455 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2456 library. However, the tables modifier can be used to select alternate sets of
2457 tables, for different kinds of testing. Note that the locale modifier also
2458 adjusts the tables. */
2459
2460 /* This is the set of tables distributed as default with PCRE2. It recognizes
2461 only ASCII characters. */
2462
static const uint8_t tables1[] = {

/* This table is a lower casing table. Entries 65-90 ('A'-'Z') hold 97-122;
every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. Entries 65-90 hold 97-122 and entries
97-122 hold 65-90; every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order; the label on each one is borne out by the bits
that are set. */

  /* space: 9-13 and 32 */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit: 0-9, A-F, a-f */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit: 0-9 */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper: A-Z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower: a-z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word: 0-9, A-Z, a-z and '_' */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph: 33-126 */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print: 32-126 */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct: the graph characters that are not in the word-style ranges
  0-9, A-Z, a-z */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl: 0-31 and 127 */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2631
2632 /* This is a set of tables that came originally from a Windows user. It seems
2633 to be at least an approximation of ISO 8859. In particular, there are
2634 characters greater than 128 that are marked as spaces, letters, etc. */
2635
static const uint8_t tables2[] = {

/* The 1088 entries are laid out in four consecutive parts of 256, 256, 320
and 256 bytes. The values indicate the same arrangement as tables1: lower
casing table, case flipping table, ten 32-byte class bit maps, and per-character
type flags. */

/* Part 1: lower casing table. As well as 'A'-'Z' (65-90 -> 97-122), entries
192-222 hold 224-254, except that 215 holds itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Part 2: case flipping table. The upper-to-lower entries repeat those of
part 1; in addition 97-122 hold 65-90 and 224-254 hold 192-222, except that
247 holds itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Part 3: ten class bit maps of 32 bytes (four rows) each, written in decimal.
Judging by the ASCII bits, which match the corresponding maps in tables1, the
order is space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl.
Several maps also have bits set for characters above 127. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Part 4: one byte of type flags per character, eight characters per row,
written in decimal. The ASCII rows carry the same values as the final table of
tables1 (28 = 0x1c, 26 = 0x1a, 18 = 0x12, 128 = 0x80), except that entry 11 is
0 here and 0x01 there. The rows for 128-255 are not all zero in this set. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2774
2775
2776
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* Replacement for memmove() on systems whose library does not supply one. If
bcopy() is available it does the work. Otherwise the bytes are moved one at a
time, in whichever direction ensures that an overlapping source is not
overwritten before it has been read.

Arguments:
  d         destination
  s         source
  n         number of bytes to move

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t left = n;

if (to > from)
  {
  /* Destination is above the source: work downwards from the far end. */
  to += n;
  from += n;
  while (left-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: an ascending copy is safe. */
  while (left-- > 0) *to++ = *from++;
  }

return d;
#endif   /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2813
2814
2815
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Stand-in for strerror() on old systems (SunOS4 is one example) whose
libraries lack it. The message is looked up in the system's sys_errlist vector,
whose length is given by sys_nerr.

Argument:   n    the error number
Returns:    the corresponding sys_errlist entry, or a fixed "unknown" text if
              n is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
2835
2836
2837
2838 /*************************************************
2839 * Local memory functions *
2840 *************************************************/
2841
2842 /* Alternative memory functions, to test functionality. */
2843
my_malloc(size_t size,void * data)2844 static void *my_malloc(size_t size, void *data)
2845 {
2846 void *block = malloc(size);
2847 (void)data;
2848 if (show_memory)
2849 {
2850 if (block == NULL)
2851 {
2852 fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
2853 }
2854 else
2855 {
2856 fprintf(outfile, "malloc %5" SIZ_FORM, size);
2857 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2858 fprintf(outfile, " %p", block); /* Not portable */
2859 #endif
2860 if (malloclistptr < MALLOCLISTSIZE)
2861 {
2862 malloclist[malloclistptr] = block;
2863 malloclistlength[malloclistptr++] = size;
2864 }
2865 else
2866 fprintf(outfile, " (not remembered)");
2867 fprintf(outfile, "\n");
2868 }
2869 }
2870 return block;
2871 }
2872
my_free(void * block,void * data)2873 static void my_free(void *block, void *data)
2874 {
2875 (void)data;
2876 if (show_memory && block != NULL)
2877 {
2878 uint32_t i, j;
2879 BOOL found = FALSE;
2880
2881 fprintf(outfile, "free");
2882 for (i = 0; i < malloclistptr; i++)
2883 {
2884 if (block == malloclist[i])
2885 {
2886 fprintf(outfile, " %5" SIZ_FORM, malloclistlength[i]);
2887 malloclistptr--;
2888 for (j = i; j < malloclistptr; j++)
2889 {
2890 malloclist[j] = malloclist[j+1];
2891 malloclistlength[j] = malloclistlength[j+1];
2892 }
2893 found = TRUE;
2894 break;
2895 }
2896 }
2897 if (!found) fprintf(outfile, " unremembered block");
2898 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2899 fprintf(outfile, " %p", block); /* Not portable */
2900 #endif
2901 fprintf(outfile, "\n");
2902 }
2903 free(block);
2904 }
2905
2906
2907
2908 /*************************************************
2909 * Callback function for stack guard *
2910 *************************************************/
2911
2912 /* This is set up to be called from pcre2_compile() when the stackguard=n
2913 modifier sets a value greater than zero. The test we do is whether the
2914 parenthesis nesting depth is greater than the value set by the modifier.
2915
2916 Argument: the current parenthesis nesting depth
2917 Returns: non-zero to kill the compilation
2918 */
2919
2920 static int
stack_guard(uint32_t depth,void * user_data)2921 stack_guard(uint32_t depth, void *user_data)
2922 {
2923 (void)user_data;
2924 return depth > pat_patctl.stackguard_test;
2925 }
2926
2927
2928 /*************************************************
2929 * JIT memory callback *
2930 *************************************************/
2931
2932 static PCRE2_JIT_STACK*
jit_callback(void * arg)2933 jit_callback(void *arg)
2934 {
2935 jit_was_used = TRUE;
2936 return (PCRE2_JIT_STACK *)arg;
2937 }
2938
2939
2940 /*************************************************
2941 * Convert UTF-8 character to code point *
2942 *************************************************/
2943
2944 /* This function reads one or more bytes that represent a UTF-8 character,
2945 and returns the codepoint of that character. Note that the function supports
2946 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2947 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2948 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2949 checking, and also for generating 32-bit non-UTF data values above the UTF
2950 limit.
2951
2952 Argument:
2953 utf8bytes a pointer to the byte vector
2954 end a pointer to the end of the byte vector
2955 vptr a pointer to an int to receive the value
2956
2957 Returns: > 0 => the number of bytes consumed
2958 -6 to 0 => malformed UTF-8 character at offset = (-return)
2959 */
2960
2961 static int
utf82ord(PCRE2_SPTR8 utf8bytes,PCRE2_SPTR8 end,uint32_t * vptr)2962 utf82ord(PCRE2_SPTR8 utf8bytes, PCRE2_SPTR8 end, uint32_t *vptr)
2963 {
2964 uint32_t c = *utf8bytes++;
2965 uint32_t d = c;
2966 int i, j, s;
2967
2968 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2969 {
2970 if ((d & 0x80) == 0) break;
2971 d <<= 1;
2972 }
2973
2974 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2975 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2976
2977 /* i now has a value in the range 1-5 */
2978
2979 s = 6*i;
2980 d = (c & utf8_table3[i]) << s;
2981
2982 for (j = 0; j < i; j++)
2983 {
2984 if (utf8bytes >= end) return 0;
2985
2986 c = *utf8bytes++;
2987 if ((c & 0xc0) != 0x80) return -(j+1);
2988 s -= 6;
2989 d |= (c & 0x3f) << s;
2990 }
2991
2992 /* Check that encoding was the correct unique one */
2993
2994 for (j = 0; j < utf8_table1_size; j++)
2995 if (d <= (uint32_t)utf8_table1[j]) break;
2996 if (j != i) return -(i+1);
2997
2998 /* Valid value */
2999
3000 *vptr = d;
3001 return i+1;
3002 }
3003
3004
3005
3006 /*************************************************
3007 * Print one character *
3008 *************************************************/
3009
3010 /* Print a single character either literally, or as a hex escape, and count how
3011 many printed characters are used.
3012
3013 Arguments:
3014 c the character
3015 utf TRUE in UTF mode
3016 f the FILE to print to, or NULL just to count characters
3017
3018 Returns: number of characters written
3019 */
3020
3021 static int
pchar(uint32_t c,BOOL utf,FILE * f)3022 pchar(uint32_t c, BOOL utf, FILE *f)
3023 {
3024 int n = 0;
3025 char tempbuffer[16];
3026
3027 if (PRINTOK(c))
3028 {
3029 if (f != NULL) fprintf(f, "%c", c);
3030 return 1;
3031 }
3032
3033 if (c < 0x100)
3034 {
3035 if (utf)
3036 {
3037 if (f != NULL) fprintf(f, "\\x{%02x}", c);
3038 return 6;
3039 }
3040 else
3041 {
3042 if (f != NULL) fprintf(f, "\\x%02x", c);
3043 return 4;
3044 }
3045 }
3046
3047 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
3048 else n = sprintf(tempbuffer, "\\x{%02x}", c);
3049
3050 return n >= 0 ? n : 0;
3051 }
3052
3053
3054
3055 #ifdef SUPPORT_PCRE2_16
3056 /*************************************************
3057 * Find length of 0-terminated 16-bit string *
3058 *************************************************/
3059
/* Count the code units that precede the terminating zero of a 16-bit string.
The pointer difference is converted straight to size_t; going through int, as
was done previously, would corrupt any length greater than INT_MAX.

Argument:   p    points to the zero-terminated string
Returns:    the number of code units before the terminating zero
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
3066 #endif /* SUPPORT_PCRE2_16 */
3067
3068
3069
3070 #ifdef SUPPORT_PCRE2_32
3071 /*************************************************
3072 * Find length of 0-terminated 32-bit string *
3073 *************************************************/
3074
/* Count the code units that precede the terminating zero of a 32-bit string.
The pointer difference is converted straight to size_t; going through int, as
was done previously, would corrupt any length greater than INT_MAX.

Argument:   p    points to the zero-terminated string
Returns:    the number of code units before the terminating zero
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
3081 #endif /* SUPPORT_PCRE2_32 */
3082
3083
3084 #ifdef SUPPORT_PCRE2_8
3085 /*************************************************
3086 * Print 8-bit character string *
3087 *************************************************/
3088
3089 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
3090 For printing *MARK strings, a negative length is given, indicating that the
3091 length is in the first code unit. If handed a NULL file, this function just
3092 counts chars without printing (because pchar() does that). */
3093
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
/* A negative length means that the real length is held in the first code
unit, which is then skipped. */

int remaining = (length < 0)? *p++ : length;
PCRE2_SPTR8 end = p + remaining;
uint32_t ch = 0;
int total = 0;

while (remaining > 0)
  {
  int used = 0;   /* Code units consumed by this character; 0 => not decoded */

  /* In UTF mode try to decode a whole character, but accept the result only
  if it lies entirely within what is left of the string. */

  if (utf)
    {
    int rc = utf82ord(p, end, &ch);
    if (rc > 0 && rc <= remaining) used = rc;
    }

  /* Outside UTF mode, or when decoding was not accepted, the next code unit
  is treated as a character in its own right. */

  if (used == 0)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
3120 #endif
3121
3122
3123 #ifdef SUPPORT_PCRE2_16
3124 /*************************************************
3125 * Print 16-bit character string *
3126 *************************************************/
3127
3128 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3129 For printing *MARK strings, a negative length is given, indicating that the
3130 length is in the first code unit. If handed a NULL file, just counts chars
3131 without printing. */
3132
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
/* A negative length means that the real length is held in the first code
unit, which is then skipped. */

int remaining = (length < 0)? *p++ : length;
int total = 0;

while (remaining > 0)
  {
  uint32_t ch = *p++ & 0xffff;
  remaining--;

  /* In UTF mode a high surrogate that is followed, within the string, by a
  low surrogate is combined with it into one supplementary code point. An
  unpaired surrogate is passed on unchanged. */

  if (utf && remaining > 0 && (ch & 0xfc00) == 0xd800)
    {
    uint32_t low = *p & 0xffff;
    if ((low & 0xfc00) == 0xdc00)
      {
      ch = 0x10000 + ((ch & 0x3ff) << 10) + (low & 0x3ff);
      p++;
      remaining--;
      }
    }

  total += pchar(ch, utf, f);
  }

return total;
}
3154 #endif /* SUPPORT_PCRE2_16 */
3155
3156
3157
3158 #ifdef SUPPORT_PCRE2_32
3159 /*************************************************
3160 * Print 32-bit character string *
3161 *************************************************/
3162
3163 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3164 For printing *MARK strings, a negative length is given, indicating that the
3165 length is in the first code unit. If handed a NULL file, just counts chars
3166 without printing. */
3167
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means that the real length is held in the first code
unit, which is then skipped. */

if (length < 0) length = *p++;

/* Every 32-bit code unit is a complete character, so each one is passed
straight to pchar(), which is also given the utf flag. */

for (; length > 0; length--) total += pchar(*p++, utf, f);

return total;
}
3180 #endif /* SUPPORT_PCRE2_32 */
3181
3182
3183
3184
3185 /*************************************************
3186 * Convert character value to UTF-8 *
3187 *************************************************/
3188
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.
3193
3194 Arguments:
3195 cvalue the character value
3196 utf8bytes pointer to buffer for result - at least 6 bytes long
3197
Returns:      number of bytes placed in the buffer, or -1 if cvalue is greater
              than 0x7fffffff (nothing is written in that case)
3199 */
3200
3201 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3202 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3203 {
3204 int i, j;
3205 if (cvalue > 0x7fffffffu)
3206 return -1;
3207 for (i = 0; i < utf8_table1_size; i++)
3208 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3209 utf8bytes += i;
3210 for (j = i; j > 0; j--)
3211 {
3212 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3213 cvalue >>= 6;
3214 }
3215 *utf8bytes = utf8_table2[i] | cvalue;
3216 return i + 1;
3217 }
3218
3219
3220
3221 #ifdef SUPPORT_PCRE2_16
3222 /*************************************************
3223 * Convert string to 16-bit *
3224 *************************************************/
3225
3226 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3227 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3228 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3229 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3230 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3231 greater than 0xffff.
3232
If all the input bytes are ASCII, the space needed for a 16-bit string is
exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
is no more than double, because values up to 0xffff use one to three bytes in
UTF-8 and two bytes in UTF-16, while higher values use four bytes in both
UTF-8 and UTF-16. The result is always left in pbuffer16. Impose a minimum
size to save repeated re-sizing.
3239
3240 Note that this function does not object to surrogate values. This is
3241 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3242 for the purpose of testing that they are correctly faulted.
3243
3244 Arguments:
3245 p points to a byte string
3246 utf true in UTF mode
3247 lenptr points to number of bytes in the string (excluding trailing zero)
3248
3249 Returns: 0 on success, with the length updated to the number of 16-bit
3250 data items used (excluding the trailing zero)
3251 OR -1 if a UTF-8 string is malformed
3252 OR -2 if a value > 0x10ffff is encountered in UTF mode
3253 OR -3 if a value > 0xffff is encountered when not in UTF mode
3254 */
3255
3256 static int
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3257 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3258 {
3259 uint16_t *pp;
3260 PCRE2_SIZE len = *lenptr;
3261
3262 if (pbuffer16_size < 2*len + 2)
3263 {
3264 if (pbuffer16 != NULL) free(pbuffer16);
3265 pbuffer16_size = 2*len + 2;
3266 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3267 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3268 if (pbuffer16 == NULL)
3269 {
3270 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3271 pbuffer16_size);
3272 exit(1);
3273 }
3274 }
3275
3276 pp = pbuffer16;
3277 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3278 {
3279 for (; len > 0; len--) *pp++ = *p++;
3280 }
3281 else while (len > 0)
3282 {
3283 uint32_t c;
3284 const uint8_t *end = p + len;
3285 int chlen = utf82ord(p, end, &c);
3286 if (chlen <= 0) return -1;
3287 if (!utf && c > 0xffff) return -3;
3288 if (c > 0x10ffff) return -2;
3289 p += chlen;
3290 len -= chlen;
3291 if (c < 0x10000) *pp++ = c; else
3292 {
3293 c -= 0x10000;
3294 *pp++ = 0xD800 | (c >> 10);
3295 *pp++ = 0xDC00 | (c & 0x3ff);
3296 }
3297 }
3298
3299 *pp = 0;
3300 *lenptr = pp - pbuffer16;
3301 return 0;
3302 }
3303 #endif
3304
3305
3306
3307 #ifdef SUPPORT_PCRE2_32
3308 /*************************************************
3309 * Convert string to 32-bit *
3310 *************************************************/
3311
3312 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3313 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3314 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3315 limit of 0x10ffff cause an error.
3316
3317 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3318 is set, and no limit is imposed. There is special interpretation of the 0xff
3319 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3320 next character to be set. This provides a way of generating 32-bit characters
3321 greater than 0x7fffffff.
3322
3323 If all the input bytes are ASCII, the space needed for a 32-bit string is
3324 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3325 string is no more than four times, because the number of characters must be
3326 less than the number of bytes. The result is always left in pbuffer32. Impose a
3327 minimum size to save repeated re-sizing.
3328
3329 Note that this function does not object to surrogate values. This is
3330 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3331 for the purpose of testing that they are correctly faulted.
3332
3333 Arguments:
3334 p points to a byte string
3335 utf true in UTF mode
3336 lenptr points to number of bytes in the string (excluding trailing zero)
3337
3338 Returns: 0 on success, with the length updated to the number of 32-bit
3339 data items used (excluding the trailing zero)
3340 OR -1 if a UTF-8 string is malformed
3341 OR -2 if a value > 0x10ffff is encountered in UTF mode
3342 */
3343
3344 static int
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3345 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3346 {
3347 uint32_t *pp;
3348 PCRE2_SIZE len = *lenptr;
3349
3350 if (pbuffer32_size < 4*len + 4)
3351 {
3352 if (pbuffer32 != NULL) free(pbuffer32);
3353 pbuffer32_size = 4*len + 4;
3354 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3355 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3356 if (pbuffer32 == NULL)
3357 {
3358 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3359 pbuffer32_size);
3360 exit(1);
3361 }
3362 }
3363
3364 pp = pbuffer32;
3365
3366 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3367 {
3368 for (; len > 0; len--) *pp++ = *p++;
3369 }
3370
3371 else while (len > 0)
3372 {
3373 int chlen;
3374 uint32_t c;
3375 uint32_t topbit = 0;
3376 const uint8_t *end = p + len;
3377 if (!utf && *p == 0xff && len > 1)
3378 {
3379 topbit = 0x80000000u;
3380 p++;
3381 len--;
3382 }
3383 chlen = utf82ord(p, end, &c);
3384 if (chlen <= 0) return -1;
3385 if (utf && c > 0x10ffff) return -2;
3386 p += chlen;
3387 len -= chlen;
3388 *pp++ = c | topbit;
3389 }
3390
3391 *pp = 0;
3392 *lenptr = pp - pbuffer32;
3393 return 0;
3394 }
3395 #endif /* SUPPORT_PCRE2_32 */
3396
3397
3398
3399 /* This function is no longer used. Keep it around for a while, just in case it
3400 needs to be re-instated. */
3401
3402 #ifdef NEVERNEVERNEVER
3403
3404 /*************************************************
3405 * Move back by so many characters *
3406 *************************************************/
3407
3408 /* Given a code unit offset in a subject string, move backwards by a number of
3409 characters, and return the resulting offset.
3410
3411 Arguments:
3412 subject pointer to the string
3413 offset start offset
3414 count count to move back by
3415 utf TRUE if in UTF mode
3416
3417 Returns: a possibly changed offset
3418 */
3419
3420 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3421 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3422 {
3423 if (!utf || test_mode == PCRE32_MODE)
3424 return (count >= offset)? 0 : (offset - count);
3425
3426 else if (test_mode == PCRE8_MODE)
3427 {
3428 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3429 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3430 {
3431 pp--;
3432 while ((*pp & 0xc0) == 0x80) pp--;
3433 }
3434 return pp - (PCRE2_SPTR8)subject;
3435 }
3436
3437 else /* 16-bit mode */
3438 {
3439 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3440 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3441 {
3442 pp--;
3443 if ((*pp & 0xfc00) == 0xdc00) pp--;
3444 }
3445 return pp - (PCRE2_SPTR16)subject;
3446 }
3447 }
3448 #endif /* NEVERNEVERNEVER */
3449
3450
3451
3452 /*************************************************
3453 * Expand input buffers *
3454 *************************************************/
3455
3456 /* This function doubles the size of the input buffer and the buffer for
3457 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3458 the new ones.
3459
3460 Arguments: none
3461 Returns: nothing (aborts if malloc() fails)
3462 */
3463
3464 static void
expand_input_buffers(void)3465 expand_input_buffers(void)
3466 {
3467 int new_pbuffer8_size = 2*pbuffer8_size;
3468 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3469 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3470
3471 if (new_buffer == NULL || new_pbuffer8 == NULL)
3472 {
3473 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3474 exit(1);
3475 }
3476
3477 memcpy(new_buffer, buffer, pbuffer8_size);
3478 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3479
3480 pbuffer8_size = new_pbuffer8_size;
3481
3482 free(buffer);
3483 free(pbuffer8);
3484
3485 buffer = new_buffer;
3486 pbuffer8 = new_pbuffer8;
3487 }
3488
3489
3490
3491 /*************************************************
3492 * Read or extend an input line *
3493 *************************************************/
3494
3495 /* Input lines are read into buffer, but both patterns and data lines can be
3496 continued over multiple input lines. In addition, if the buffer fills up, we
3497 want to automatically expand it so as to be able to handle extremely large
3498 lines that are needed for certain stress tests, although this is less likely
3499 now that there are repetition features for both patterns and data. When the
3500 input buffer is expanded, the other two buffers must also be expanded likewise,
3501 and the contents of pbuffer, which are a copy of the input for callouts, must
3502 be preserved (for when expansion happens for a data line). This is not the most
3503 optimal way of handling this, but hey, this is just a test program!
3504
3505 Arguments:
3506 f the file to read
3507 start where in buffer to start (this *must* be within buffer)
3508 prompt for stdin or readline()
3509
3510 Returns: pointer to the start of new data
3511 could be a copy of start, or could be moved
3512 NULL if no data read and EOF reached
3513 */
3514
3515 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3516 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3517 {
3518 uint8_t *here = start;
3519
3520 for (;;)
3521 {
3522 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3523
3524 if (rlen > 1000)
3525 {
3526 size_t dlen;
3527
3528 /* If libreadline or libedit support is required, use readline() to read a
3529 line if the input is a terminal. Note that readline() removes the trailing
3530 newline, so we must put it back again, to be compatible with fgets(). */
3531
3532 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3533 if (INTERACTIVE(f))
3534 {
3535 size_t len;
3536 char *s = readline(prompt);
3537 if (s == NULL) return (here == start)? NULL : start;
3538 len = strlen(s);
3539 if (len > 0) add_history(s);
3540 if (len > rlen - 1) len = rlen - 1;
3541 memcpy(here, s, len);
3542 here[len] = '\n';
3543 here[len+1] = 0;
3544 free(s);
3545 }
3546 else
3547 #endif
3548
3549 /* Read the next line by normal means, prompting if the file is a tty. */
3550
3551 {
3552 if (INTERACTIVE(f)) printf("%s", prompt);
3553 if (fgets((char *)here, rlen, f) == NULL)
3554 return (here == start)? NULL : start;
3555 }
3556
3557 dlen = strlen((char *)here);
3558 here += dlen;
3559
3560 /* Check for end of line reached. Take care not to read data from before
3561 start (dlen will be zero for a file starting with a binary zero). */
3562
3563 if (here > start && here[-1] == '\n') return start;
3564
3565 /* If we have not read a newline when reading a file, we have either filled
3566 the buffer or reached the end of the file. We can detect the former by
3567 checking that the string fills the buffer, and the latter by feof(). If
3568 neither of these is true, it means we read a binary zero which has caused
3569 strlen() to give a short length. This is a hard error because pcre2test
3570 expects to work with C strings. */
3571
3572 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3573 {
3574 fprintf(outfile, "** Binary zero encountered in input\n");
3575 fprintf(outfile, "** pcre2test run abandoned\n");
3576 exit(1);
3577 }
3578 }
3579
3580 else
3581 {
3582 size_t start_offset = start - buffer;
3583 size_t here_offset = here - buffer;
3584 expand_input_buffers();
3585 start = buffer + start_offset;
3586 here = buffer + here_offset;
3587 }
3588 }
3589
3590 /* Control never gets here */
3591 }
3592
3593
3594
3595 /*************************************************
3596 * Case-independent strncmp() function *
3597 *************************************************/
3598
3599 /*
3600 Arguments:
3601 s first string
3602 t second string
3603 n number of characters to compare
3604
3605 Returns: < 0, = 0, or > 0, according to the comparison
3606 */
3607
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* At most n characters are compared; a zero or negative n compares nothing
and yields 0. As with strncmp(), the comparison also ends when both strings
reach their terminating zero together, so bytes that lie beyond the end of the
strings are never examined. */

while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;   /* Both strings ended at the same place */
  }
return 0;
}
3618
3619
3620
3621 /*************************************************
3622 * Scan the main modifier list *
3623 *************************************************/
3624
3625 /* This function searches the modifier list for a long modifier name.
3626
3627 Argument:
3628 p start of the name
  len         length of the name
3630
3631 Returns: an index in the modifier list, or -1 on failure
3632 */
3633
3634 static int
scan_modifiers(const uint8_t * p,unsigned int len)3635 scan_modifiers(const uint8_t *p, unsigned int len)
3636 {
3637 int bot = 0;
3638 int top = MODLISTCOUNT;
3639
3640 while (top > bot)
3641 {
3642 int mid = (bot + top)/2;
3643 unsigned int mlen = strlen(modlist[mid].name);
3644 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3645 if (c == 0)
3646 {
3647 if (len == mlen) return mid;
3648 c = (int)len - (int)mlen;
3649 }
3650 if (c > 0) bot = mid + 1; else top = mid;
3651 }
3652
3653 return -1;
3654
3655 }
3656
3657
3658
3659 /*************************************************
*      Check a modifier and find its field      *
3661 *************************************************/
3662
3663 /* This function is called when a modifier has been identified. We check that
3664 it is allowed here and find the field that is to be changed.
3665
3666 Arguments:
3667 m the modifier list entry
3668 ctx CTX_PAT => pattern context
3669 CTX_POPPAT => pattern context for popped pattern
3670 CTX_DEFPAT => default pattern context
3671 CTX_DAT => data context
3672 CTX_DEFDAT => default data context
3673 pctl point to pattern control block
3674 dctl point to data control block
3675 c a single character or 0
3676
3677 Returns: a field pointer or NULL
3678 */
3679
3680 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3681 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3682 {
3683 void *field = NULL;
3684 PCRE2_SIZE offset = m->offset;
3685
3686 if (restrict_for_perl_test) switch(m->which)
3687 {
3688 case MOD_PNDP:
3689 case MOD_PATP:
3690 case MOD_DATP:
3691 case MOD_PDP:
3692 break;
3693
3694 default:
3695 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3696 m->name);
3697 return NULL;
3698 }
3699
3700 switch (m->which)
3701 {
3702 case MOD_CTC: /* Compile context modifier */
3703 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3704 else if (ctx == CTX_PAT) field = PTR(pat_context);
3705 break;
3706
3707 case MOD_CTM: /* Match context modifier */
3708 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3709 else if (ctx == CTX_DAT) field = PTR(dat_context);
3710 break;
3711
3712 case MOD_DAT: /* Data line modifier */
3713 case MOD_DATP: /* Allowed for Perl test */
3714 if (dctl != NULL) field = dctl;
3715 break;
3716
3717 case MOD_PAT: /* Pattern modifier */
3718 case MOD_PATP: /* Allowed for Perl test */
3719 if (pctl != NULL) field = pctl;
3720 break;
3721
3722 case MOD_PD: /* Pattern or data line modifier */
3723 case MOD_PDP: /* Ditto, allowed for Perl test */
3724 case MOD_PND: /* Ditto, but not default pattern */
3725 case MOD_PNDP: /* Ditto, allowed for Perl test */
3726 if (dctl != NULL) field = dctl;
3727 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3728 ctx != CTX_DEFPAT))
3729 field = pctl;
3730 break;
3731 }
3732
3733 if (field == NULL)
3734 {
3735 if (c == 0)
3736 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3737 else
3738 fprintf(outfile, "** /%c is not valid here\n", c);
3739 return NULL;
3740 }
3741
3742 return (char *)field + offset;
3743 }
3744
3745
3746
3747 /*************************************************
3748 * Decode a modifier list *
3749 *************************************************/
3750
3751 /* A pointer to a control block is NULL when called in cases when that block is
3752 not relevant. They are never all relevant in one call. At least one of patctl
3753 and datctl is NULL. The second argument specifies which context to use for
3754 modifiers that apply to contexts.
3755
3756 Arguments:
3757 p point to modifier string
3758 ctx CTX_PAT => pattern context
3759 CTX_POPPAT => pattern context for popped pattern
3760 CTX_DEFPAT => default pattern context
3761 CTX_DAT => data context
3762 CTX_DEFDAT => default data context
3763 pctl point to pattern control block
3764 dctl point to data control block
3765
3766 Returns: TRUE if successful decode, FALSE otherwise
3767 */
3768
static BOOL
decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
{
uint8_t *ep, *pp;
long li;
unsigned long uli;
BOOL first = TRUE;

/* Each pass of this loop handles one comma-separated item. For a full-name
item, p is the start of the item, ep is its end, and pp starts at the '=' (or
at ep if there is none) and is then moved past the data as it is decoded. Note
that the string is modified: a zero is written over any white space that
trails the last item. */

for (;;)
  {
  void *field;
  modstruct *m;
  BOOL off = FALSE;
  unsigned int i, len;
  int index;
  char *endptr;

  /* Skip white space and commas. */

  while (isspace(*p) || *p == ',') p++;
  if (*p == 0) break;

  /* Find the end of the item; lose trailing whitespace at end of line. */

  for (ep = p; *ep != 0 && *ep != ','; ep++);
  if (*ep == 0)
    {
    while (ep > p && isspace(ep[-1])) ep--;
    *ep = 0;
    }

  /* Remember if the first character is '-'. */

  if (*p == '-')
    {
    off = TRUE;
    p++;
    }

  /* Find the length of a full-length modifier name, and scan for it. */

  pp = p;
  while (pp < ep && *pp != '=') pp++;
  index = scan_modifiers(p, pp - p);

  /* If the first modifier is unrecognized, try to interpret it as a sequence
  of single-character abbreviated modifiers. None of these modifiers have any
  associated data. They just set options or control bits. */

  if (index < 0)
    {
    uint32_t cc;
    uint8_t *mp = p;

    if (!first)
      {
      fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
      if (ep - p == 1)
        fprintf(outfile, "** Single-character modifiers must come first\n");
      return FALSE;
      }

    /* Handle each character in turn. This loop advances p itself, so that
    when it ends p points at the comma, newline, or zero that stopped it. */

    for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
      {
      for (i = 0; i < C1MODLISTCOUNT; i++)
        if (cc == c1modlist[i].onechar) break;

      if (i >= C1MODLISTCOUNT)
        {
        fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
          *p, (int)(ep-mp), mp);
        return FALSE;
        }

      /* The index of the equivalent full-name entry in modlist is looked up
      by name the first time it is needed, and then remembered. */

      if (c1modlist[i].index >= 0)
        {
        index = c1modlist[i].index;
        }

      else
        {
        index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
          strlen(c1modlist[i].fullname));
        if (index < 0)
          {
          fprintf(outfile, "** Internal error: single-character equivalent "
            "modifier '%s' not found\n", c1modlist[i].fullname);
          return FALSE;
          }
        c1modlist[i].index = index;  /* Cache for next time */
        }

      field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
      if (field == NULL) return FALSE;

      /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
      PCRE2_EXTENDED_MORE. */

      if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
        {
        *((uint32_t *)field) &= ~PCRE2_EXTENDED;
        *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
        }
      else
        *((uint32_t *)field) |= modlist[index].value;
      }

    continue;    /* With the next (fullname) modifier */
    }

  /* We have a match on a full-name modifier. Check for the existence of data
  when needed. */

  m = modlist + index;      /* Save typing */
  if (m->type != MOD_CTL && m->type != MOD_OPT &&
      (m->type != MOD_IND || *pp == '='))
    {
    if (*pp++ != '=')
      {
      fprintf(outfile, "** '=' expected after '%s'\n", m->name);
      return FALSE;
      }
    if (off)
      {
      fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
      return FALSE;
      }
    }

  /* These on/off types have no data. */

  else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
    {
    fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
    return FALSE;
    }

  /* Set the data length for those types that have data. Then find the field
  that is to be set. If check_modifier() returns NULL, it has already output an
  error message. */

  len = ep - pp;
  field = check_modifier(m, ctx, pctl, dctl, 0);
  if (field == NULL) return FALSE;

  /* Process according to data type. Each case that takes data leaves pp
  pointing just past what it has used, for the terminator check that follows
  the switch. */

  switch (m->type)
    {
    case MOD_CTL:
    case MOD_OPT:
    if (off) *((uint32_t *)field) &= ~m->value;
      else *((uint32_t *)field) |= m->value;
    break;

    /* The value "default" selects a build-dependent setting and clears the
    CTL2_BSR_SET bit; "anycrlf" and "unicode" set the bit. */

    case MOD_BSR:
    if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
      {
#ifdef BSR_ANYCRLF
      *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
#else
      *((uint16_t *)field) = PCRE2_BSR_UNICODE;
#endif
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
        else dctl->control2 &= ~CTL2_BSR_SET;
      }
    else
      {
      if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
        *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
      else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
        *((uint16_t *)field) = PCRE2_BSR_UNICODE;
      else goto INVALID_VALUE;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
        else dctl->control2 |= CTL2_BSR_SET;
      }
    pp = ep;
    break;

    case MOD_CHR:  /* A single character */
    *((uint32_t *)field) = *pp++;
    break;

    /* Names are separated by colons. Each is looked up in convertlist, and
    its option either replaces CONVERT_UNSET or is ORed into the value that is
    already there. */

    case MOD_CON:  /* A convert type/options list */
    for (;; pp++)
      {
      uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
      len = ((colon != NULL && colon < ep)? colon:ep) - pp;
      for (i = 0; i < convertlistcount; i++)
        {
        if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
          {
          if (*((uint32_t *)field) == CONVERT_UNSET)
            *((uint32_t *)field) = convertlist[i].option;
          else
            *((uint32_t *)field) |= convertlist[i].option;
          break;
          }
        }
      if (i >= convertlistcount) goto INVALID_VALUE;
      pp += len;
      if (*pp != ':') break;
      }
    break;

    /* The second number is optional, follows a colon, and is set to zero when
    it is absent. */

    case MOD_IN2:    /* One or two unsigned integers */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
    ((uint32_t *)field)[0] = (uint32_t)uli;
    if (*endptr == ':')
      {
      uli = strtoul((const char *)endptr+1, &endptr, 10);
      if (U32OVERFLOW(uli)) goto INVALID_VALUE;
      ((uint32_t *)field)[1] = (uint32_t)uli;
      }
    else ((uint32_t *)field)[1] = 0;
    pp = (uint8_t *)endptr;
    break;

    /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
    less than ULONG_MAX. So first test for overflowing the long int, and then
    test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */

    case MOD_SIZ:    /* PCRE2_SIZE value */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (uli == ULONG_MAX) goto INVALID_VALUE;
#if ULONG_MAX > PCRE2_SIZE_MAX
    if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
#endif
    *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
    pp = (uint8_t *)endptr;
    break;

    /* With no data at all, the default held in m->value is used. */

    case MOD_IND:    /* Unsigned integer with default */
    if (len == 0)
      {
      *((uint32_t *)field) = (uint32_t)(m->value);
      break;
      }
    /* Fall through */

    case MOD_INT:    /* Unsigned integer */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
    *((uint32_t *)field) = (uint32_t)uli;
    pp = (uint8_t *)endptr;
    break;

    case MOD_INS:    /* Signed integer */
    if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
    li = strtol((const char *)pp, &endptr, 10);
    if (S32OVERFLOW(li)) goto INVALID_VALUE;
    *((int32_t *)field) = (int32_t)li;
    pp = (uint8_t *)endptr;
    break;

    /* The value must match one of the names in the newlines table exactly,
    ignoring case. Index 0 selects NEWLINE_DEFAULT and clears the CTL2_NL_SET
    bit; any other index is stored as it is and sets the bit. */

    case MOD_NL:
    for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
      if (len == strlen(newlines[i]) &&
        strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
    if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
    if (i == 0)
      {
      *((uint16_t *)field) = NEWLINE_DEFAULT;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
        else dctl->control2 &= ~CTL2_NL_SET;
      }
    else
      {
      *((uint16_t *)field) = i;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
        else dctl->control2 |= CTL2_NL_SET;
      }
    pp = ep;
    break;

    /* For a number, the field pointer is moved from offset m->offset to
    offset m->value within the same block, where a list of int32_t values is
    kept. A non-negative number goes into the first slot that holds a negative
    value, and -1 is stored after it. A negative number is stored in the first
    slot, again followed by -1. */

    case MOD_NN:              /* Name or (signed) number; may be several */
    if (isdigit(*pp) || *pp == '-')
      {
      int ct = MAXCPYGET - 1;
      int32_t value;
      li = strtol((const char *)pp, &endptr, 10);
      if (S32OVERFLOW(li)) goto INVALID_VALUE;
      value = (int32_t)li;
      field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
      if (value >= 0)                                    /* Add new number */
        {
        while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
          field = (char *)field + sizeof(int32_t);
        if (ct <= 0)
          {
          fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
          return FALSE;
          }
        }
      *((int32_t *)field) = value;
      if (ct > 0) ((int32_t *)field)[1] = -1;
      pp = (uint8_t *)endptr;
      }

    /* Multiple strings are put end to end. Each one is followed by a zero,
    and a second zero marks the end of the list. An empty value writes two
    zeros at the start of the field. */

    else
      {
      char *nn = (char *)field;
      if (len > 0)                        /* Add new name */
        {
        if (len > MAX_NAME_SIZE)
          {
          fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
          return FALSE;
          }
        while (*nn != 0) nn += strlen(nn) + 1;
        if (nn + len + 2 - (char *)field > LENCPYGET)
          {
          fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
            m->name);
          return FALSE;
          }
        memcpy(nn, pp, len);
        }
      nn[len] = 0 ;
      nn[len+1] = 0;
      pp = ep;
      }
    break;

    /* The field is treated as a buffer of m->value bytes, which has to hold
    the string and its terminating zero. */

    case MOD_STR:
    if (len + 1 > m->value)
      {
      fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
        m->name, m->value - 1);
      return FALSE;
      }
    memcpy(field, pp, len);
    ((uint8_t *)field)[len] = 0;
    pp = ep;
    break;
    }

  /* Whatever follows the data that was used must be an item terminator. */

  if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
    {
    fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
    return FALSE;
    }

  p = pp;
  first = FALSE;

  /* In the CTX_POPPAT context the item is rejected if, after it has been
  applied, the pattern control block has any option bits, a tables id, a
  locale, or any of the NOTPOP_CONTROLS bits set. */

  if (ctx == CTX_POPPAT &&
     (pctl->options != 0 ||
      pctl->tables_id != 0 ||
      pctl->locale[0] != 0 ||
      (pctl->control & NOTPOP_CONTROLS) != 0))
    {
    fprintf(outfile, "** '%s' is not valid here\n", m->name);
    return FALSE;
    }
  }

return TRUE;

/* Shared exit for a data value that cannot be decoded. At this point p and ep
still delimit the item that is being processed. */

INVALID_VALUE:
fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
return FALSE;
}
4138
4139
4140 /*************************************************
4141 * Get info from a pattern *
4142 *************************************************/
4143
4144 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4145 pattern.
4146
4147 Arguments:
4148 what code for the required information
4149 where where to put the answer
4150 unsetok PCRE2_ERROR_UNSET is an "expected" result
4151
4152 Returns: the return from pcre2_pattern_info()
4153 */
4154
4155 static int
pattern_info(int what,void * where,BOOL unsetok)4156 pattern_info(int what, void *where, BOOL unsetok)
4157 {
4158 int rc;
4159 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4160 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4161 if (rc >= 0) return 0;
4162 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4163 {
4164 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4165 what);
4166 if (rc == PCRE2_ERROR_BADMODE)
4167 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4168 "%d-bit mode\n", test_mode,
4169 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4170 }
4171 return rc;
4172 }
4173
4174
4175
#ifdef SUPPORT_PCRE2_8
/*************************************************
*         Show something in a list               *
*************************************************/

/* This function just helps to keep the code that uses it tidier. It is for
lists of things where introductory text is needed before the first item only:
it outputs the current introduction, a space, and the item, and then replaces
the introduction with an empty string so that later calls output only a space
and the item. As the calls are all in the POSIX-support code, the function is
compiled only when 8-bit mode is supported.

Arguments:
  msg          points to the introductory text pointer; updated to ""
  s            the item to show

Returns:       nothing
*/

static void
prmsg(const char **msg, const char *s)
{
const char *intro = *msg;
*msg = "";
fprintf(outfile, "%s %s", intro, s);
}
#endif  /* SUPPORT_PCRE2_8 */
4193
4194
4195
4196 /*************************************************
4197 * Show control bits *
4198 *************************************************/
4199
4200 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4201 Because the bits are unique, this can be used for both pattern and data control
4202 words.
4203
4204 Arguments:
4205 controls control bits
4206 controls2 more control bits
4207 before text to print before
4208
4209 Returns: nothing
4210 */
4211
4212 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4213 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4214 {
4215 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4216 before,
4217 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4218 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4219 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4220 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4221 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4222 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4223 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4224 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4225 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4226 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4227 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4228 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4229 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4230 ((controls & CTL_DFA) != 0)? " dfa" : "",
4231 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4232 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4233 ((controls & CTL_FINDLIMITS_NOHEAP) != 0)? " find_limits_noheap" : "",
4234 ((controls2 & CTL2_FRAMESIZE) != 0)? " framesize" : "",
4235 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4236 ((controls & CTL_GETALL) != 0)? " getall" : "",
4237 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4238 ((controls2 & CTL2_HEAPFRAMES_SIZE) != 0)? " heapframes_size" : "",
4239 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4240 ((controls & CTL_INFO) != 0)? " info" : "",
4241 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4242 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4243 ((controls & CTL_MARK) != 0)? " mark" : "",
4244 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4245 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4246 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4247 ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
4248 ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
4249 ((controls & CTL_POSIX) != 0)? " posix" : "",
4250 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4251 ((controls & CTL_PUSH) != 0)? " push" : "",
4252 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4253 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4254 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4255 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4256 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4257 ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4258 ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4259 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4260 ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4261 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4262 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4263 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4264 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4265 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4266 }
4267
4268
4269
4270 /*************************************************
4271 * Show compile options *
4272 *************************************************/
4273
4274 /* Called from show_pattern_info() and for unsupported POSIX options.
4275
4276 Arguments:
4277 options an options word
4278 before text to print before
4279 after text to print after
4280
4281 Returns: nothing
4282 */
4283
4284 static void
show_compile_options(uint32_t options,const char * before,const char * after)4285 show_compile_options(uint32_t options, const char *before, const char *after)
4286 {
4287 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4288 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4289 before,
4290 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4291 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4292 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4293 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4294 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4295 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4296 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4297 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4298 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4299 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4300 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4301 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4302 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4303 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4304 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4305 ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4306 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4307 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4308 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4309 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4310 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4311 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4312 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4313 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4314 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4315 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4316 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4317 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4318 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4319 ((options & PCRE2_UTF) != 0)? " utf" : "",
4320 after);
4321 }
4322
4323
4324 /*************************************************
4325 * Show compile extra options *
4326 *************************************************/
4327
4328 /* Called from show_pattern_info() and for unsupported POSIX options.
4329
4330 Arguments:
4331 options an options word
4332 before text to print before
4333 after text to print after
4334
4335 Returns: nothing
4336 */
4337
4338 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4339 show_compile_extra_options(uint32_t options, const char *before,
4340 const char *after)
4341 {
4342 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4343 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4344 before,
4345 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4346 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
4347 ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
4348 ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
4349 ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
4350 ((options & PCRE2_EXTRA_ASCII_DIGIT) != 0)? " ascii_digit" : "",
4351 ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
4352 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4353 ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
4354 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4355 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4356 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4357 after);
4358 }
4359
4360
4361
#ifdef SUPPORT_PCRE2_8
/*************************************************
*                Show match options              *
*************************************************/

/* Called for unsupported POSIX options. The name of each set option is output,
preceded by a space; nothing at all is output when no listed bit is set.

Argument:      an options word
Returns:       nothing
*/

static void
show_match_options(uint32_t options)
{
/* Option names are output in table order. */

static const struct {
  uint32_t bit;
  const char *text;
} moptnames[] = {
  { PCRE2_ANCHORED,                  " anchored" },
  { PCRE2_COPY_MATCHED_SUBJECT,      " copy_matched_subject" },
  { PCRE2_DFA_RESTART,               " dfa_restart" },
  { PCRE2_DFA_SHORTEST,              " dfa_shortest" },
  { PCRE2_DISABLE_RECURSELOOP_CHECK, " disable_recurseloop_check" },
  { PCRE2_ENDANCHORED,               " endanchored" },
  { PCRE2_NO_JIT,                    " no_jit" },
  { PCRE2_NO_UTF_CHECK,              " no_utf_check" },
  { PCRE2_NOTBOL,                    " notbol" },
  { PCRE2_NOTEMPTY,                  " notempty" },
  { PCRE2_NOTEMPTY_ATSTART,          " notempty_atstart" },
  { PCRE2_NOTEOL,                    " noteol" },
  { PCRE2_PARTIAL_HARD,              " partial_hard" },
  { PCRE2_PARTIAL_SOFT,              " partial_soft" }
};

size_t k;

for (k = 0; k < sizeof(moptnames)/sizeof(moptnames[0]); k++)
  {
  if ((options & moptnames[k].bit) != 0)
    fprintf(outfile, "%s", moptnames[k].text);
  }
}
#endif  /* SUPPORT_PCRE2_8 */
4389
4390
4391
4392 /*************************************************
4393 * Show memory usage info for a pattern *
4394 *************************************************/
4395
4396 static void
show_memory_info(void)4397 show_memory_info(void)
4398 {
4399 uint32_t name_count, name_entry_size;
4400 PCRE2_SIZE size, cblock_size;
4401
4402 /* One of the test_mode values will always be true, but to stop a compiler
4403 warning we must initialize cblock_size. */
4404
4405 cblock_size = 0;
4406 #ifdef SUPPORT_PCRE2_8
4407 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4408 #endif
4409 #ifdef SUPPORT_PCRE2_16
4410 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4411 #endif
4412 #ifdef SUPPORT_PCRE2_32
4413 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4414 #endif
4415
4416 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4417 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4418 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4419
4420 /* The uint32_t variables are cast before multiplying to stop code analyzers
4421 grumbling about potential overflow. */
4422
4423 fprintf(outfile, "Memory allocation - compiled block : %" SIZ_FORM "\n", size);
4424 fprintf(outfile, "Memory allocation - code portion : %" SIZ_FORM "\n", size -
4425 (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size -
4426 cblock_size);
4427
4428 if (pat_patctl.jit != 0)
4429 {
4430 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4431 fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size);
4432 }
4433 }
4434
4435
4436
4437 /*************************************************
4438 * Show frame size info for a pattern *
4439 *************************************************/
4440
4441 static void
show_framesize(void)4442 show_framesize(void)
4443 {
4444 PCRE2_SIZE frame_size;
4445 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4446 fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size);
4447 }
4448
4449
4450
4451 /*************************************************
4452 * Show heapframes size info for a match_data *
4453 *************************************************/
4454
4455 static void
show_heapframes_size(void)4456 show_heapframes_size(void)
4457 {
4458 PCRE2_SIZE heapframes_size;
4459 PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(heapframes_size, match_data);
4460 fprintf(outfile, "Heapframes size in match_data: %" SIZ_FORM "\n",
4461 heapframes_size);
4462 }
4463
4464
4465
4466 /*************************************************
4467 * Get and output an error message *
4468 *************************************************/
4469
4470 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4471 print_error_message(int errorcode, const char *before, const char *after)
4472 {
4473 int len;
4474 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4475 if (len < 0)
4476 {
4477 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4478 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4479 }
4480 else
4481 {
4482 fprintf(outfile, "%s", before);
4483 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4484 fprintf(outfile, "%s", after);
4485 }
4486 return len >= 0;
4487 }
4488
4489
4490 /*************************************************
4491 * Callback function for callout enumeration *
4492 *************************************************/
4493
4494 /* The only differences in the callout emumeration block for different code
4495 unit widths are that the pointers to the subject, the most recent MARK, and a
4496 callout argument string point to strings of the appropriate width. Casts can be
4497 used to deal with this.
4498
4499 Argument:
4500 cb pointer to enumerate block
4501 callout_data user data
4502
4503 Returns: 0
4504 */
4505
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4506 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4507 void *callout_data)
4508 {
4509 uint32_t i;
4510 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4511
4512 (void)callout_data; /* Not currently displayed */
4513
4514 fprintf(outfile, "Callout ");
4515 if (cb->callout_string != NULL)
4516 {
4517 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4518 fprintf(outfile, "%c", delimiter);
4519 PCHARSV(cb->callout_string, 0,
4520 cb->callout_string_length, utf, outfile);
4521 for (i = 0; callout_start_delims[i] != 0; i++)
4522 if (delimiter == callout_start_delims[i])
4523 {
4524 delimiter = callout_end_delims[i];
4525 break;
4526 }
4527 fprintf(outfile, "%c ", delimiter);
4528 }
4529 else fprintf(outfile, "%d ", cb->callout_number);
4530
4531 fprintf(outfile, "%.*s\n",
4532 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4533 pbuffer8 + cb->pattern_position);
4534
4535 return 0;
4536 }
4537
4538
4539
/*************************************************
*        Show information about a pattern        *
*************************************************/

/* This function is called after a pattern has been compiled if any of the
information-requesting controls have been set. Depending on the controls in
pat_patctl it can output, in this order: the compiled code (bincode or
fullbincode), the data obtained from a series of pcre2_pattern_info() requests
(info), and an enumeration of the pattern's callouts (callout_info).

Arguments:  none

Returns:    PR_OK     continue processing next line
            PR_SKIP   skip to a blank line (callout enumeration failed)
            PR_ABEND  abort the pcre2test run (an info request failed
                        unexpectedly, or an error message could not be
                        obtained)
*/

static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options, extra_options;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

/* Output the compiled code if requested; the argument to PCRE2_PRINTINT is
true only for fullbincode. */

if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
  {
  fprintf(outfile, "------------------------------------------------------------------\n");
  PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
  }

if ((pat_patctl.control & CTL_INFO) != 0)
  {
  int rc;
  void *nametable;
  uint8_t *start_bits;
  BOOL heap_limit_set, match_limit_set, depth_limit_set;
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
    depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
    newline_convention;

  /* Exercise the error route. The request code 999 is used for this, and the
  result is deliberately discarded. */

  PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
  (void)rc;

  /* These info requests may return PCRE2_ERROR_UNSET. For each one, remember
  whether a value was obtained; any other error abandons the run. */

  switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
    {
    case 0:
    heap_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    heap_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
    {
    case 0:
    match_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    match_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
    {
    case 0:
    depth_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    depth_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  /* These info requests should always succeed. The results are added rather
  than combined with a short-circuiting operator so that every request is
  made; pattern_info() returns zero on success, so a non-zero sum means that
  at least one of them failed. */

  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
      pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
      pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
      pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
      pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
      != 0)
    return PR_ABEND;

  fprintf(outfile, "Capture group count = %d\n", capture_count);

  if (backrefmax > 0)
    fprintf(outfile, "Max back reference = %d\n", backrefmax);

  /* maxlookbehind is not fetched in this function; it is a variable defined
  elsewhere in the file. */

  if (maxlookbehind > 0)
    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);

  if (heap_limit_set)
    fprintf(outfile, "Heap limit = %u\n", heap_limit);

  if (match_limit_set)
    fprintf(outfile, "Match limit = %u\n", match_limit);

  if (depth_limit_set)
    fprintf(outfile, "Depth limit = %u\n", depth_limit);

  /* Output the name table, one entry per line: the name, padding, and then
  the group number. */

  if (namecount > 0)
    {
    fprintf(outfile, "Named capture groups:\n");
    for (; namecount > 0; namecount--)
      {
      /* The group number at the start of each entry occupies two code units
      in 8-bit mode (it is read as a high/low byte pair below) and one code
      unit in the other modes; the name follows it.
      NOTE(review): nametable is a void *, so the STRLEN() argument presumably
      relies on the macro casting the pointer before the addition is applied;
      confirm against the macro definition before altering this expression. */

      int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
      uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
      fprintf(outfile, " ");

      /* In UTF mode the name may be a UTF string containing non-ASCII
      letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
      use the normal string printing functions, which use escapes for all
      non-ASCII characters. */

      if (utf)
        {
#ifdef SUPPORT_PCRE2_32
        /* Each 32-bit code unit is converted to UTF-8 individually. */

        if (test_mode == PCRE32_MODE)
          {
          PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
          while (*nameptr != 0)
            {
            uint8_t u8buff[6];
            int len = ord2utf8(*nameptr++, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_16
        /* A code unit in the range 0xD800-0xDBFF is combined with the code
        unit that follows it to give a code point above 0xFFFF before the
        conversion to UTF-8. */

        if (test_mode == PCRE16_MODE)
          {
          PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
          while (*nameptr != 0)
            {
            int len;
            uint8_t u8buff[6];
            uint32_t c = *nameptr++ & 0xffff;
            if (c >= 0xD800 && c < 0xDC00)
              c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
            len = ord2utf8(c, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_8
        /* In 8-bit mode the name is output as it stands. */

        if (test_mode == PCRE8_MODE)
          fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
#endif
        }
      else  /* Not UTF mode */
        {
        PCHARSV(nametable, imm2_size, length, FALSE, outfile);
        }

      /* Pad with spaces, based on the name's length in code units, so that
      the group numbers line up. */

      while (length++ < nameentrysize - imm2_size) putc(' ', outfile);

      /* Output the group number from the start of the entry. */

#ifdef SUPPORT_PCRE2_32
      if (test_mode == PCRE32_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_16
      if (test_mode == PCRE16_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_8
      if (test_mode == PCRE8_MODE)
        fprintf(outfile, "%3d\n", (int)(
        ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
#endif

      /* Advance to the next entry; nameentrysize is in code units, so it is
      scaled to bytes for the byte-pointer arithmetic. */

      nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
      }
    }

  if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
  if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
  if (match_empty)   fprintf(outfile, "May match empty string\n");

  /* NOTE(review): the results of these three requests are not checked, so the
  option variables would be used unset if a request failed. */

  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
  pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);

  /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
  cluttering up the verification output of non-UTF test files. The bits are
  removed from the output when they were not set in the pattern's own
  options. */

  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UTF;
    overall_options &= ~PCRE2_NEVER_UTF;
    }

  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UCP;
    overall_options &= ~PCRE2_NEVER_UCP;
    }

  /* Show the options once if the two sets are the same, otherwise show both
  sets; nothing is shown if both are empty. */

  if ((compile_options|overall_options) != 0)
    {
    if (compile_options == overall_options)
      show_compile_options(compile_options, "Options:", "\n");
    else
      {
      show_compile_options(compile_options, "Compile options:", "\n");
      show_compile_options(overall_options, "Overall options:", "\n");
      }
    }

  if (extra_options != 0)
    show_compile_extra_options(extra_options, "Extra options:", "\n");

  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

  /* The \R convention is shown if it was set by a modifier or is flagged as
  set in the compiled pattern. */

  if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
      (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

  /* The newline convention is shown only if the compiled pattern's flags say
  that it was set. */

  if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
    {
    switch (newline_convention)
      {
      case PCRE2_NEWLINE_CR:
      fprintf(outfile, "Forced newline is CR\n");
      break;

      case PCRE2_NEWLINE_LF:
      fprintf(outfile, "Forced newline is LF\n");
      break;

      case PCRE2_NEWLINE_CRLF:
      fprintf(outfile, "Forced newline is CRLF\n");
      break;

      case PCRE2_NEWLINE_ANYCRLF:
      fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
      break;

      case PCRE2_NEWLINE_ANY:
      fprintf(outfile, "Forced newline is any Unicode newline\n");
      break;

      case PCRE2_NEWLINE_NUL:
      fprintf(outfile, "Forced newline is NUL\n");
      break;

      default:
      break;
      }
    }

  /* Information about the start of a match: a first code type of 2 is
  reported as "at start or follows newline", a type of 1 means that a specific
  first code unit is known, and otherwise a bitmap of possible starting code
  units is shown if one exists. */

  if (first_ctype == 2)
    {
    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(first_cunit))
      fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
    else
      {
      fprintf(outfile, "First code unit = ");
      pchar(first_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  else if (start_bits != NULL)
    {
    /* The bitmap has one bit for each of 256 code unit values. The variable c
    keeps track of how much has been output on the current line, so that a
    long list is continued on a new line. */

    int i;
    int c = 24;
    fprintf(outfile, "Starting code units: ");
    for (i = 0; i < 256; i++)
      {
      if ((start_bits[i/8] & (1u << (i&7))) != 0)
        {
        if (c > 75)
          {
          fprintf(outfile, "\n ");
          c = 2;
          }
        if (PRINTOK(i) && i != ' ')
          {
          fprintf(outfile, "%c ", i);
          c += 2;
          }
        else
          {
          fprintf(outfile, "\\x%02x ", i);
          c += 5;
          }
        }
      }
    fprintf(outfile, "\n");
    }

  /* A non-zero last code type means that a last code unit is known. */

  if (last_ctype != 0)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }

  /* The minimum subject length is not shown when start optimizations were
  disabled for the pattern. */

  if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
    fprintf(outfile, "Subject length lower bound = %d\n", minlength);

  /* Report the outcome of JIT compilation when JIT was requested together
  with jitverify. If there is a non-zero JIT return code, its message is added
  in parentheses. */

  if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
    {
#ifdef SUPPORT_JIT
    if (FLD(compiled_code, executable_jit) != NULL)
      fprintf(outfile, "JIT compilation was successful\n");
    else
      {
      fprintf(outfile, "JIT compilation was not successful");
      if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
        return PR_ABEND;
      fprintf(outfile, "\n");
      }
#else
      fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
#endif
    }
  }

/* Enumerate the callouts, each of which is output by callout_callback(). A
non-zero result is reported and causes a skip; if it is negative, its error
message is also output. */

if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
  {
  int errorcode;
  PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
  if (errorcode != 0)
    {
    fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
    if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
      return PR_ABEND;
    return PR_SKIP;
    }
  }

return PR_OK;
}
4914
4915
4916
4917 /*************************************************
4918 * Handle serialization error *
4919 *************************************************/
4920
4921 /* Print an error message after a serialization failure.
4922
4923 Arguments:
4924 rc the error code
4925 msg an initial message for what failed
4926
4927 Returns: FALSE if print_error_message() fails
4928 */
4929
4930 static BOOL
serial_error(int rc,const char * msg)4931 serial_error(int rc, const char *msg)
4932 {
4933 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4934 return print_error_message(rc, "", "\n");
4935 }
4936
4937
4938
4939 /*************************************************
4940 * Open file for save/load commands *
4941 *************************************************/
4942
4943 /* This function decodes the file name and opens the file.
4944
4945 Arguments:
4946 buffptr point after the #command
4947 mode open mode
4948 fptr points to the FILE variable
4949 name name of # command
4950
4951 Returns: PR_OK or PR_ABEND
4952 */
4953
4954 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4955 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4956 {
4957 char *endf;
4958 char *filename = (char *)buffptr;
4959 while (isspace(*filename)) filename++;
4960 endf = filename + strlen8(filename);
4961 while (endf > filename && isspace(endf[-1])) endf--;
4962
4963 if (endf == filename)
4964 {
4965 fprintf(outfile, "** File name expected after %s\n", name);
4966 return PR_ABEND;
4967 }
4968
4969 *endf = 0;
4970 *fptr = fopen((const char *)filename, mode);
4971 if (*fptr == NULL)
4972 {
4973 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4974 return PR_ABEND;
4975 }
4976
4977 return PR_OK;
4978 }
4979
4980
4981
4982 /*************************************************
4983 * Process command line *
4984 *************************************************/
4985
4986 /* This function is called for lines beginning with # and a character that is
4987 not ! or whitespace, when encountered between tests, which means that there is
4988 no compiled pattern (compiled_code is NULL). The line is in buffer.
4989
4990 Arguments: none
4991
4992 Returns: PR_OK continue processing next line
4993 PR_SKIP skip to a blank line
4994 PR_ABEND abort the pcre2test run
4995 */
4996
4997 static int
process_command(void)4998 process_command(void)
4999 {
5000 FILE *f;
5001 PCRE2_SIZE serial_size;
5002 size_t i;
5003 int rc, cmd, cmdlen, yield;
5004 uint16_t first_listed_newline;
5005 const char *cmdname;
5006 uint8_t *argptr, *serial;
5007
5008 yield = PR_OK;
5009 cmd = CMD_UNKNOWN;
5010 cmdlen = 0;
5011
5012 for (i = 0; i < cmdlistcount; i++)
5013 {
5014 cmdname = cmdlist[i].name;
5015 cmdlen = strlen(cmdname);
5016 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
5017 isspace(buffer[cmdlen+1]))
5018 {
5019 cmd = cmdlist[i].value;
5020 break;
5021 }
5022 }
5023
5024 argptr = buffer + cmdlen + 1;
5025
5026 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
5027 {
5028 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
5029 return PR_ABEND;
5030 }
5031
5032 switch(cmd)
5033 {
5034 case CMD_UNKNOWN:
5035 fprintf(outfile, "** Unknown command: %s", buffer);
5036 break;
5037
5038 case CMD_FORBID_UTF:
5039 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
5040 break;
5041
5042 case CMD_PERLTEST:
5043 restrict_for_perl_test = TRUE;
5044 break;
5045
5046 /* Set default pattern modifiers */
5047
5048 case CMD_PATTERN:
5049 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
5050 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
5051 def_patctl.jit = JIT_DEFAULT;
5052 break;
5053
5054 /* Set default subject modifiers */
5055
5056 case CMD_SUBJECT:
5057 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
5058 break;
5059
5060 /* Check the default newline, and if not one of those listed, set up the
5061 first one to be forced. An empty list unsets. */
5062
5063 case CMD_NEWLINE_DEFAULT:
5064 local_newline_default = 0; /* Unset */
5065 first_listed_newline = 0;
5066 for (;;)
5067 {
5068 while (isspace(*argptr)) argptr++;
5069 if (*argptr == 0) break;
5070 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
5071 {
5072 size_t nlen = strlen(newlines[i]);
5073 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
5074 isspace(argptr[nlen]))
5075 {
5076 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
5077 if (first_listed_newline == 0) first_listed_newline = i;
5078 }
5079 }
5080 while (*argptr != 0 && !isspace(*argptr)) argptr++;
5081 }
5082 local_newline_default = first_listed_newline;
5083 break;
5084
5085 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
5086 the compiled pattern (e.g. to give information) are permitted. The default
5087 pattern modifiers are ignored. */
5088
5089 case CMD_POP:
5090 case CMD_POPCOPY:
5091 if (patstacknext <= 0)
5092 {
5093 fprintf(outfile, "** Can't pop off an empty stack\n");
5094 return PR_SKIP;
5095 }
5096 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
5097 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
5098 return PR_SKIP;
5099
5100 if (cmd == CMD_POP)
5101 {
5102 SET(compiled_code, patstack[--patstacknext]);
5103 }
5104 else
5105 {
5106 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
5107 }
5108
5109 if (pat_patctl.jit != 0)
5110 {
5111 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5112 }
5113 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5114 if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
5115 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5116 {
5117 rc = show_pattern_info();
5118 if (rc != PR_OK) return rc;
5119 }
5120 break;
5121
5122 /* Save the stack of compiled patterns to a file, then empty the stack. */
5123
5124 case CMD_SAVE:
5125 if (patstacknext <= 0)
5126 {
5127 fprintf(outfile, "** No stacked patterns to save\n");
5128 return PR_OK;
5129 }
5130
5131 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
5132 if (rc != PR_OK) return rc;
5133
5134 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
5135 general_context);
5136 if (rc < 0)
5137 {
5138 fclose(f);
5139 if (!serial_error(rc, "Serialization")) return PR_ABEND;
5140 break;
5141 }
5142
5143 /* Write the length at the start of the file to make it straightforward to
5144 get the right memory when re-loading. This saves having to read the file size
5145 in different operating systems. To allow for different endianness (even
5146 though reloading with the opposite endianness does not work), write the
5147 length byte-by-byte. */
5148
5149 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5150 if (fwrite(serial, 1, serial_size, f) != serial_size)
5151 {
5152 fprintf(outfile, "** Wrong return from fwrite()\n");
5153 fclose(f);
5154 return PR_ABEND;
5155 }
5156
5157 fclose(f);
5158 PCRE2_SERIALIZE_FREE(serial);
5159 while(patstacknext > 0)
5160 {
5161 SET(compiled_code, patstack[--patstacknext]);
5162 SUB1(pcre2_code_free, compiled_code);
5163 }
5164 SET(compiled_code, NULL);
5165 break;
5166
5167 /* Load a set of compiled patterns from a file onto the stack */
5168
5169 case CMD_LOAD:
5170 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5171 if (rc != PR_OK) return rc;
5172
5173 serial_size = 0;
5174 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5175
5176 serial = malloc(serial_size);
5177 if (serial == NULL)
5178 {
5179 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5180 serial_size);
5181 fclose(f);
5182 return PR_ABEND;
5183 }
5184
5185 i = fread(serial, 1, serial_size, f);
5186 fclose(f);
5187
5188 if (i != serial_size)
5189 {
5190 fprintf(outfile, "** Wrong return from fread()\n");
5191 yield = PR_ABEND;
5192 }
5193 else
5194 {
5195 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5196 if (rc < 0)
5197 {
5198 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5199 }
5200 else
5201 {
5202 if (rc + patstacknext > PATSTACKSIZE)
5203 {
5204 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5205 rc, (rc == 1)? "" : "s");
5206 rc = PATSTACKSIZE - patstacknext;
5207 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5208 (rc == 1)? "" : "s");
5209 }
5210 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5211 general_context);
5212 if (rc < 0)
5213 {
5214 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5215 }
5216 else patstacknext += rc;
5217 }
5218 }
5219
5220 free(serial);
5221 break;
5222
5223 /* Load a set of binary tables into tables3. */
5224
5225 case CMD_LOADTABLES:
5226 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5227 if (rc != PR_OK) return rc;
5228
5229 if (tables3 == NULL)
5230 {
5231 (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5232 tables3 = malloc(loadtables_length);
5233 }
5234
5235 if (tables3 == NULL)
5236 {
5237 fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5238 yield = PR_ABEND;
5239 }
5240 else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5241 {
5242 fprintf(outfile, "** Wrong return from fread()\n");
5243 yield = PR_ABEND;
5244 }
5245
5246 fclose(f);
5247 break;
5248 }
5249
5250 return yield;
5251 }
5252
5253
5254
5255 /*************************************************
5256 * Process pattern line *
5257 *************************************************/
5258
5259 /* This function is called when the input buffer contains the start of a
5260 pattern. The first character is known to be a valid delimiter. The pattern is
5261 read, modifiers are interpreted, and a suitable local context is set up for
5262 this test. The pattern is then compiled.
5263
5264 Arguments: none
5265
5266 Returns: PR_OK continue processing next line
5267 PR_SKIP skip to a blank line
5268 PR_ABEND abort the pcre2test run
5269 */
5270
5271 static int
process_pattern(void)5272 process_pattern(void)
5273 {
5274 BOOL utf;
5275 uint32_t k;
5276 uint8_t *p = buffer;
5277 unsigned int delimiter = *p++;
5278 int errorcode;
5279 void *use_pat_context;
5280 void *use_pbuffer = NULL;
5281 uint32_t use_forbid_utf = forbid_utf;
5282 PCRE2_SIZE patlen;
5283 PCRE2_SIZE valgrind_access_length;
5284 PCRE2_SIZE erroroffset;
5285
5286 /* The perltest.sh script supports only / as a delimiter. */
5287
5288 if (restrict_for_perl_test && delimiter != '/')
5289 {
5290 fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5291 return PR_ABEND;
5292 }
5293
5294 /* Initialize the context and pattern/data controls for this test from the
5295 defaults. */
5296
5297 PATCTXCPY(pat_context, default_pat_context);
5298 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5299
5300 /* Find the end of the pattern, reading more lines if necessary. */
5301
5302 for(;;)
5303 {
5304 while (*p != 0)
5305 {
5306 if (*p == '\\' && p[1] != 0) p++;
5307 else if (*p == delimiter) break;
5308 p++;
5309 }
5310 if (*p != 0) break;
5311 if ((p = extend_inputline(infile, p, " > ")) == NULL)
5312 {
5313 fprintf(outfile, "** Unexpected EOF\n");
5314 return PR_ABEND;
5315 }
5316 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5317 }
5318
5319 /* If the first character after the delimiter is backslash, make the pattern
5320 end with backslash. This is purely to provide a way of testing for the error
5321 message when a pattern ends with backslash. */
5322
5323 if (p[1] == '\\') *p++ = '\\';
5324
5325 /* Terminate the pattern at the delimiter, and compute the length. */
5326
5327 *p++ = 0;
5328 patlen = p - buffer - 2;
5329
5330 /* Look for modifiers and options after the final delimiter. */
5331
5332 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5333
5334 /* Note that the match_invalid_utf option also sets utf when passed to
5335 pcre2_compile(). */
5336
5337 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5338
5339 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5340 exclusive with the utf modifier. */
5341
5342 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5343 {
5344 if (test_mode == PCRE8_MODE)
5345 {
5346 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5347 return PR_SKIP;
5348 }
5349 if (utf)
5350 {
5351 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5352 return PR_SKIP;
5353 }
5354 }
5355
5356 /* The convert and posix modifiers are mutually exclusive. */
5357
5358 if (pat_patctl.convert_type != CONVERT_UNSET &&
5359 (pat_patctl.control & CTL_POSIX) != 0)
5360 {
5361 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5362 return PR_SKIP;
5363 }
5364
5365 /* Check for mutually exclusive control modifiers. At present, these are all in
5366 the first control word. */
5367
5368 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5369 {
5370 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5371 if (c != 0 && c != (c & (~c+1)))
5372 {
5373 show_controls(c, 0, "** Not allowed together:");
5374 fprintf(outfile, "\n");
5375 return PR_SKIP;
5376 }
5377 }
5378
5379 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5380 specified. */
5381
5382 if (pat_patctl.jit == 0 &&
5383 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5384 pat_patctl.jit = JIT_DEFAULT;
5385
5386 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5387 in callouts. Convert from hex if requested (literal strings in quotes may be
5388 present within the hexadecimal pairs). The result must necessarily be fewer
5389 characters so will always fit in pbuffer8. */
5390
5391 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5392 {
5393 uint8_t *pp, *pt;
5394 uint32_t c, d;
5395
5396 pt = pbuffer8;
5397 for (pp = buffer + 1; *pp != 0; pp++)
5398 {
5399 if (isspace(*pp)) continue;
5400 c = *pp++;
5401
5402 /* Handle a literal substring */
5403
5404 if (c == '\'' || c == '"')
5405 {
5406 uint8_t *pq = pp;
5407 for (;; pp++)
5408 {
5409 d = *pp;
5410 if (d == 0)
5411 {
5412 fprintf(outfile, "** Missing closing quote in hex pattern: "
5413 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5414 return PR_SKIP;
5415 }
5416 if (d == c) break;
5417 *pt++ = d;
5418 }
5419 }
5420
5421 /* Expect a hex pair */
5422
5423 else
5424 {
5425 if (!isxdigit(c))
5426 {
5427 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5428 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5429 return PR_SKIP;
5430 }
5431 if (*pp == 0)
5432 {
5433 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5434 return PR_SKIP;
5435 }
5436 d = *pp;
5437 if (!isxdigit(d))
5438 {
5439 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5440 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5441 return PR_SKIP;
5442 }
5443 c = toupper(c);
5444 d = toupper(d);
5445 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5446 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5447 }
5448 }
5449 *pt = 0;
5450 patlen = pt - pbuffer8;
5451 }
5452
5453 /* If not a hex string, process for repetition expansion if requested. */
5454
5455 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5456 {
5457 uint8_t *pp, *pt;
5458
5459 pt = pbuffer8;
5460 for (pp = buffer + 1; *pp != 0; pp++)
5461 {
5462 uint8_t *pc = pp;
5463 uint32_t count = 1;
5464 size_t length = 1;
5465
5466 /* Check for replication syntax; if not found, the defaults just set will
5467 prevail and one character will be copied. */
5468
5469 if (pp[0] == '\\' && pp[1] == '[')
5470 {
5471 uint8_t *pe;
5472 for (pe = pp + 2; *pe != 0; pe++)
5473 {
5474 if (pe[0] == ']' && pe[1] == '{')
5475 {
5476 uint32_t clen = pe - pc - 2;
5477 uint32_t i = 0;
5478 unsigned long uli;
5479 char *endptr;
5480
5481 pe += 2;
5482 uli = strtoul((const char *)pe, &endptr, 10);
5483 if (U32OVERFLOW(uli))
5484 {
5485 fprintf(outfile, "** Pattern repeat count too large\n");
5486 return PR_SKIP;
5487 }
5488
5489 i = (uint32_t)uli;
5490 pe = (uint8_t *)endptr;
5491 if (*pe == '}')
5492 {
5493 if (i == 0)
5494 {
5495 fprintf(outfile, "** Zero repeat not allowed\n");
5496 return PR_SKIP;
5497 }
5498 pc += 2;
5499 count = i;
5500 length = clen;
5501 pp = pe;
5502 break;
5503 }
5504 }
5505 }
5506 }
5507
5508 /* Add to output. If the buffer is too small expand it. The function for
5509 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5510 size goes. */
5511
5512 while (pt + count * length > pbuffer8 + pbuffer8_size)
5513 {
5514 size_t pc_offset = pc - buffer;
5515 size_t pp_offset = pp - buffer;
5516 size_t pt_offset = pt - pbuffer8;
5517 expand_input_buffers();
5518 pc = buffer + pc_offset;
5519 pp = buffer + pp_offset;
5520 pt = pbuffer8 + pt_offset;
5521 }
5522
5523 for (; count > 0; count--)
5524 {
5525 memcpy(pt, pc, length);
5526 pt += length;
5527 }
5528 }
5529
5530 *pt = 0;
5531 patlen = pt - pbuffer8;
5532
5533 if ((pat_patctl.control & CTL_INFO) != 0)
5534 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5535 }
5536
5537 /* Neither hex nor expanded, just copy the input verbatim. */
5538
5539 else
5540 {
5541 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5542 }
5543
5544 /* Sort out character tables */
5545
5546 if (pat_patctl.locale[0] != 0)
5547 {
5548 if (pat_patctl.tables_id != 0)
5549 {
5550 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5551 return PR_SKIP;
5552 }
5553 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5554 {
5555 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5556 return PR_SKIP;
5557 }
5558 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5559 {
5560 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5561 if (locale_tables != NULL)
5562 {
5563 PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
5564 }
5565 PCRE2_MAKETABLES(locale_tables, general_context);
5566 }
5567 use_tables = locale_tables;
5568 }
5569
5570 else switch (pat_patctl.tables_id)
5571 {
5572 case 0: use_tables = NULL; break;
5573 case 1: use_tables = tables1; break;
5574 case 2: use_tables = tables2; break;
5575
5576 case 3:
5577 if (tables3 == NULL)
5578 {
5579 fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5580 "been loaded\n");
5581 return PR_SKIP;
5582 }
5583 use_tables = tables3;
5584 break;
5585
5586 default:
5587 fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5588 return PR_SKIP;
5589 }
5590
5591 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5592
5593 /* Set up for the stackguard test. */
5594
5595 if (pat_patctl.stackguard_test != 0)
5596 {
5597 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5598 }
5599
5600 /* Handle compiling via the POSIX interface, which doesn't support the
5601 timing, showing, or debugging options, nor the ability to pass over
5602 local character tables. Neither does it have 16-bit or 32-bit support. */
5603
5604 if ((pat_patctl.control & CTL_POSIX) != 0)
5605 {
5606 #ifdef SUPPORT_PCRE2_8
5607 int rc;
5608 int cflags = 0;
5609 const char *msg = "** Ignored with POSIX interface:";
5610 #endif
5611
5612 if (test_mode != PCRE8_MODE)
5613 {
5614 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5615 return PR_SKIP;
5616 }
5617
5618 #ifdef SUPPORT_PCRE2_8
5619 /* Check for features that the POSIX interface does not support. */
5620
5621 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5622 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5623 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5624 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5625 if (timeit > 0) prmsg(&msg, "timing");
5626 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5627
5628 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5629 {
5630 show_compile_options(
5631 pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
5632 msg, "");
5633 msg = "";
5634 }
5635
5636 if ((FLD(pat_context, extra_options) &
5637 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
5638 {
5639 show_compile_extra_options(
5640 FLD(pat_context, extra_options) &
5641 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
5642 msg = "";
5643 }
5644
5645 if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
5646 (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
5647 {
5648 show_controls(
5649 pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
5650 pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
5651 msg);
5652 msg = "";
5653
5654 /* Remove ignored options so as not to get a repeated message for those
5655 that are actually subject controls. */
5656
5657 pat_patctl.control &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS);
5658 pat_patctl.control2 &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS2);
5659 }
5660
5661 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5662 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5663 prmsg(&msg, "max_pattern_length");
5664 if (FLD(pat_context, max_pattern_compiled_length) != PCRE2_UNSET)
5665 prmsg(&msg, "max_pattern_compiled_length");
5666 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5667 prmsg(&msg, "parens_nest_limit");
5668
5669 if (msg[0] == 0) fprintf(outfile, "\n");
5670
5671 /* Translate PCRE2 options to POSIX options and then compile. */
5672
5673 if (utf) cflags |= REG_UTF;
5674 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5675 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5676 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5677 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5678 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5679 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5680 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5681
5682 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5683 {
5684 preg.re_endp = (char *)pbuffer8 + patlen;
5685 cflags |= REG_PEND;
5686 }
5687
5688 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5689
5690 /* Compiling failed */
5691
5692 if (rc != 0)
5693 {
5694 size_t bsize, usize;
5695 int psize;
5696
5697 preg.re_pcre2_code = NULL; /* In case something was left in there */
5698 preg.re_match_data = NULL;
5699
5700 bsize = (pat_patctl.regerror_buffsize != 0)?
5701 pat_patctl.regerror_buffsize : pbuffer8_size;
5702 if (bsize + 8 < pbuffer8_size)
5703 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5704 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5705
5706 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5707 versions of snprintf() put a zero byte at the end, but others do not.
5708 Therefore, we print a maximum of one less than the size of the buffer. */
5709
5710 psize = (int)bsize - 1;
5711 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5712 if (usize > bsize)
5713 {
5714 fprintf(outfile, "** regerror() message truncated\n");
5715 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5716 fprintf(outfile, "** regerror() buffer overflow\n");
5717 }
5718 return PR_SKIP;
5719 }
5720
5721 /* Compiling succeeded. Check that the values in the preg block are sensible.
5722 It can happen that pcre2test is accidentally linked with a different POSIX
5723 library which succeeds, but of course puts different things into preg. In
5724 this situation, calling regfree() may cause a segfault (or invalid free() in
5725 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5726 calling of regfree() on exit. */
5727
5728 if (preg.re_pcre2_code == NULL ||
5729 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5730 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5731 preg.re_match_data == NULL ||
5732 preg.re_cflags != cflags)
5733 {
5734 fprintf(outfile,
5735 "** The regcomp() function returned zero (success), but the values set\n"
5736 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5737 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5738 "** some other POSIX regex library.\n**\n");
5739 preg.re_pcre2_code = NULL;
5740 return PR_ABEND;
5741 }
5742
5743 return PR_OK;
5744 #endif /* SUPPORT_PCRE2_8 */
5745 }
5746
5747 /* Handle compiling via the native interface. Controls that act later are
5748 ignored with "push". Replacements are locked out. */
5749
5750 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5751 {
5752 if (pat_patctl.replacement[0] != 0)
5753 {
5754 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5755 return PR_OK;
5756 }
5757 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5758 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5759 {
5760 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5761 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5762 "** Ignored when compiled pattern is stacked with 'push':");
5763 fprintf(outfile, "\n");
5764 }
5765 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5766 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5767 {
5768 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5769 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5770 "** Applies only to compile when pattern is stacked with 'push':");
5771 fprintf(outfile, "\n");
5772 }
5773 }
5774
5775 /* Convert the input in non-8-bit modes. */
5776
5777 errorcode = 0;
5778
5779 #ifdef SUPPORT_PCRE2_16
5780 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5781 #endif
5782
5783 #ifdef SUPPORT_PCRE2_32
5784 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5785 #endif
5786
5787 switch(errorcode)
5788 {
5789 case -1:
5790 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5791 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5792 return PR_SKIP;
5793
5794 case -2:
5795 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5796 "cannot be converted to UTF\n");
5797 return PR_SKIP;
5798
5799 case -3:
5800 fprintf(outfile, "** Failed: character value greater than 0xffff "
5801 "cannot be converted to 16-bit in non-UTF mode\n");
5802 return PR_SKIP;
5803
5804 default:
5805 break;
5806 }
5807
5808 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5809 patlen. If it is to be converted, copy the result back afterwards so that it
5810 ends up back in the usual place. */
5811
5812 if (pat_patctl.convert_type != CONVERT_UNSET)
5813 {
5814 int rc;
5815 int convert_return = PR_OK;
5816 uint32_t convert_options = pat_patctl.convert_type;
5817 void *converted_pattern;
5818 PCRE2_SIZE converted_length;
5819
5820 if (pat_patctl.convert_length != 0)
5821 {
5822 converted_length = pat_patctl.convert_length;
5823 converted_pattern = malloc(converted_length * code_unit_size);
5824 if (converted_pattern == NULL)
5825 {
5826 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5827 return PR_SKIP;
5828 }
5829 }
5830 else converted_pattern = NULL; /* Let the library allocate */
5831
5832 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5833 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5834 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5835
5836 CONCTXCPY(con_context, default_con_context);
5837
5838 if (pat_patctl.convert_glob_escape != 0)
5839 {
5840 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5841 pat_patctl.convert_glob_escape;
5842 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5843 if (rc != 0)
5844 {
5845 fprintf(outfile, "** Invalid glob escape '%c'\n",
5846 pat_patctl.convert_glob_escape);
5847 convert_return = PR_SKIP;
5848 goto CONVERT_FINISH;
5849 }
5850 }
5851
5852 if (pat_patctl.convert_glob_separator != 0)
5853 {
5854 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5855 if (rc != 0)
5856 {
5857 fprintf(outfile, "** Invalid glob separator '%c'\n",
5858 pat_patctl.convert_glob_separator);
5859 convert_return = PR_SKIP;
5860 goto CONVERT_FINISH;
5861 }
5862 }
5863
5864 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5865 &converted_pattern, &converted_length, con_context);
5866
5867 if (rc != 0)
5868 {
5869 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5870 converted_length);
5871 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5872 }
5873
5874 /* Output the converted pattern, then copy it. */
5875
5876 else
5877 {
5878 BOOL toolong;
5879 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5880 fprintf(outfile, "\n");
5881
5882 if (test_mode == PCRE8_MODE)
5883 toolong = (converted_length + 1 > pbuffer8_size);
5884 else if (test_mode == PCRE16_MODE)
5885 toolong = (2*(converted_length + 1) > pbuffer8_size);
5886 else /* 32-bit */
5887 toolong = (4*(converted_length + 1) > pbuffer8_size);
5888
5889 if (toolong)
5890 {
5891 fprintf(outfile, "** Pattern conversion is too long for the buffer\n");
5892 convert_return = PR_SKIP;
5893 }
5894 else
5895 {
5896 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5897 patlen = converted_length;
5898 }
5899 }
5900
5901 /* Free the converted pattern. */
5902
5903 CONVERT_FINISH:
5904 if (pat_patctl.convert_length != 0)
5905 free(converted_pattern);
5906 else
5907 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5908
5909 /* Return if conversion was unsuccessful. */
5910
5911 if (convert_return != PR_OK) return convert_return;
5912 }
5913
5914 /* By default we pass a zero-terminated pattern, but a length is passed if
5915 "use_length" was specified or this is a hex pattern (which might contain binary
5916 zeros). When valgrind is supported, arrange for the unused part of the buffer
5917 to be marked as no access. */
5918
5919 valgrind_access_length = patlen;
5920 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5921 {
5922 patlen = PCRE2_ZERO_TERMINATED;
5923 valgrind_access_length += 1; /* For the terminating zero */
5924 }
5925
5926 #ifdef SUPPORT_VALGRIND
5927 #ifdef SUPPORT_PCRE2_8
5928 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5929 {
5930 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5931 pbuffer8_size - valgrind_access_length);
5932 }
5933 #endif
5934 #ifdef SUPPORT_PCRE2_16
5935 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5936 {
5937 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5938 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5939 }
5940 #endif
5941 #ifdef SUPPORT_PCRE2_32
5942 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5943 {
5944 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5945 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5946 }
5947 #endif
5948 #else /* Valgrind not supported */
5949 (void)valgrind_access_length; /* Avoid compiler warning */
5950 #endif
5951
5952 /* If #newline_default has been used and the library was not compiled with an
5953 appropriate default newline setting, local_newline_default will be non-zero. We
5954 use this if there is no explicit newline modifier. */
5955
5956 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5957 {
5958 SETFLD(pat_context, newline_convention, local_newline_default);
5959 }
5960
5961 /* The null_context modifier is used to test calling pcre2_compile() with a
5962 NULL context. */
5963
5964 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5965 NULL : PTR(pat_context);
5966
5967 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5968 and PCRE2_NEVER_UCP are invalid with it. */
5969
5970 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5971
5972 /* Set use_pbuffer to the input buffer, or leave it as NULL if requested. */
5973
5974 if ((pat_patctl.control2 & CTL2_NULL_PATTERN) == 0)
5975 {
5976 #ifdef SUPPORT_PCRE2_8
5977 if (test_mode == PCRE8_MODE) use_pbuffer = pbuffer8;
5978 #endif
5979 #ifdef SUPPORT_PCRE2_16
5980 if (test_mode == PCRE16_MODE) use_pbuffer = pbuffer16;
5981 #endif
5982 #ifdef SUPPORT_PCRE2_32
5983 if (test_mode == PCRE32_MODE) use_pbuffer = pbuffer32;
5984 #endif
5985 }
5986
5987 /* Compile many times when timing. */
5988
5989 if (timeit > 0)
5990 {
5991 int i;
5992 clock_t time_taken = 0;
5993 for (i = 0; i < timeit; i++)
5994 {
5995 clock_t start_time = clock();
5996 PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
5997 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5998 use_pat_context);
5999 time_taken += clock() - start_time;
6000 if (TEST(compiled_code, !=, NULL))
6001 { SUB1(pcre2_code_free, compiled_code); }
6002 }
6003 total_compile_time += time_taken;
6004 fprintf(outfile, "Compile time %8.4f microseconds\n",
6005 ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
6006 }
6007
6008 /* A final compile that is used "for real". */
6009
6010 PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
6011 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, use_pat_context);
6012
6013 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
6014 and 32-bit buffers can be marked completely undefined, but we must leave the
6015 pattern in the 8-bit buffer defined because it may be read from a callout
6016 during matching. */
6017
6018 #ifdef SUPPORT_VALGRIND
6019 #ifdef SUPPORT_PCRE2_8
6020 if (test_mode == PCRE8_MODE)
6021 {
6022 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
6023 pbuffer8_size - valgrind_access_length);
6024 }
6025 #endif
6026 #ifdef SUPPORT_PCRE2_16
6027 if (test_mode == PCRE16_MODE)
6028 {
6029 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
6030 }
6031 #endif
6032 #ifdef SUPPORT_PCRE2_32
6033 if (test_mode == PCRE32_MODE)
6034 {
6035 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
6036 }
6037 #endif
6038 #endif
6039
6040 /* Call the JIT compiler if requested. When timing, we must free and recompile
6041 the pattern each time because that is the only way to free the JIT compiled
6042 code. We know that compilation will always succeed. */
6043
6044 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
6045 {
6046 if (timeit > 0)
6047 {
6048 int i;
6049 clock_t time_taken = 0;
6050
6051 for (i = 0; i < timeit; i++)
6052 {
6053 clock_t start_time;
6054 SUB1(pcre2_code_free, compiled_code);
6055 PCRE2_COMPILE(compiled_code, use_pbuffer, patlen,
6056 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
6057 use_pat_context);
6058 start_time = clock();
6059 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
6060 time_taken += clock() - start_time;
6061 if (jitrc != 0)
6062 {
6063 fprintf(outfile, "JIT compilation was not successful");
6064 if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
6065 break;
6066 }
6067 }
6068 total_jit_compile_time += time_taken;
6069 if (jitrc == 0)
6070 fprintf(outfile, "JIT compile %8.4f microseconds\n",
6071 ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit);
6072 }
6073 else
6074 {
6075 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
6076 if (jitrc != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
6077 {
6078 fprintf(outfile, "JIT compilation was not successful");
6079 if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND;
6080 }
6081 }
6082 }
6083
6084 /* Compilation failed; go back for another re, skipping to blank line
6085 if non-interactive. */
6086
6087 if (TEST(compiled_code, ==, NULL))
6088 {
6089 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
6090 (int)erroroffset);
6091 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
6092 return PR_SKIP;
6093 }
6094
6095 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
6096 locked out at compile time, but we must also check for occurrences of \P, \p,
6097 and \X, which are only supported when Unicode is supported. */
6098
6099 if (forbid_utf != 0)
6100 {
6101 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
6102 {
6103 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
6104 "#forbid_utf command\n");
6105 return PR_SKIP;
6106 }
6107 }
6108
6109 /* Remember the maximum lookbehind, for partial matching. */
6110
6111 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
6112 return PR_ABEND;
6113
6114 /* Remember the number of captures. */
6115
6116 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
6117 return PR_ABEND;
6118
6119 /* If an explicit newline modifier was given, set the information flag in the
6120 pattern so that it is preserved over push/pop. */
6121
6122 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
6123 {
6124 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
6125 }
6126
6127 /* Output code size and other information if requested. */
6128
6129 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
6130 if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
6131 if ((pat_patctl.control & CTL_ANYINFO) != 0)
6132 {
6133 int rc = show_pattern_info();
6134 if (rc != PR_OK) return rc;
6135 }
6136
6137 /* The "push" control requests that the compiled pattern be remembered on a
6138 stack. This is mainly for testing the serialization functionality. */
6139
6140 if ((pat_patctl.control & CTL_PUSH) != 0)
6141 {
6142 if (patstacknext >= PATSTACKSIZE)
6143 {
6144 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
6145 return PR_ABEND;
6146 }
6147 patstack[patstacknext++] = PTR(compiled_code);
6148 SET(compiled_code, NULL);
6149 }
6150
6151 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
6152 copy of the pattern, the latter with a copy of its character tables. This tests
6153 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
6154
6155 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
6156 {
6157 if (patstacknext >= PATSTACKSIZE)
6158 {
6159 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
6160 return PR_ABEND;
6161 }
6162 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
6163 {
6164 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
6165 }
6166 else
6167 {
6168 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
6169 compiled_code); }
6170 }
6171
6172 return PR_OK;
6173 }
6174
6175
6176
6177 /*************************************************
6178 * Check heap, match or depth limit *
6179 *************************************************/
6180
6181 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
6182 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
6183
6184 Arguments:
6185 pp the subject string
6186 ulen length of subject or PCRE2_ZERO_TERMINATED
6187 errnumber defines which limit to test
6188 msg string to include in final message
6189
6190 Returns: the return from the final match function call
6191 */
6192
6193 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)6194 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
6195 {
6196 int capcount;
6197 uint32_t min = 0;
6198 uint32_t mid = 64;
6199 uint32_t max = UINT32_MAX;
6200
6201 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6202 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6203 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6204
6205 for (;;)
6206 {
6207 uint32_t stack_start = 0;
6208
6209 /* If we are checking the heap limit, free any frames vector that is cached
6210 in the match_data so we always start without one. */
6211
6212 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6213 {
6214 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6215
6216 #ifdef SUPPORT_PCRE2_8
6217 if (code_unit_size == 1)
6218 {
6219 match_data8->memctl.free(match_data8->heapframes,
6220 match_data8->memctl.memory_data);
6221 match_data8->heapframes = NULL;
6222 match_data8->heapframes_size = 0;
6223 }
6224 #endif
6225
6226 #ifdef SUPPORT_PCRE2_16
6227 if (code_unit_size == 2)
6228 {
6229 match_data16->memctl.free(match_data16->heapframes,
6230 match_data16->memctl.memory_data);
6231 match_data16->heapframes = NULL;
6232 match_data16->heapframes_size = 0;
6233 }
6234 #endif
6235
6236 #ifdef SUPPORT_PCRE2_32
6237 if (code_unit_size == 4)
6238 {
6239 match_data32->memctl.free(match_data32->heapframes,
6240 match_data32->memctl.memory_data);
6241 match_data32->heapframes = NULL;
6242 match_data32->heapframes_size = 0;
6243 }
6244 #endif
6245 }
6246
6247 /* No need to mess with the frames vector for match or depth limits. */
6248
6249 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6250 {
6251 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6252 }
6253 else
6254 {
6255 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6256 }
6257
6258 /* Do the appropriate match */
6259
6260 if ((dat_datctl.control & CTL_DFA) != 0)
6261 {
6262 stack_start = DFA_START_RWS_SIZE/1024;
6263 if (dfa_workspace == NULL)
6264 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6265 if (dfa_matched++ == 0)
6266 dfa_workspace[0] = -1; /* To catch bad restart */
6267 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6268 dat_datctl.options, match_data,
6269 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6270 }
6271
6272 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6273 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6274 dat_datctl.options, match_data, PTR(dat_context));
6275
6276 else
6277 {
6278 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6279 dat_datctl.options, match_data, PTR(dat_context));
6280 }
6281
6282 if (capcount == errnumber)
6283 {
6284 if ((mid & 0x80000000u) != 0)
6285 {
6286 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6287 "restriction\n", msg);
6288 break;
6289 }
6290
6291 min = mid;
6292 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6293 }
6294 else if (capcount >= 0 ||
6295 capcount == PCRE2_ERROR_NOMATCH ||
6296 capcount == PCRE2_ERROR_PARTIAL)
6297 {
6298 /* If we've not hit the error with a heap limit less than the size of the
6299 initial stack frame vector (for pcre2_match()) or the initial stack
6300 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6301 the minimum limit is zero; there's no need to go on. The other limits are
6302 always greater than zero. */
6303
6304 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6305 {
6306 fprintf(outfile, "Minimum %s limit = 0\n", msg);
6307 break;
6308 }
6309 if (mid == min + 1)
6310 {
6311 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6312 break;
6313 }
6314 max = mid;
6315 mid = (min + max)/2;
6316 }
6317 else break; /* Some other error */
6318 }
6319
6320 return capcount;
6321 }
6322
6323
6324
6325 /*************************************************
6326 * Substitute callout function *
6327 *************************************************/
6328
6329 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6330 Print out the data that is passed back. The substitute callout block is
6331 identical for all code unit widths, so we just pick one.
6332
6333 Arguments:
6334 scb pointer to substitute callout block
6335 data_ptr callout data
6336
6337 Returns: nothing
6338 */
6339
6340 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6341 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6342 void *data_ptr)
6343 {
6344 int yield = 0;
6345 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6346 (void)data_ptr; /* Not used */
6347
6348 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6349 scb->subscount, scb->oveccount,
6350 scb->ovector[0], scb->ovector[1]);
6351
6352 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6353 utf, outfile);
6354
6355 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6356 scb->output_offsets[0], scb->output_offsets[1]);
6357
6358 PCHARSV(scb->output, scb->output_offsets[0],
6359 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6360
6361 if (scb->subscount == dat_datctl.substitute_stop)
6362 {
6363 yield = -1;
6364 fprintf(outfile, " STOPPED");
6365 }
6366 else if (scb->subscount == dat_datctl.substitute_skip)
6367 {
6368 yield = +1;
6369 fprintf(outfile, " SKIPPED");
6370 }
6371
6372 fprintf(outfile, "\"\n");
6373 return yield;
6374 }
6375
6376
6377 /*************************************************
6378 * Callout function *
6379 *************************************************/
6380
6381 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6382 we are in the match (unless suppressed). Yield zero unless more callouts than
6383 the fail count, or the callout data is not zero. The only differences in the
6384 callout block for different code unit widths are that the pointers to the
6385 subject, the most recent MARK, and a callout argument string point to strings
6386 of the appropriate width. Casts can be used to deal with this.
6387
6388 Arguments:
6389 cb a pointer to a callout block
6390 callout_data_ptr the provided callout data
6391
6392 Returns: 0 or 1 or an error, as determined by settings
6393 */
6394
6395 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6396 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6397 {
6398 FILE *f, *fdefault;
6399 uint32_t i, pre_start, post_start, subject_length;
6400 PCRE2_SIZE current_position;
6401 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6402 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6403 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6404
6405 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6406 happens only once in simple cases, but we want to repeat after any additional
6407 output caused by CALLOUT_EXTRA. */
6408
6409 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6410 NULL : outfile;
6411
6412 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6413 {
6414 f = outfile;
6415 switch (cb->callout_flags)
6416 {
6417 case PCRE2_CALLOUT_BACKTRACK:
6418 fprintf(f, "Backtrack\n");
6419 break;
6420
6421 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6422 fprintf(f, "Backtrack\nNo other matching paths\n");
6423 /* Fall through */
6424
6425 case PCRE2_CALLOUT_STARTMATCH:
6426 fprintf(f, "New match attempt\n");
6427 break;
6428
6429 default:
6430 f = fdefault;
6431 break;
6432 }
6433 }
6434 else f = fdefault;
6435
6436 /* For a callout with a string argument, show the string first because there
6437 isn't a tidy way to fit it in the rest of the data. */
6438
6439 if (cb->callout_string != NULL)
6440 {
6441 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6442 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6443 cb->callout_string_offset, delimiter);
6444 PCHARSV(cb->callout_string, 0,
6445 cb->callout_string_length, utf, outfile);
6446 for (i = 0; callout_start_delims[i] != 0; i++)
6447 if (delimiter == callout_start_delims[i])
6448 {
6449 delimiter = callout_end_delims[i];
6450 break;
6451 }
6452 fprintf(outfile, "%c", delimiter);
6453 if (!callout_capture) fprintf(outfile, "\n");
6454 }
6455
6456 /* Show captured strings if required */
6457
6458 if (callout_capture)
6459 {
6460 if (cb->callout_string == NULL)
6461 fprintf(outfile, "Callout %d:", cb->callout_number);
6462 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6463 for (i = 2; i < cb->capture_top * 2; i += 2)
6464 {
6465 fprintf(outfile, "%2d: ", i/2);
6466 if (cb->offset_vector[i] == PCRE2_UNSET)
6467 fprintf(outfile, "<unset>");
6468 else
6469 {
6470 PCHARSV(cb->subject, cb->offset_vector[i],
6471 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6472 }
6473 fprintf(outfile, "\n");
6474 }
6475 }
6476
6477 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6478 non-printing characters), the first time, or if giving full details. On
6479 subsequent calls in the same match, we use PCHARS() just to find the printed
6480 lengths of the substrings. */
6481
6482 if (callout_where)
6483 {
6484 if (f != NULL) fprintf(f, "--->");
6485
6486 /* The subject before the match start. */
6487
6488 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6489
6490 /* If a lookbehind is involved, the current position may be earlier than the
6491 match start. If so, use the match start instead. */
6492
6493 current_position = (cb->current_position >= cb->start_match)?
6494 cb->current_position : cb->start_match;
6495
6496 /* The subject between the match start and the current position. */
6497
6498 PCHARS(post_start, cb->subject, cb->start_match,
6499 current_position - cb->start_match, utf, f);
6500
6501 /* Print from the current position to the end. */
6502
6503 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6504 utf, f);
6505
6506 /* Calculate the total subject printed length (no print). */
6507
6508 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6509
6510 if (f != NULL) fprintf(f, "\n");
6511
6512 /* For automatic callouts, show the pattern offset. Otherwise, for a
6513 numerical callout whose number has not already been shown with captured
6514 strings, show the number here. A callout with a string argument has been
6515 displayed above. */
6516
6517 if (cb->callout_number == 255)
6518 {
6519 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6520 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6521 }
6522 else
6523 {
6524 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6525 else fprintf(outfile, "%3d ", cb->callout_number);
6526 }
6527
6528 /* Now show position indicators */
6529
6530 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6531 fprintf(outfile, "^");
6532
6533 if (post_start > 0)
6534 {
6535 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6536 fprintf(outfile, "^");
6537 }
6538
6539 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6540 fprintf(outfile, " ");
6541
6542 if (cb->next_item_length != 0)
6543 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6544 pbuffer8 + cb->pattern_position);
6545 else
6546 fprintf(outfile, "End of pattern");
6547
6548 fprintf(outfile, "\n");
6549 }
6550
6551 first_callout = FALSE;
6552
6553 /* Show any mark info */
6554
6555 if (cb->mark != last_callout_mark)
6556 {
6557 if (cb->mark == NULL)
6558 fprintf(outfile, "Latest Mark: <unset>\n");
6559 else
6560 {
6561 fprintf(outfile, "Latest Mark: ");
6562 PCHARSV(cb->mark, -1, -1, utf, outfile);
6563 putc('\n', outfile);
6564 }
6565 last_callout_mark = cb->mark;
6566 }
6567
6568 /* Show callout data */
6569
6570 if (callout_data_ptr != NULL)
6571 {
6572 int callout_data = *((int32_t *)callout_data_ptr);
6573 if (callout_data != 0)
6574 {
6575 fprintf(outfile, "Callout data = %d\n", callout_data);
6576 return callout_data;
6577 }
6578 }
6579
6580 /* Keep count and give the appropriate return code */
6581
6582 callout_count++;
6583
6584 if (cb->callout_number == dat_datctl.cerror[0] &&
6585 callout_count >= dat_datctl.cerror[1])
6586 return PCRE2_ERROR_CALLOUT;
6587
6588 if (cb->callout_number == dat_datctl.cfail[0] &&
6589 callout_count >= dat_datctl.cfail[1])
6590 return 1;
6591
6592 return 0;
6593 }
6594
6595
6596
6597 /*************************************************
6598 * Handle *MARK and copy/get tests *
6599 *************************************************/
6600
6601 /* This function is called after complete and partial matches. It runs the
6602 tests for substring extraction.
6603
6604 Arguments:
6605 utf TRUE for utf
6606 capcount return from pcre2_match()
6607
6608 Returns: FALSE if print_error_message() fails
6609 */
6610
6611 static BOOL
copy_and_get(BOOL utf,int capcount)6612 copy_and_get(BOOL utf, int capcount)
6613 {
6614 int i;
6615 uint8_t *nptr;
6616
6617 /* Test copy strings by number */
6618
6619 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6620 {
6621 int rc;
6622 PCRE2_SIZE length, length2;
6623 uint32_t copybuffer[256];
6624 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6625 length = sizeof(copybuffer)/code_unit_size;
6626 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6627 if (rc < 0)
6628 {
6629 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6630 if (!print_error_message(rc, "", "\n")) return FALSE;
6631 }
6632 else
6633 {
6634 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6635 if (rc < 0)
6636 {
6637 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6638 if (!print_error_message(rc, "", "\n")) return FALSE;
6639 }
6640 else if (length2 != length)
6641 {
6642 fprintf(outfile, "Mismatched substring lengths: %"
6643 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6644 }
6645 fprintf(outfile, "%2dC ", n);
6646 PCHARSV(copybuffer, 0, length, utf, outfile);
6647 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6648 }
6649 }
6650
6651 /* Test copy strings by name */
6652
6653 nptr = dat_datctl.copy_names;
6654 for (;;)
6655 {
6656 int rc;
6657 int groupnumber;
6658 PCRE2_SIZE length, length2;
6659 uint32_t copybuffer[256];
6660 int namelen = strlen((const char *)nptr);
6661 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6662 PCRE2_SIZE cnl = namelen;
6663 #endif
6664 if (namelen == 0) break;
6665
6666 #ifdef SUPPORT_PCRE2_8
6667 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6668 #endif
6669 #ifdef SUPPORT_PCRE2_16
6670 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6671 #endif
6672 #ifdef SUPPORT_PCRE2_32
6673 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6674 #endif
6675
6676 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6677 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6678 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6679
6680 length = sizeof(copybuffer)/code_unit_size;
6681 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6682 if (rc < 0)
6683 {
6684 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6685 if (!print_error_message(rc, "", "\n")) return FALSE;
6686 }
6687 else
6688 {
6689 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6690 if (rc < 0)
6691 {
6692 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6693 if (!print_error_message(rc, "", "\n")) return FALSE;
6694 }
6695 else if (length2 != length)
6696 {
6697 fprintf(outfile, "Mismatched substring lengths: %"
6698 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6699 }
6700 fprintf(outfile, " C ");
6701 PCHARSV(copybuffer, 0, length, utf, outfile);
6702 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6703 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6704 else fprintf(outfile, " (non-unique)\n");
6705 }
6706 nptr += namelen + 1;
6707 }
6708
6709 /* Test get strings by number */
6710
6711 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6712 {
6713 int rc;
6714 PCRE2_SIZE length;
6715 void *gotbuffer;
6716 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6717 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6718 if (rc < 0)
6719 {
6720 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6721 if (!print_error_message(rc, "", "\n")) return FALSE;
6722 }
6723 else
6724 {
6725 fprintf(outfile, "%2dG ", n);
6726 PCHARSV(gotbuffer, 0, length, utf, outfile);
6727 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6728 PCRE2_SUBSTRING_FREE(gotbuffer);
6729 }
6730 }
6731
6732 /* Test get strings by name */
6733
6734 nptr = dat_datctl.get_names;
6735 for (;;)
6736 {
6737 PCRE2_SIZE length;
6738 void *gotbuffer;
6739 int rc;
6740 int groupnumber;
6741 int namelen = strlen((const char *)nptr);
6742 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6743 PCRE2_SIZE cnl = namelen;
6744 #endif
6745 if (namelen == 0) break;
6746
6747 #ifdef SUPPORT_PCRE2_8
6748 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6749 #endif
6750 #ifdef SUPPORT_PCRE2_16
6751 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6752 #endif
6753 #ifdef SUPPORT_PCRE2_32
6754 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6755 #endif
6756
6757 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6758 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6759 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6760
6761 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6762 if (rc < 0)
6763 {
6764 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6765 if (!print_error_message(rc, "", "\n")) return FALSE;
6766 }
6767 else
6768 {
6769 fprintf(outfile, " G ");
6770 PCHARSV(gotbuffer, 0, length, utf, outfile);
6771 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6772 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6773 else fprintf(outfile, " (non-unique)\n");
6774 PCRE2_SUBSTRING_FREE(gotbuffer);
6775 }
6776 nptr += namelen + 1;
6777 }
6778
6779 /* Test getting the complete list of captured strings. */
6780
6781 if ((dat_datctl.control & CTL_GETALL) != 0)
6782 {
6783 int rc;
6784 void **stringlist;
6785 PCRE2_SIZE *lengths;
6786 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6787 if (rc < 0)
6788 {
6789 fprintf(outfile, "get substring list failed (%d): ", rc);
6790 if (!print_error_message(rc, "", "\n")) return FALSE;
6791 }
6792 else
6793 {
6794 for (i = 0; i < capcount; i++)
6795 {
6796 fprintf(outfile, "%2dL ", i);
6797 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6798 putc('\n', outfile);
6799 }
6800 if (stringlist[i] != NULL)
6801 fprintf(outfile, "string list not terminated by NULL\n");
6802 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6803 }
6804 }
6805
6806 return TRUE;
6807 }
6808
6809
6810
6811 /*************************************************
6812 * Show an entire ovector *
6813 *************************************************/
6814
6815 /* This function is called after partial matching or match failure, when the
6816 "allvector" modifier is set. It is a means of checking the contents of the
6817 entire ovector, to ensure no modification of fields that should be unchanged.
6818
6819 Arguments:
6820 ovector points to the ovector
6821 oveccount number of pairs
6822
6823 Returns: nothing
6824 */
6825
6826 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6827 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6828 {
6829 uint32_t i;
6830 for (i = 0; i < 2*oveccount; i += 2)
6831 {
6832 PCRE2_SIZE start = ovector[i];
6833 PCRE2_SIZE end = ovector[i+1];
6834
6835 fprintf(outfile, "%2d: ", i/2);
6836 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6837 fprintf(outfile, "<unset>\n");
6838 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6839 fprintf(outfile, "<unchanged>\n");
6840 else
6841 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6842 (unsigned long int)end);
6843 }
6844 }
6845
6846
6847 /*************************************************
6848 * Process a data line *
6849 *************************************************/
6850
6851 /* The line is in buffer; it will not be empty.
6852
6853 Arguments: none
6854
6855 Returns: PR_OK continue processing next line
6856 PR_SKIP skip to a blank line
6857 PR_ABEND abort the pcre2test run
6858 */
6859
6860 static int
process_data(void)6861 process_data(void)
6862 {
6863 PCRE2_SIZE len, ulen, arg_ulen;
6864 uint32_t gmatched;
6865 uint32_t c, k;
6866 uint32_t g_notempty = 0;
6867 uint8_t *p, *pp, *start_rep;
6868 size_t needlen;
6869 void *use_dat_context;
6870 BOOL utf;
6871 BOOL subject_literal;
6872
6873 PCRE2_SIZE *ovector;
6874 PCRE2_SIZE ovecsave[3];
6875 uint32_t oveccount;
6876
6877 #ifdef SUPPORT_PCRE2_8
6878 uint8_t *q8 = NULL;
6879 #endif
6880 #ifdef SUPPORT_PCRE2_16
6881 uint16_t *q16 = NULL;
6882 #endif
6883 #ifdef SUPPORT_PCRE2_32
6884 uint32_t *q32 = NULL;
6885 #endif
6886
6887 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6888
6889 /* Copy the default context and data control blocks to the active ones. Then
6890 copy from the pattern the controls that can be set in either the pattern or the
6891 data. This allows them to be overridden in the data line. We do not do this for
6892 options because those that are common apply separately to compiling and
6893 matching. */
6894
6895 DATCTXCPY(dat_context, default_dat_context);
6896 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6897 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6898 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6899 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6900 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6901
6902 if (dat_datctl.substitute_skip == 0)
6903 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6904 if (dat_datctl.substitute_stop == 0)
6905 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6906
6907 /* Initialize for scanning the data line. */
6908
6909 #ifdef SUPPORT_PCRE2_8
6910 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6911 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6912 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6913 #else
6914 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6915 #endif
6916
6917 start_rep = NULL;
6918 len = strlen((const char *)buffer);
6919 while (len > 0 && isspace(buffer[len-1])) len--;
6920 buffer[len] = 0;
6921 p = buffer;
6922 while (isspace(*p)) p++;
6923
6924 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6925 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6926
6927 if (utf)
6928 {
6929 uint8_t *q;
6930 uint32_t cc;
6931 int n = 1;
6932 uint8_t *q_end = p + len;
6933
6934 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, q_end, &cc);
6935 if (n <= 0)
6936 {
6937 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6938 "in UTF mode\n");
6939 return PR_OK;
6940 }
6941 }
6942
6943 #ifdef SUPPORT_VALGRIND
6944 /* Mark the dbuffer as addressable but undefined again. */
6945 if (dbuffer != NULL)
6946 {
6947 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6948 }
6949 #endif
6950
6951 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6952 the number of code units that will be needed (though the buffer may have to be
6953 extended if replication is involved). */
6954
6955 needlen = (len+1) * code_unit_size;
6956 if (dbuffer == NULL || needlen >= dbuffer_size)
6957 {
6958 while (needlen >= dbuffer_size)
6959 {
6960 if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
6961 else dbuffer_size = needlen + 1;
6962 }
6963 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6964 if (dbuffer == NULL)
6965 {
6966 fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size);
6967 exit(1);
6968 }
6969 }
6970 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6971
6972 /* Scan the data line, interpreting data escapes, and put the result into a
6973 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6974 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6975 */
6976
6977 while ((c = *p++) != 0)
6978 {
6979 int32_t i = 0;
6980 size_t replen;
6981
6982 /* ] may mark the end of a replicated sequence */
6983
6984 if (c == ']' && start_rep != NULL)
6985 {
6986 PCRE2_SIZE d;
6987 long li;
6988 char *endptr;
6989
6990 if (*p++ != '{')
6991 {
6992 fprintf(outfile, "** Expected '{' after \\[....]\n");
6993 return PR_OK;
6994 }
6995
6996 li = strtol((const char *)p, &endptr, 10);
6997 if (S32OVERFLOW(li))
6998 {
6999 fprintf(outfile, "** Repeat count too large\n");
7000 return PR_OK;
7001 }
7002
7003 p = (uint8_t *)endptr;
7004 if (*p++ != '}')
7005 {
7006 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
7007 return PR_OK;
7008 }
7009
7010 i = (int32_t)li;
7011 if (i-- <= 0)
7012 {
7013 fprintf(outfile, "** Zero or negative repeat not allowed\n");
7014 return PR_OK;
7015 }
7016
7017 replen = CAST8VAR(q) - start_rep;
7018 if (PRIV(ckd_smul)(&d, replen, i))
7019 {
7020 fprintf(outfile, "** Expanded content too large\n");
7021 return PR_OK;
7022 }
7023 needlen += d;
7024
7025 if (needlen >= dbuffer_size)
7026 {
7027 size_t qoffset = CAST8VAR(q) - dbuffer;
7028 size_t rep_offset = start_rep - dbuffer;
7029 while (needlen >= dbuffer_size)
7030 {
7031 if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2;
7032 else dbuffer_size = needlen + 1;
7033 }
7034 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
7035 if (dbuffer == NULL)
7036 {
7037 fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n",
7038 dbuffer_size);
7039 exit(1);
7040 }
7041 SETCASTPTR(q, dbuffer + qoffset);
7042 start_rep = dbuffer + rep_offset;
7043 }
7044
7045 while (i-- > 0)
7046 {
7047 memcpy(CAST8VAR(q), start_rep, replen);
7048 SETPLUS(q, replen/code_unit_size);
7049 }
7050
7051 start_rep = NULL;
7052 continue;
7053 }
7054
7055 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
7056 set, do the fudge for setting the top bit. */
7057
7058 if (c != '\\' || subject_literal)
7059 {
7060 uint32_t topbit = 0;
7061 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
7062 {
7063 topbit = 0x80000000;
7064 c = *p++;
7065 }
7066 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
7067 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
7068 c |= topbit;
7069 }
7070
7071 /* Handle backslash escapes */
7072
7073 else switch ((c = *p++))
7074 {
7075 case '\\': break;
7076 case 'a': c = CHAR_BEL; break;
7077 case 'b': c = '\b'; break;
7078 case 'e': c = CHAR_ESC; break;
7079 case 'f': c = '\f'; break;
7080 case 'n': c = '\n'; break;
7081 case 'r': c = '\r'; break;
7082 case 't': c = '\t'; break;
7083 case 'v': c = '\v'; break;
7084
7085 case '0': case '1': case '2': case '3':
7086 case '4': case '5': case '6': case '7':
7087 c -= '0';
7088 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
7089 c = c * 8 + *p++ - '0';
7090 break;
7091
7092 case 'o':
7093 if (*p == '{')
7094 {
7095 uint8_t *pt = p;
7096 c = 0;
7097 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
7098 {
7099 if (++i == 12)
7100 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
7101 "using only the first twelve.\n");
7102 else c = c * 8 + *pt - '0';
7103 }
7104 if (*pt == '}') p = pt + 1;
7105 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
7106 }
7107 break;
7108
7109 case 'x':
7110 if (*p == '{')
7111 {
7112 uint8_t *pt = p;
7113 c = 0;
7114
7115 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
7116 when isxdigit() is a macro that refers to its argument more than
7117 once. This is banned by the C Standard, but apparently happens in at
7118 least one MacOS environment. */
7119
7120 for (pt++; isxdigit(*pt); pt++)
7121 {
7122 if (++i == 9)
7123 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
7124 "using only the first eight.\n");
7125 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
7126 }
7127 if (*pt == '}')
7128 {
7129 p = pt + 1;
7130 break;
7131 }
7132 /* Not correct form for \x{...}; fall through */
7133 }
7134
7135 /* \x without {} always defines just one byte in 8-bit mode. This
7136 allows UTF-8 characters to be constructed byte by byte, and also allows
7137 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
7138 Otherwise, pass it down as data. */
7139
7140 c = 0;
7141 while (i++ < 2 && isxdigit(*p))
7142 {
7143 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
7144 p++;
7145 }
7146 #if defined SUPPORT_PCRE2_8
7147 if (utf && (test_mode == PCRE8_MODE))
7148 {
7149 *q8++ = c;
7150 continue;
7151 }
7152 #endif
7153 break;
7154
7155 case 0: /* \ followed by EOF allows for an empty line */
7156 p--;
7157 continue;
7158
7159 case '=': /* \= terminates the data, starts modifiers */
7160 goto ENDSTRING;
7161
7162 case '[': /* \[ introduces a replicated character sequence */
7163 if (start_rep != NULL)
7164 {
7165 fprintf(outfile, "** Nested replication is not supported\n");
7166 return PR_OK;
7167 }
7168 start_rep = CAST8VAR(q);
7169 continue;
7170
7171 default:
7172 if (isalnum(c))
7173 {
7174 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
7175 return PR_OK;
7176 }
7177 }
7178
7179 /* We now have a character value in c that may be greater than 255.
7180 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
7181 than 127 in UTF mode must have come from \x{...} or octal constructs
7182 because values from \x.. get this far only in non-UTF mode. */
7183
7184 #ifdef SUPPORT_PCRE2_8
7185 if (test_mode == PCRE8_MODE)
7186 {
7187 if (utf)
7188 {
7189 if (c > 0x7fffffff)
7190 {
7191 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
7192 "and so cannot be converted to UTF-8\n", c);
7193 return PR_OK;
7194 }
7195 q8 += ord2utf8(c, q8);
7196 }
7197 else
7198 {
7199 if (c > 0xffu)
7200 {
7201 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
7202 "and UTF-8 mode is not enabled.\n", c);
7203 fprintf(outfile, "** Truncation will probably give the wrong "
7204 "result.\n");
7205 }
7206 *q8++ = (uint8_t)c;
7207 }
7208 }
7209 #endif
7210 #ifdef SUPPORT_PCRE2_16
7211 if (test_mode == PCRE16_MODE)
7212 {
7213 if (utf)
7214 {
7215 if (c > 0x10ffffu)
7216 {
7217 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
7218 "0x10ffff and so cannot be converted to UTF-16\n", c);
7219 return PR_OK;
7220 }
7221 else if (c >= 0x10000u)
7222 {
7223 c-= 0x10000u;
7224 *q16++ = 0xD800 | (c >> 10);
7225 *q16++ = 0xDC00 | (c & 0x3ff);
7226 }
7227 else
7228 *q16++ = c;
7229 }
7230 else
7231 {
7232 if (c > 0xffffu)
7233 {
7234 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
7235 "and UTF-16 mode is not enabled.\n", c);
7236 fprintf(outfile, "** Truncation will probably give the wrong "
7237 "result.\n");
7238 }
7239
7240 *q16++ = (uint16_t)c;
7241 }
7242 }
7243 #endif
7244 #ifdef SUPPORT_PCRE2_32
7245 if (test_mode == PCRE32_MODE)
7246 {
7247 *q32++ = c;
7248 }
7249 #endif
7250 }
7251
7252 ENDSTRING:
7253 SET(*q, 0);
7254 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
7255 ulen = len/code_unit_size; /* Length in code units */
7256 arg_ulen = ulen; /* Value to use in match arg */
7257
7258 /* If the string was terminated by \= we must now interpret modifiers. */
7259
7260 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7261 return PR_OK;
7262
7263 /* Setting substitute_{skip,stop} implies a substitute callout. */
7264
7265 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7266 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7267
7268 /* Check for mutually exclusive modifiers. At present, these are all in the
7269 first control word. */
7270
7271 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7272 {
7273 c = dat_datctl.control & exclusive_dat_controls[k];
7274 if (c != 0 && c != (c & (~c+1)))
7275 {
7276 show_controls(c, 0, "** Not allowed together:");
7277 fprintf(outfile, "\n");
7278 return PR_OK;
7279 }
7280 }
7281
7282 if (pat_patctl.replacement[0] != 0)
7283 {
7284 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7285 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7286 {
7287 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7288 return PR_OK;
7289 }
7290
7291 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7292 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7293 }
7294
7295 /* Warn for modifiers that are ignored for DFA. */
7296
7297 if ((dat_datctl.control & CTL_DFA) != 0)
7298 {
7299 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7300 fprintf(outfile, "** Ignored for DFA matching: allcaptures\n");
7301 if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0)
7302 fprintf(outfile, "** Ignored for DFA matching: heapframes_size\n");
7303 }
7304
7305 /* We now have the subject in dbuffer, with len containing the byte length, and
7306 ulen containing the code unit length, with a copy in arg_ulen for use in match
7307 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7308 zero_terminate modifier is present).
7309
7310 Move the data to the end of the buffer so that a read over the end can be
7311 caught by valgrind or other means. If we have explicit valgrind support, mark
7312 the unused start of the buffer unaddressable. If we are using the POSIX
7313 interface, or testing zero-termination, we must include the terminating zero in
7314 the usable data. */
7315
7316 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7317 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7318 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7319 #ifdef SUPPORT_VALGRIND
7320 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7321 #endif
7322
7323 /* Now pp points to the subject string, but if null_subject was specified, set
7324 it to NULL to test PCRE2's behaviour. */
7325
7326 if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
7327
7328 /* POSIX matching is only possible in 8-bit mode, and it does not support
7329 timing or other fancy features. Some were checked at compile time, but we need
7330 to check the match-time settings here. */
7331
7332 #ifdef SUPPORT_PCRE2_8
7333 if ((pat_patctl.control & CTL_POSIX) != 0)
7334 {
7335 int rc;
7336 int eflags = 0;
7337 regmatch_t *pmatch = NULL;
7338 const char *msg = "** Ignored with POSIX interface:";
7339
7340 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7341 prmsg(&msg, "callout_error");
7342 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7343 prmsg(&msg, "callout_fail");
7344 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7345 prmsg(&msg, "copy");
7346 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7347 prmsg(&msg, "get");
7348 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7349 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7350
7351 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7352 {
7353 fprintf(outfile, "%s", msg);
7354 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7355 msg = "";
7356 }
7357
7358 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7359 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7360 {
7361 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7362 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7363 msg = "";
7364 }
7365
7366 if (msg[0] == 0) fprintf(outfile, "\n");
7367
7368 if (dat_datctl.oveccount > 0)
7369 {
7370 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7371 if (pmatch == NULL)
7372 {
7373 fprintf(outfile, "** Failed to get memory for recording matching "
7374 "information (size set = %du)\n", dat_datctl.oveccount);
7375 return PR_OK;
7376 }
7377 }
7378
7379 if (dat_datctl.startend[0] != CFORE_UNSET)
7380 {
7381 pmatch[0].rm_so = dat_datctl.startend[0];
7382 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7383 dat_datctl.startend[1] : len;
7384 eflags |= REG_STARTEND;
7385 }
7386
7387 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7388 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7389 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7390
7391 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7392 if (rc != 0)
7393 {
7394 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7395 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7396 }
7397 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7398 fprintf(outfile, "Matched with REG_NOSUB\n");
7399 else if (dat_datctl.oveccount == 0)
7400 fprintf(outfile, "Matched without capture\n");
7401 else
7402 {
7403 size_t i, j;
7404 size_t last_printed = (size_t)dat_datctl.oveccount;
7405 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7406 {
7407 if (pmatch[i].rm_so >= 0)
7408 {
7409 PCRE2_SIZE start = pmatch[i].rm_so;
7410 PCRE2_SIZE end = pmatch[i].rm_eo;
7411 for (j = last_printed + 1; j < i; j++)
7412 fprintf(outfile, "%2d: <unset>\n", (int)j);
7413 last_printed = i;
7414 if (start > end)
7415 {
7416 start = pmatch[i].rm_eo;
7417 end = pmatch[i].rm_so;
7418 fprintf(outfile, "Start of matched string is beyond its end - "
7419 "displaying from end to start.\n");
7420 }
7421 fprintf(outfile, "%2d: ", (int)i);
7422 PCHARSV(pp, start, end - start, utf, outfile);
7423 fprintf(outfile, "\n");
7424
7425 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7426 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7427 {
7428 fprintf(outfile, "%2d+ ", (int)i);
7429 /* Note: don't use the start/end variables here because we want to
7430 show the text from what is reported as the end. */
7431 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7432 fprintf(outfile, "\n"); }
7433 }
7434 }
7435 }
7436 free(pmatch);
7437 return PR_OK;
7438 }
7439 #endif /* SUPPORT_PCRE2_8 */
7440
7441 /* Handle matching via the native interface. Check for consistency of
7442 modifiers. */
7443
7444 if (dat_datctl.startend[0] != CFORE_UNSET)
7445 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7446
7447 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7448 matching, even if the JIT compiler was used. */
7449
7450 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7451 FLD(compiled_code, executable_jit) != NULL)
7452 {
7453 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7454 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7455 }
7456
7457 /* Handle passing the subject as zero-terminated. */
7458
7459 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7460 arg_ulen = PCRE2_ZERO_TERMINATED;
7461
7462 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7463 NULL context. */
7464
7465 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7466 NULL : PTR(dat_context);
7467
7468 /* Enable display of malloc/free if wanted. We can do this only if either the
7469 pattern or the subject is processed with a context. */
7470
7471 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7472
7473 if (show_memory &&
7474 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7475 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7476 "context: ignored\n");
7477
7478 /* Create and assign a JIT stack if requested. */
7479
7480 if (dat_datctl.jitstack != 0)
7481 {
7482 if (dat_datctl.jitstack != jit_stack_size)
7483 {
7484 PCRE2_JIT_STACK_FREE(jit_stack);
7485 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7486 jit_stack_size = dat_datctl.jitstack;
7487 }
7488 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7489 }
7490
7491 /* Or de-assign */
7492
7493 else if (jit_stack != NULL)
7494 {
7495 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7496 PCRE2_JIT_STACK_FREE(jit_stack);
7497 jit_stack = NULL;
7498 jit_stack_size = 0;
7499 }
7500
7501 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7502 if we want to verify that JIT was actually used. */
7503
7504 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7505 {
7506 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7507 }
7508
7509 /* Adjust match_data according to size of offsets required. A size of zero
7510 causes a new match data block to be obtained that exactly fits the pattern. */
7511
7512 if (dat_datctl.oveccount == 0)
7513 {
7514 PCRE2_MATCH_DATA_FREE(match_data);
7515 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code,
7516 general_context);
7517 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7518 }
7519 else if (dat_datctl.oveccount <= max_oveccount)
7520 {
7521 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7522 }
7523 else
7524 {
7525 max_oveccount = dat_datctl.oveccount;
7526 PCRE2_MATCH_DATA_FREE(match_data);
7527 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, general_context);
7528 }
7529
7530 if (CASTVAR(void *, match_data) == NULL)
7531 {
7532 fprintf(outfile, "** Failed to get memory for recording matching "
7533 "information (size requested: %d)\n", dat_datctl.oveccount);
7534 max_oveccount = 0;
7535 return PR_OK;
7536 }
7537
7538 ovector = FLD(match_data, ovector);
7539 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7540
7541 /* Replacement processing is ignored for DFA matching. */
7542
7543 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7544 {
7545 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7546 dat_datctl.replacement[0] = 0;
7547 }
7548
7549 /* If a replacement string is provided, call pcre2_substitute() instead of or
7550 after one of the matching functions. First we have to convert the replacement
7551 string to the appropriate width. */
7552
7553 if (dat_datctl.replacement[0] != 0)
7554 {
7555 int rc;
7556 uint8_t *pr;
7557 uint8_t rbuffer[REPLACE_BUFFSIZE];
7558 uint8_t nbuffer[REPLACE_BUFFSIZE];
7559 uint8_t *rbptr;
7560 uint32_t xoptions;
7561 uint32_t emoption; /* External match option */
7562 PCRE2_SIZE j, rlen, nsize, erroroffset;
7563 BOOL badutf = FALSE;
7564
7565 #ifdef SUPPORT_PCRE2_8
7566 uint8_t *r8 = NULL;
7567 #endif
7568 #ifdef SUPPORT_PCRE2_16
7569 uint16_t *r16 = NULL;
7570 #endif
7571 #ifdef SUPPORT_PCRE2_32
7572 uint32_t *r32 = NULL;
7573 #endif
7574
7575 /* Fill the ovector with junk to detect elements that do not get set
7576 when they should be (relevant only when "allvector" is specified). */
7577
7578 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7579
7580 if (timeitm)
7581 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7582
7583 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7584 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7585
7586 /* Check for a test that does substitution after an initial external match.
7587 If this is set, we run the external match, but leave the interpretation of
7588 its output to pcre2_substitute(). */
7589
7590 emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7591 PCRE2_SUBSTITUTE_MATCHED;
7592
7593 if (emoption != 0)
7594 {
7595 if ((pat_patctl.control & CTL_JITFAST) != 0)
7596 {
7597 PCRE2_JIT_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7598 dat_datctl.options, match_data, use_dat_context);
7599 }
7600 else
7601 {
7602 PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7603 dat_datctl.options, match_data, use_dat_context);
7604 }
7605 }
7606
7607 xoptions = emoption |
7608 (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7609 PCRE2_SUBSTITUTE_GLOBAL) |
7610 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7611 PCRE2_SUBSTITUTE_EXTENDED) |
7612 (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7613 PCRE2_SUBSTITUTE_LITERAL) |
7614 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7615 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7616 (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7617 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7618 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7619 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7620 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7621 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7622
7623 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7624 pr = dat_datctl.replacement;
7625
7626 /* If the replacement starts with '[<number>]' we interpret that as length
7627 value for the replacement buffer. */
7628
7629 nsize = REPLACE_BUFFSIZE/code_unit_size;
7630 if (*pr == '[')
7631 {
7632 PCRE2_SIZE n = 0;
7633 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7634 if (*pr++ != ']')
7635 {
7636 fprintf(outfile, "Bad buffer size in replacement string\n");
7637 return PR_OK;
7638 }
7639 if (n > nsize)
7640 {
7641 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7642 "large (max %" SIZ_FORM ")\n", n, nsize);
7643 return PR_OK;
7644 }
7645 nsize = n;
7646 }
7647
7648 /* Now copy the replacement string to a buffer of the appropriate width. No
7649 escape processing is done for replacements. In UTF mode, check for an invalid
7650 UTF-8 input string, and if it is invalid, just copy its code units without
7651 UTF interpretation. This provides a means of checking that an invalid string
7652 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7653 replacement. */
7654
7655 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7656
7657 /* Not UTF or invalid UTF-8: just copy the code units. */
7658
7659 if (!utf || badutf)
7660 {
7661 while ((c = *pr++) != 0)
7662 {
7663 #ifdef SUPPORT_PCRE2_8
7664 if (test_mode == PCRE8_MODE) *r8++ = c;
7665 #endif
7666 #ifdef SUPPORT_PCRE2_16
7667 if (test_mode == PCRE16_MODE) *r16++ = c;
7668 #endif
7669 #ifdef SUPPORT_PCRE2_32
7670 if (test_mode == PCRE32_MODE) *r32++ = c;
7671 #endif
7672 }
7673 }
7674
7675 /* Valid UTF-8 replacement string */
7676
7677 else while ((c = *pr++) != 0)
7678 {
7679 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7680
7681 #ifdef SUPPORT_PCRE2_8
7682 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7683 #endif
7684
7685 #ifdef SUPPORT_PCRE2_16
7686 if (test_mode == PCRE16_MODE)
7687 {
7688 if (c >= 0x10000u)
7689 {
7690 c-= 0x10000u;
7691 *r16++ = 0xD800 | (c >> 10);
7692 *r16++ = 0xDC00 | (c & 0x3ff);
7693 }
7694 else *r16++ = c;
7695 }
7696 #endif
7697
7698 #ifdef SUPPORT_PCRE2_32
7699 if (test_mode == PCRE32_MODE) *r32++ = c;
7700 #endif
7701 }
7702
7703 SET(*r, 0);
7704 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7705 rlen = PCRE2_ZERO_TERMINATED;
7706 else
7707 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7708
7709 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7710 {
7711 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7712 }
7713 else
7714 {
7715 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7716 }
7717
7718 /* There is a special option to set the replacement to NULL in order to test
7719 that case. */
7720
7721 rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
7722
7723 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7724 dat_datctl.options|xoptions, match_data, use_dat_context,
7725 rbptr, rlen, nbuffer, &nsize);
7726
7727 if (rc < 0)
7728 {
7729 fprintf(outfile, "Failed: error %d", rc);
7730 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7731 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7732 fprintf(outfile, ": ");
7733 if (!print_error_message(rc, "", "")) return PR_ABEND;
7734 if (rc == PCRE2_ERROR_NOMEMORY &&
7735 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7736 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7737 }
7738 else
7739 {
7740 fprintf(outfile, "%2d: ", rc);
7741 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7742 }
7743
7744 fprintf(outfile, "\n");
7745 show_memory = FALSE;
7746
7747 /* Show final ovector contents and resulting heapframe size if requested. */
7748
7749 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7750 show_ovector(ovector, oveccount);
7751
7752 if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
7753 (dat_datctl.control & CTL_DFA) == 0)
7754 show_heapframes_size();
7755
7756 return PR_OK;
7757 } /* End of substitution handling */
7758
7759 /* When a replacement string is not provided, run a loop for global matching
7760 with one of the basic matching functions. For altglobal (or first time round
7761 the loop), set an "unset" value for the previous match info. */
7762
7763 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7764
7765 for (gmatched = 0;; gmatched++)
7766 {
7767 PCRE2_SIZE j;
7768 int capcount;
7769
7770 /* Fill the ovector with junk to detect elements that do not get set
7771 when they should be. */
7772
7773 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7774
7775 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7776 stack callback function. */
7777
7778 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7779
7780 /* Do timing if required. */
7781
7782 if (timeitm > 0)
7783 {
7784 int i;
7785 clock_t start_time, time_taken;
7786
7787 if ((dat_datctl.control & CTL_DFA) != 0)
7788 {
7789 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7790 {
7791 fprintf(outfile, "Timing DFA restarts is not supported\n");
7792 return PR_OK;
7793 }
7794 if (dfa_workspace == NULL)
7795 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7796 start_time = clock();
7797 for (i = 0; i < timeitm; i++)
7798 {
7799 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7800 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7801 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7802 }
7803 }
7804
7805 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7806 {
7807 start_time = clock();
7808 for (i = 0; i < timeitm; i++)
7809 {
7810 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7811 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7812 use_dat_context);
7813 }
7814 }
7815
7816 else
7817 {
7818 start_time = clock();
7819 for (i = 0; i < timeitm; i++)
7820 {
7821 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7822 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7823 use_dat_context);
7824 }
7825 }
7826 total_match_time += (time_taken = clock() - start_time);
7827 fprintf(outfile, "Match time %7.4f microseconds\n",
7828 ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeitm);
7829 }
7830
7831 /* Find the heap, match and depth limits if requested. The depth and heap
7832 limits are not relevant for JIT. The return from check_match_limit() is the
7833 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7834
7835 if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0)
7836 {
7837 capcount = 0; /* This stops compiler warnings */
7838
7839 if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 &&
7840 (FLD(compiled_code, executable_jit) == NULL ||
7841 (dat_datctl.options & PCRE2_NO_JIT) != 0))
7842 {
7843 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7844 }
7845
7846 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7847 "match");
7848
7849 if (FLD(compiled_code, executable_jit) == NULL ||
7850 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7851 (dat_datctl.control & CTL_DFA) != 0)
7852 {
7853 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7854 "depth");
7855 }
7856
7857 if (capcount == 0)
7858 {
7859 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7860 capcount = dat_datctl.oveccount;
7861 }
7862 }
7863
7864 /* Otherwise just run a single match, setting up a callout if required (the
7865 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7866
7867 else
7868 {
7869 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7870 {
7871 PCRE2_SET_CALLOUT(dat_context, callout_function,
7872 (void *)(&dat_datctl.callout_data));
7873 first_callout = TRUE;
7874 last_callout_mark = NULL;
7875 callout_count = 0;
7876 }
7877 else
7878 {
7879 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7880 }
7881
7882 /* Run a single DFA or NFA match. */
7883
7884 if ((dat_datctl.control & CTL_DFA) != 0)
7885 {
7886 if (dfa_workspace == NULL)
7887 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7888 if (dfa_matched++ == 0)
7889 dfa_workspace[0] = -1; /* To catch bad restart */
7890 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7891 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7892 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7893 if (capcount == 0)
7894 {
7895 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7896 capcount = dat_datctl.oveccount;
7897 }
7898 }
7899 else
7900 {
7901 if ((pat_patctl.control & CTL_JITFAST) != 0)
7902 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7903 dat_datctl.options | g_notempty, match_data, use_dat_context);
7904 else
7905 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7906 dat_datctl.options | g_notempty, match_data, use_dat_context);
7907 if (capcount == 0)
7908 {
7909 fprintf(outfile, "Matched, but too many substrings\n");
7910 capcount = dat_datctl.oveccount;
7911 }
7912 }
7913 }
7914
7915 /* The result of the match is now in capcount. First handle a successful
7916 match. If pp was forced to be NULL (to test NULL handling) it will have been
7917 treated as an empty string if the length was zero. So re-create that for
7918 outputting. */
7919
7920 if (capcount >= 0)
7921 {
7922 int i;
7923
7924 if (pp == NULL) pp = (uint8_t *)"";
7925
7926 if (capcount > (int)oveccount) /* Check for lunatic return value */
7927 {
7928 fprintf(outfile,
7929 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7930 capcount, oveccount);
7931 capcount = oveccount;
7932 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7933 {
7934 fprintf(outfile, "** Global loop abandoned\n");
7935 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7936 }
7937 }
7938
7939 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7940 should be, but not for fast JIT, where it isn't supported. */
7941
7942 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7943 (pat_patctl.control & CTL_JITFAST) == 0)
7944 {
7945 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7946 fprintf(outfile,
7947 "** PCRE2 error: flag not set after copy_matched_subject\n");
7948
7949 if (CASTFLD(void *, match_data, subject) == pp)
7950 fprintf(outfile,
7951 "** PCRE2 error: copy_matched_subject has not copied\n");
7952
7953 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7954 fprintf(outfile,
7955 "** PCRE2 error: copy_matched_subject mismatch\n");
7956 }
7957
7958 /* If this is not the first time round a global loop, check that the
7959 returned string has changed. If it has not, check for an empty string match
7960 at different starting offset from the previous match. This is a failed test
7961 retry for null-matching patterns that don't match at their starting offset,
7962 for example /(?<=\G.)/. A repeated match at the same point is not such a
7963 pattern, and must be discarded, and we then proceed to seek a non-null
7964 match at the current point. For any other repeated match, there is a bug
7965 somewhere and we must break the loop because it will go on for ever. We
7966 know that there are always at least two elements in the ovector. */
7967
7968 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7969 {
7970 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7971 {
7972 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7973 ovecsave[2] = dat_datctl.offset;
7974 continue; /* Back to the top of the loop */
7975 }
7976 fprintf(outfile,
7977 "** PCRE2 error: global repeat returned the same string as previous\n");
7978 fprintf(outfile, "** Global loop abandoned\n");
7979 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7980 }
7981
7982 /* "allcaptures" requests showing of all captures in the pattern, to check
7983 unset ones at the end. It may be set on the pattern or the data. Implement
7984 by setting capcount to the maximum. This is not relevant for DFA matching,
7985 so ignore it (warning given above). */
7986
7987 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7988 {
7989 capcount = maxcapcount + 1; /* Allow for full match */
7990 if (capcount > (int)oveccount) capcount = oveccount;
7991 }
7992
7993 /* "allvector" requests showing the entire ovector. */
7994
7995 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7996
7997 /* Output the captured substrings. Note that, for the matched string,
7998 the use of \K in an assertion can make the start later than the end. */
7999
8000 for (i = 0; i < 2*capcount; i += 2)
8001 {
8002 PCRE2_SIZE lleft, lmiddle, lright;
8003 PCRE2_SIZE start = ovector[i];
8004 PCRE2_SIZE end = ovector[i+1];
8005
8006 if (start > end)
8007 {
8008 start = ovector[i+1];
8009 end = ovector[i];
8010 fprintf(outfile, "Start of matched string is beyond its end - "
8011 "displaying from end to start.\n");
8012 }
8013
8014 fprintf(outfile, "%2d: ", i/2);
8015
8016 /* Check for an unset group */
8017
8018 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
8019 {
8020 fprintf(outfile, "<unset>\n");
8021 continue;
8022 }
8023
8024 /* Check for silly offsets, in particular, values that have not been
8025 set when they should have been. However, if we are past the end of the
8026 captures for this pattern ("allvector" causes this), or if we are DFA
8027 matching, it isn't an error if the entry is unchanged. */
8028
8029 if (start > ulen || end > ulen)
8030 {
8031 if (((dat_datctl.control & CTL_DFA) != 0 ||
8032 i >= (int)(2*maxcapcount + 2)) &&
8033 start == JUNK_OFFSET && end == JUNK_OFFSET)
8034 fprintf(outfile, "<unchanged>\n");
8035 else
8036 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
8037 (unsigned long int)start, (unsigned long int)end);
8038 continue;
8039 }
8040
8041 /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
8042 JIT, it is disabled above, with a comment.) When the match is done by the
8043 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
8044 set, and if the leftmost consulted character is before the start of the
8045 match or the rightmost consulted character is past the end of the match,
8046 we want to show all consulted characters for the main matched string, and
8047 indicate which were lookarounds. */
8048
8049 if (i == 0)
8050 {
8051 BOOL showallused;
8052 PCRE2_SIZE leftchar, rightchar;
8053
8054 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
8055 {
8056 leftchar = FLD(match_data, leftchar);
8057 rightchar = FLD(match_data, rightchar);
8058 showallused = i == 0 && (leftchar < start || rightchar > end);
8059 }
8060 else showallused = FALSE;
8061
8062 if (showallused)
8063 {
8064 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
8065 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
8066 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
8067 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8068 fprintf(outfile, " (JIT)");
8069 fprintf(outfile, "\n ");
8070 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
8071 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
8072 for (j = 0; j < lright; j++) fprintf(outfile, ">");
8073 }
8074
8075 /* When a pattern contains \K, the start of match position may be
8076 different to the start of the matched string. When this is the case,
8077 show it when requested. */
8078
8079 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
8080 {
8081 PCRE2_SIZE startchar;
8082 PCRE2_GET_STARTCHAR(startchar, match_data);
8083 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
8084 PCHARSV(pp, start, end - start, utf, outfile);
8085 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8086 fprintf(outfile, " (JIT)");
8087 if (startchar != start)
8088 {
8089 fprintf(outfile, "\n ");
8090 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
8091 }
8092 }
8093
8094 /* Otherwise, just show the matched string. */
8095
8096 else
8097 {
8098 PCHARSV(pp, start, end - start, utf, outfile);
8099 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8100 fprintf(outfile, " (JIT)");
8101 }
8102 }
8103
8104 /* Not the main matched string. Just show it unadorned. */
8105
8106 else
8107 {
8108 PCHARSV(pp, start, end - start, utf, outfile);
8109 }
8110
8111 fprintf(outfile, "\n");
8112
8113 /* Note: don't use the start/end variables here because we want to
8114 show the text from what is reported as the end. */
8115
8116 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
8117 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
8118 {
8119 fprintf(outfile, "%2d+ ", i/2);
8120 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
8121 fprintf(outfile, "\n");
8122 }
8123 }
8124
8125 /* Output (*MARK) data if requested */
8126
8127 if ((dat_datctl.control & CTL_MARK) != 0 &&
8128 TESTFLD(match_data, mark, !=, NULL))
8129 {
8130 fprintf(outfile, "MK: ");
8131 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8132 fprintf(outfile, "\n");
8133 }
8134
8135 /* Process copy/get strings */
8136
8137 if (!copy_and_get(utf, capcount)) return PR_ABEND;
8138
8139 } /* End of handling a successful match */
8140
8141 /* There was a partial match. The value of ovector[0] is the bumpalong point,
8142 that is, startchar, not any \K point that might have been passed. When JIT is
8143 not in use, "allusedtext" may be set, in which case we indicate the leftmost
8144 consulted character. */
8145
8146 else if (capcount == PCRE2_ERROR_PARTIAL)
8147 {
8148 PCRE2_SIZE leftchar;
8149 int backlength;
8150 int rubriclength = 0;
8151
8152 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
8153 {
8154 leftchar = FLD(match_data, leftchar);
8155 }
8156 else leftchar = ovector[0];
8157
8158 fprintf(outfile, "Partial match");
8159 if ((dat_datctl.control & CTL_MARK) != 0 &&
8160 TESTFLD(match_data, mark, !=, NULL))
8161 {
8162 fprintf(outfile, ", mark=");
8163 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
8164 outfile);
8165 rubriclength += 7;
8166 }
8167 fprintf(outfile, ": ");
8168 rubriclength += 15;
8169
8170 PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
8171 PCHARSV(pp, ovector[0], ovector[1] - ovector[0], utf, outfile);
8172
8173 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8174 fprintf(outfile, " (JIT)");
8175 fprintf(outfile, "\n");
8176
8177 if (backlength != 0)
8178 {
8179 int i;
8180 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
8181 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
8182 fprintf(outfile, "\n");
8183 }
8184
8185 if (ulen != ovector[1])
8186 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
8187 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
8188
8189 /* Process copy/get strings */
8190
8191 if (!copy_and_get(utf, 1)) return PR_ABEND;
8192
8193 /* "allvector" outputs the entire vector */
8194
8195 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8196 show_ovector(ovector, oveccount);
8197
8198 break; /* Out of the /g loop */
8199 } /* End of handling partial match */
8200
8201 /* Failed to match. If this is a /g or /G loop, we might previously have
8202 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
8203 If that is the case, this is not necessarily the end. We want to advance the
8204 start offset, and continue. We won't be at the end of the string - that was
8205 checked before setting g_notempty. We achieve the effect by pretending that a
8206 single character was matched.
8207
8208 Complication arises in the case when the newline convention is "any", "crlf",
8209 or "anycrlf". If the previous match was at the end of a line terminated by
8210 CRLF, an advance of one character just passes the CR, whereas we should
8211 prefer the longer newline sequence, as does the code in pcre2_match().
8212
8213 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
8214 character, not one byte. */
8215
8216 else if (g_notempty != 0) /* There was a previous null match */
8217 {
8218 uint16_t nl = FLD(compiled_code, newline_convention);
8219 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
8220 PCRE2_SIZE end_offset = start_offset + 1;
8221
8222 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
8223 nl == PCRE2_NEWLINE_ANYCRLF) &&
8224 start_offset < ulen - 1 &&
8225 CODE_UNIT(pp, start_offset) == '\r' &&
8226 CODE_UNIT(pp, end_offset) == '\n')
8227 end_offset++;
8228
8229 else if (utf && test_mode != PCRE32_MODE)
8230 {
8231 if (test_mode == PCRE8_MODE)
8232 {
8233 for (; end_offset < ulen; end_offset++)
8234 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8235 }
8236 else /* 16-bit mode */
8237 {
8238 for (; end_offset < ulen; end_offset++)
8239 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8240 }
8241 }
8242
8243 SETFLDVEC(match_data, ovector, 0, start_offset);
8244 SETFLDVEC(match_data, ovector, 1, end_offset);
8245 } /* End of handling null match in a global loop */
8246
8247 /* A "normal" match failure. There will be a negative error number in
8248 capcount. */
8249
8250 else
8251 {
8252 switch(capcount)
8253 {
8254 case PCRE2_ERROR_NOMATCH:
8255 if (gmatched == 0)
8256 {
8257 fprintf(outfile, "No match");
8258 if ((dat_datctl.control & CTL_MARK) != 0 &&
8259 TESTFLD(match_data, mark, !=, NULL))
8260 {
8261 fprintf(outfile, ", mark = ");
8262 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8263 }
8264 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8265 fprintf(outfile, " (JIT)");
8266 fprintf(outfile, "\n");
8267
8268 /* "allvector" outputs the entire vector */
8269
8270 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8271 show_ovector(ovector, oveccount);
8272 }
8273 break;
8274
8275 case PCRE2_ERROR_BADUTFOFFSET:
8276 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
8277 break;
8278
8279 default:
8280 fprintf(outfile, "Failed: error %d: ", capcount);
8281 if (!print_error_message(capcount, "", "")) return PR_ABEND;
8282 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
8283 capcount >= PCRE2_ERROR_UTF32_ERR2)
8284 {
8285 PCRE2_SIZE startchar;
8286 PCRE2_GET_STARTCHAR(startchar, match_data);
8287 fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8288 }
8289 fprintf(outfile, "\n");
8290 break;
8291 }
8292
8293 break; /* Out of the /g loop */
8294 } /* End of failed match handling */
8295
8296 /* Control reaches here in two circumstances: (a) after a match, and (b)
8297 after a non-match that immediately followed a match on an empty string when
8298 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8299 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8300 of one character. So effectively we get here only after a match. If we
8301 are not doing a global search, we are done. */
8302
8303 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8304 {
8305 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8306 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8307
8308 /* We must now set up for the next iteration of a global search. If we have
8309 matched an empty string, first check to see if we are at the end of the
8310 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8311 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8312 at the same point. If this fails it will be picked up above, where a fake
8313 match is set up so that at this point we advance to the next character.
8314
8315 However, in order to cope with patterns that never match at their starting
8316 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8317 than the starting offset. This means there will be a retry with the
8318 starting offset at the match offset. If this returns the same match again,
8319 it is picked up above and ignored, and the special action is then taken. */
8320
8321 if (match_offset == end_offset)
8322 {
8323 if (end_offset == ulen) break; /* End of subject */
8324 if (match_offset <= dat_datctl.offset)
8325 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8326 }
8327
8328 /* However, even after matching a non-empty string, there is still one
8329 tricky case. If a pattern contains \K within a lookbehind assertion at the
8330 start, the end of the matched string can be at the offset where the match
8331 started. In the case of a normal /g iteration without special action, this
8332 leads to a loop that keeps on returning the same substring. The loop would
8333 be caught above, but we really want to move on to the next match. */
8334
8335 else
8336 {
8337 g_notempty = 0; /* Set for a "normal" repeat */
8338 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8339 {
8340 PCRE2_SIZE startchar;
8341 PCRE2_GET_STARTCHAR(startchar, match_data);
8342 if (end_offset <= startchar)
8343 {
8344 if (startchar >= ulen) break; /* End of subject */
8345 end_offset = startchar + 1;
8346 if (utf && test_mode != PCRE32_MODE)
8347 {
8348 if (test_mode == PCRE8_MODE)
8349 {
8350 for (; end_offset < ulen; end_offset++)
8351 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8352 }
8353 else /* 16-bit mode */
8354 {
8355 for (; end_offset < ulen; end_offset++)
8356 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8357 }
8358 }
8359 }
8360 }
8361 }
8362
8363 /* For a normal global (/g) iteration, save the current ovector[0,1] and
8364 the starting offset so that we can check that they do change each time.
8365 Otherwise a matching bug that returns the same string causes an infinite
8366 loop. It has happened! Then update the start offset, leaving other
8367 parameters alone. */
8368
8369 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8370 {
8371 ovecsave[0] = ovector[0];
8372 ovecsave[1] = ovector[1];
8373 ovecsave[2] = dat_datctl.offset;
8374 dat_datctl.offset = end_offset;
8375 }
8376
8377 /* For altglobal, just update the pointer and length. */
8378
8379 else
8380 {
8381 pp += end_offset * code_unit_size;
8382 len -= end_offset * code_unit_size;
8383 ulen -= end_offset;
8384 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8385 }
8386 }
8387 } /* End of global loop */
8388
8389 /* All matching is done; show the resulting heapframe size if requested. */
8390
8391 if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 &&
8392 (dat_datctl.control & CTL_DFA) == 0)
8393 show_heapframes_size();
8394
8395 show_memory = FALSE;
8396 return PR_OK;
8397 }
8398
8399
8400
8401
8402 /*************************************************
8403 * Print PCRE2 version *
8404 *************************************************/
8405
8406 static void
print_version(FILE * f,BOOL include_mode)8407 print_version(FILE *f, BOOL include_mode)
8408 {
8409 char buf[16];
8410 VERSION_TYPE *vp;
8411 fprintf(f, "PCRE2 version ");
8412 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8413 if (include_mode)
8414 {
8415 sprintf(buf, "%d-bit", test_mode);
8416 fprintf(f, " (%s)", buf);
8417 }
8418 fprintf(f, "\n");
8419 }
8420
8421
8422
8423 /*************************************************
8424 * Print Unicode version *
8425 *************************************************/
8426
8427 static void
print_unicode_version(FILE * f)8428 print_unicode_version(FILE *f)
8429 {
8430 VERSION_TYPE *vp;
8431 fprintf(f, "Unicode version ");
8432 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8433 }
8434
8435
8436
8437 /*************************************************
8438 * Print JIT target *
8439 *************************************************/
8440
8441 static void
print_jit_target(FILE * f)8442 print_jit_target(FILE *f)
8443 {
8444 VERSION_TYPE *vp;
8445 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8446 }
8447
8448
8449
8450 /*************************************************
8451 * Print newline configuration *
8452 *************************************************/
8453
8454 /* Output is always to stdout.
8455
8456 Arguments:
8457 rc the return code from PCRE2_CONFIG_NEWLINE
8458 isc TRUE if called from "-C newline"
8459 Returns: nothing
8460 */
8461
8462 static void
print_newline_config(uint32_t optval,BOOL isc)8463 print_newline_config(uint32_t optval, BOOL isc)
8464 {
8465 if (!isc) printf(" Default newline sequence is ");
8466 if (optval < sizeof(newlines)/sizeof(char *))
8467 printf("%s\n", newlines[optval]);
8468 else
8469 printf("a non-standard value: %d\n", optval);
8470 }
8471
8472
8473
8474 /*************************************************
8475 * Usage function *
8476 *************************************************/
8477
/* Writes the command line summary to stdout. The text is emitted in groups of
adjacent string literals; a separate call is made around each conditionally
compiled line so that no preprocessor directive appears inside a call's
argument list.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs(
  "Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(
  " -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"
  " -AC as -ac, but also set subject 'callout_extra' modifier\n"
  " -b set default pattern modifier 'fullbincode'\n"
  " -C show PCRE2 compile-time options and exit\n"
  " -C arg show a specific compile-time option and exit with its\n"
  " value if numeric (else 0). The arg can be:\n"
  " backslash-C use of \\C is enabled [0, 1]\n"
  " bsr \\R type [ANYCRLF, ANY]\n"
  " ebcdic compiled for EBCDIC character code [0,1]\n"
  " ebcdic-nl NL code if compiled for EBCDIC\n"
  " jit just-in-time compiler supported [0, 1]\n"
  " linksize internal link size [2, 3, 4]\n"
  " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
  " pcre2-8 8 bit library support enabled [0, 1]\n"
  " pcre2-16 16 bit library support enabled [0, 1]\n"
  " pcre2-32 32 bit library support enabled [0, 1]\n"
  " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

fputs(
  " -d set default pattern modifier 'debug'\n"
  " -dfa set default subject modifier 'dfa'\n"
  " -error <n,m,..> show messages for error numbers, then exit\n"
  " -help show usage information\n"
  " -i set default pattern modifier 'info'\n"
  " -jit set default pattern modifier 'jit'\n"
  " -jitfast set default pattern modifier 'jitfast'\n"
  " -jitverify set default pattern modifier 'jitverify'\n"
  " -LM list pattern and subject modifiers, then exit\n"
  " -LP list non-script properties, then exit\n"
  " -LS list supported scripts, then exit\n"
  " -q quiet: do not output PCRE2 version number at start\n"
  " -pattern <s> set default pattern modifier fields\n"
  " -subject <s> set default subject modifier fields\n"
  " -S <n> set stack size to <n> mebibytes\n"
  " -t [<n>] time compilation and execution, repeating <n> times\n"
  " -tm [<n>] time execution (matching) only, repeating <n> times\n"
  " -T same as -t, but show total times at the end\n"
  " -TM same as -tm, but show total time at the end\n"
  " -v|--version show PCRE2 version and exit\n", stdout);
}
8536
8537
8538
8539 /*************************************************
8540 * Handle -C option *
8541 *************************************************/
8542
8543 /* This option outputs configuration options and sets an appropriate return
8544 code when asked for a single option. The code is abstracted into a separate
8545 function because of its size. Use whichever pcre2_config() function is
8546 available.
8547
8548 Argument: an option name or NULL
8549 Returns: the return code
8550 */
8551
8552 static int
c_option(const char * arg)8553 c_option(const char *arg)
8554 {
8555 uint32_t optval;
8556 unsigned int i = COPTLISTCOUNT;
8557 int yield = 0;
8558
8559 if (arg != NULL && arg[0] != CHAR_MINUS)
8560 {
8561 for (i = 0; i < COPTLISTCOUNT; i++)
8562 if (strcmp(arg, coptlist[i].name) == 0) break;
8563
8564 if (i >= COPTLISTCOUNT)
8565 {
8566 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8567 return 0;
8568 }
8569
8570 switch (coptlist[i].type)
8571 {
8572 case CONF_BSR:
8573 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8574 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8575 break;
8576
8577 case CONF_FIX:
8578 yield = coptlist[i].value;
8579 printf("%d\n", yield);
8580 break;
8581
8582 case CONF_FIZ:
8583 optval = coptlist[i].value;
8584 printf("%d\n", optval);
8585 break;
8586
8587 case CONF_INT:
8588 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8589 printf("%d\n", yield);
8590 break;
8591
8592 case CONF_NL:
8593 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8594 print_newline_config(optval, TRUE);
8595 break;
8596 }
8597
8598 /* For VMS, return the value by setting a symbol, for certain values only. This
8599 is contributed code which the PCRE2 developers have no means of testing. */
8600
8601 #ifdef __VMS
8602
8603 /* This is the original code provided by the first VMS contributor. */
8604 #ifdef NEVER
8605 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8606 {
8607 char ucname[16];
8608 strcpy(ucname, coptlist[i].name);
8609 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8610 vms_setsymbol(ucname, 0, optval);
8611 }
8612 #endif
8613
8614 /* This is the new code, provided by a second VMS contributor. */
8615
8616 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8617 {
8618 char nam_buf[22], val_buf[4];
8619 $DESCRIPTOR(nam, nam_buf);
8620 $DESCRIPTOR(val, val_buf);
8621
8622 strcpy(nam_buf, coptlist[i].name);
8623 nam.dsc$w_length = strlen(nam_buf);
8624 sprintf(val_buf, "%d", yield);
8625 val.dsc$w_length = strlen(val_buf);
8626 lib$set_symbol(&nam, &val);
8627 }
8628 #endif /* __VMS */
8629
8630 return yield;
8631 }
8632
8633 /* No argument for -C: output all configuration information. */
8634
8635 print_version(stdout, FALSE);
8636 printf("Compiled with\n");
8637
8638 #ifdef EBCDIC
8639 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8640 #if defined NATIVE_ZOS
8641 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8642 #endif
8643 #endif
8644
8645 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8646 if (optval & 1) printf(" 8-bit support\n");
8647 if (optval & 2) printf(" 16-bit support\n");
8648 if (optval & 4) printf(" 32-bit support\n");
8649
8650 #ifdef SUPPORT_VALGRIND
8651 printf(" Valgrind support\n");
8652 #endif
8653
8654 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8655 if (optval != 0)
8656 {
8657 printf(" UTF and UCP support (");
8658 print_unicode_version(stdout);
8659 printf(")\n");
8660 }
8661 else printf(" No Unicode support\n");
8662
8663 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8664 if (optval != 0)
8665 {
8666 printf(" Just-in-time compiler support: ");
8667 print_jit_target(stdout);
8668 printf("\n");
8669 }
8670 else
8671 {
8672 printf(" No just-in-time compiler support\n");
8673 }
8674
8675 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8676 print_newline_config(optval, FALSE);
8677 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8678 printf(" \\R matches %s\n",
8679 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8680 "all Unicode newlines");
8681 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8682 printf(" \\C is %ssupported\n", optval? "not ":"");
8683 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8684 printf(" Internal link size = %d\n", optval);
8685 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8686 printf(" Parentheses nest limit = %d\n", optval);
8687 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8688 printf(" Default heap limit = %d kibibytes\n", optval);
8689 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8690 printf(" Default match limit = %d\n", optval);
8691 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8692 printf(" Default depth limit = %d\n", optval);
8693
8694 #if defined SUPPORT_LIBREADLINE
8695 printf(" pcre2test has libreadline support\n");
8696 #elif defined SUPPORT_LIBEDIT
8697 printf(" pcre2test has libedit support\n");
8698 #else
8699 printf(" pcre2test has neither libreadline nor libedit support\n");
8700 #endif
8701
8702 return 0;
8703 }
8704
8705
8706 /*************************************************
8707 * Format one property/script list item *
8708 *************************************************/
8709
8710 #ifdef SUPPORT_UNICODE
8711 static void
format_list_item(int16_t * ff,char * buff,BOOL isscript)8712 format_list_item(int16_t *ff, char *buff, BOOL isscript)
8713 {
8714 int count;
8715 int maxi = 0;
8716 const char *maxs = "";
8717 size_t max = 0;
8718
8719 for (count = 0; ff[count] >= 0; count++) {}
8720
8721 /* Find the name to put first. For scripts, any 3-character name is chosen.
8722 For non-scripts, or if there is no 3-character name, take the longest. */
8723
8724 for (int i = 0; ff[i] >= 0; i++)
8725 {
8726 const char *s = PRIV(utt_names) + ff[i];
8727 size_t len = strlen(s);
8728 if (isscript && len == 3)
8729 {
8730 maxi = i;
8731 max = len;
8732 maxs = s;
8733 break;
8734 }
8735 else if (len > max)
8736 {
8737 max = len;
8738 maxi = i;
8739 maxs = s;
8740 }
8741 }
8742
8743 strcpy(buff, maxs);
8744 buff += max;
8745
8746 if (count > 1)
8747 {
8748 const char *sep = " (";
8749 for (int i = 0; i < count; i++)
8750 {
8751 if (i == maxi) continue;
8752 buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]);
8753 sep = ", ";
8754 }
8755 (void)sprintf(buff, ")");
8756 }
8757 }
8758 #endif /* SUPPORT_UNICODE */
8759
8760
8761
8762 /*************************************************
8763 * Display scripts or properties *
8764 *************************************************/
8765
8766 #define MAX_SYNONYMS 5
8767
8768 static void
display_properties(BOOL wantscripts)8769 display_properties(BOOL wantscripts)
8770 {
8771 #ifndef SUPPORT_UNICODE
8772 (void)wantscripts;
8773 printf("** This version of PCRE2 was compiled without Unicode support.\n");
8774 #else
8775
8776 uint16_t seentypes[1024];
8777 uint16_t seenvalues[1024];
8778 int seencount = 0;
8779 int16_t found[256][MAX_SYNONYMS + 1];
8780 int fc = 0;
8781 int colwidth = 40;
8782 int n = wantscripts? ucp_Script_Count : ucp_Bprop_Count;
8783
8784 for (size_t i = 0; i < PRIV(utt_size); i++)
8785 {
8786 int k;
8787 int m = 0;
8788 int16_t *fv;
8789 const ucp_type_table *t = PRIV(utt) + i;
8790 unsigned int value = t->value;
8791
8792 if (wantscripts)
8793 {
8794 if (t->type != PT_SC && t->type != PT_SCX) continue;
8795 }
8796 else
8797 {
8798 if (t->type != PT_BOOL) continue;
8799 }
8800
8801 for (k = 0; k < seencount; k++)
8802 {
8803 if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
8804 }
8805 if (k < seencount) continue;
8806
8807 seentypes[seencount] = t->type;
8808 seenvalues[seencount++] = t->value;
8809
8810 fv = found[fc++];
8811 fv[m++] = t->name_offset;
8812
8813 for (size_t j = i + 1; j < PRIV(utt_size); j++)
8814 {
8815 const ucp_type_table *tt = PRIV(utt) + j;
8816 if (tt->type != t->type || tt->value != value) continue;
8817 if (m >= MAX_SYNONYMS)
8818 printf("** Too many synonyms: %s ignored\n",
8819 PRIV(utt_names) + tt->name_offset);
8820 else fv[m++] = tt->name_offset;
8821 }
8822
8823 fv[m] = -1;
8824 }
8825
8826 printf("-------------------------- SUPPORTED %s --------------------------\n\n",
8827 wantscripts? "SCRIPTS" : "PROPERTIES");
8828
8829 if (!wantscripts) printf(
8830 "This release of PCRE2 supports Unicode's general category properties such\n"
8831 "as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
8832 "and the following binary (yes/no) properties:\n\n");
8833
8834
8835 for (int k = 0; k < (n+1)/2; k++)
8836 {
8837 int x;
8838 char buff1[128];
8839 char buff2[128];
8840
8841 format_list_item(found[k], buff1, wantscripts);
8842 x = k + (n+1)/2;
8843 if (x < n) format_list_item(found[x], buff2, wantscripts);
8844 else buff2[0] = 0;
8845
8846 x = printf("%s", buff1);
8847 while (x++ < colwidth) printf(" ");
8848 printf("%s\n", buff2);
8849 }
8850
8851 #endif /* SUPPORT_UNICODE */
8852 }
8853
8854
8855
8856 /*************************************************
8857 * Display one modifier *
8858 *************************************************/
8859
8860 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8861 display_one_modifier(modstruct *m, BOOL for_pattern)
8862 {
8863 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8864 '*' : ' ';
8865 printf("%c%s", c, m->name);
8866 for (size_t i = 0; i < C1MODLISTCOUNT; i++)
8867 {
8868 if (strcmp(m->name, c1modlist[i].fullname) == 0)
8869 printf(" (%c)", c1modlist[i].onechar);
8870 }
8871 }
8872
8873
8874
8875 /*************************************************
8876 * Display pattern or subject modifiers *
8877 *************************************************/
8878
8879 /* In order to print in two columns, first scan without printing to get a list
8880 of the modifiers that are required.
8881
8882 Arguments:
8883 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8884 title string to be used in title
8885
8886 Returns: nothing
8887 */
8888
8889 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8890 display_selected_modifiers(BOOL for_pattern, const char *title)
8891 {
8892 uint32_t i, j;
8893 uint32_t n = 0;
8894 uint32_t list[MODLISTCOUNT];
8895 uint32_t extra[MODLISTCOUNT];
8896
8897 for (i = 0; i < MODLISTCOUNT; i++)
8898 {
8899 BOOL is_pattern = TRUE;
8900 modstruct *m = modlist + i;
8901
8902 switch (m->which)
8903 {
8904 case MOD_CTC: /* Compile context */
8905 case MOD_PAT: /* Pattern */
8906 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8907 break;
8908
8909 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8910 subjects, but can be given with a pattern. We list them as subject
8911 modifiers, but marked with an asterisk.*/
8912
8913 case MOD_CTM: /* Match context */
8914 case MOD_DAT: /* Subject line */
8915 case MOD_DATP: /* Subject line, OK for Perl-compatible test */
8916 case MOD_PND: /* As PD, but not default pattern */
8917 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8918 is_pattern = FALSE;
8919 break;
8920
8921 default: printf("** Unknown type for modifier '%s'\n", m->name);
8922 /* Fall through */
8923 case MOD_PD: /* Pattern or subject */
8924 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8925 is_pattern = for_pattern;
8926 break;
8927 }
8928
8929 if (for_pattern == is_pattern)
8930 {
8931 extra[n] = 0;
8932 for (size_t k = 0; k < C1MODLISTCOUNT; k++)
8933 {
8934 if (strcmp(m->name, c1modlist[k].fullname) == 0)
8935 {
8936 extra[n] += 4;
8937 break;
8938 }
8939 }
8940 list[n++] = i;
8941 }
8942 }
8943
8944 /* Now print from the list in two columns. */
8945
8946 printf("-------------- %s MODIFIERS --------------\n", title);
8947
8948 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8949 {
8950 modstruct *m = modlist + list[i];
8951 display_one_modifier(m, for_pattern);
8952 if (j < n)
8953 {
8954 uint32_t k = 27 - strlen(m->name) - extra[i];
8955 while (k-- > 0) printf(" ");
8956 display_one_modifier(modlist + list[j], for_pattern);
8957 }
8958 printf("\n");
8959 }
8960 }
8961
8962
8963
8964 /*************************************************
8965 * Display the list of modifiers *
8966 *************************************************/
8967
8968 static void
display_modifiers(void)8969 display_modifiers(void)
8970 {
8971 printf(
8972 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8973 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8974 "that are listed for both patterns and subjects have different effects in\n"
8975 "each case.\n\n");
8976 display_selected_modifiers(TRUE, "PATTERN");
8977 printf("\n");
8978 display_selected_modifiers(FALSE, "SUBJECT");
8979 }
8980
8981
8982
8983 /*************************************************
8984 * Main Program *
8985 *************************************************/
8986
8987 int
main(int argc,char ** argv)8988 main(int argc, char **argv)
8989 {
8990 uint32_t temp;
8991 uint32_t yield = 0;
8992 uint32_t op = 1;
8993 BOOL notdone = TRUE;
8994 BOOL quiet = FALSE;
8995 BOOL showtotaltimes = FALSE;
8996 BOOL skipping = FALSE;
8997 char *arg_subject = NULL;
8998 char *arg_pattern = NULL;
8999 char *arg_error = NULL;
9000
9001 /* The offsets to the options and control bits fields of the pattern and data
9002 control blocks must be the same so that common options and controls such as
9003 "anchored" or "memory" can work for either of them from a single table entry.
9004 We cannot test this till runtime because "offsetof" does not work in the
9005 preprocessor. */
9006
9007 if (PO(options) != DO(options) || PO(control) != DO(control) ||
9008 PO(control2) != DO(control2))
9009 {
9010 fprintf(stderr, "** Coding error: "
9011 "options and control offsets for pattern and data must be the same.\n");
9012 return 1;
9013 }
9014
9015 /* Get the PCRE2 and Unicode version number and JIT target information, at the
9016 same time checking that a request for the length gives the same answer. Also
9017 check lengths for non-string items. */
9018
9019 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
9020 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
9021
9022 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
9023 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
9024
9025 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
9026 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
9027
9028 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
9029 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
9030 {
9031 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
9032 return 1;
9033 }
9034
9035 /* Check that bad options are diagnosed. */
9036
9037 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
9038 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
9039 {
9040 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
9041 return 1;
9042 }
9043
9044 /* This configuration option is now obsolete, but running a quick check ensures
9045 that its code is covered. */
9046
9047 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
9048
9049 /* Get buffers from malloc() so that valgrind will check their misuse when
9050 debugging. They grow automatically when very long lines are read. The 16-
9051 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
9052
9053 buffer = (uint8_t *)malloc(pbuffer8_size);
9054 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
9055
9056 /* The following _setmode() stuff is some Windows magic that tells its runtime
9057 library to translate CRLF into a single LF character. At least, that's what
9058 I've been told: never having used Windows I take this all on trust. Originally
9059 it set 0x8000, but then I was advised that _O_BINARY was better. */
9060
9061 #if defined(_WIN32) || defined(WIN32)
9062 _setmode( _fileno( stdout ), _O_BINARY );
9063 #endif
9064
9065 /* Initialization that does not depend on the running mode. */
9066
9067 locale_name[0] = 0;
9068
9069 memset(&def_patctl, 0, sizeof(patctl));
9070 def_patctl.convert_type = CONVERT_UNSET;
9071
9072 memset(&def_datctl, 0, sizeof(datctl));
9073 def_datctl.oveccount = DEFAULT_OVECCOUNT;
9074 def_datctl.copy_numbers[0] = -1;
9075 def_datctl.get_numbers[0] = -1;
9076 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
9077 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
9078 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
9079
9080 /* Scan command line options. */
9081
9082 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
9083 {
9084 char *endptr;
9085 char *arg = argv[op];
9086 unsigned long uli;
9087
9088 /* List modifiers and exit. */
9089
9090 if (strcmp(arg, "-LM") == 0)
9091 {
9092 display_modifiers();
9093 goto EXIT;
9094 }
9095
9096 /* List properties and exit */
9097
9098 if (strcmp(arg, "-LP") == 0)
9099 {
9100 display_properties(FALSE);
9101 goto EXIT;
9102 }
9103
9104 /* List scripts and exit */
9105
9106 if (strcmp(arg, "-LS") == 0)
9107 {
9108 display_properties(TRUE);
9109 goto EXIT;
9110 }
9111
9112 /* Display and/or set return code for configuration options. */
9113
9114 if (strcmp(arg, "-C") == 0)
9115 {
9116 yield = c_option(argv[op + 1]);
9117 goto EXIT;
9118 }
9119
9120 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
9121 and 32-bit modes because that won't happen naturally when 8-bit is also
9122 configured. Also call some other functions that are not otherwise used. This
9123 means that a coverage report won't claim there are uncalled functions. */
9124
9125 if (strcmp(arg, "-8") == 0)
9126 {
9127 #ifdef SUPPORT_PCRE2_8
9128 test_mode = PCRE8_MODE;
9129 (void)pcre2_set_bsr_8(pat_context8, 999);
9130 (void)pcre2_set_newline_8(pat_context8, 999);
9131 #else
9132 fprintf(stderr,
9133 "** This version of PCRE2 was built without 8-bit support\n");
9134 exit(1);
9135 #endif
9136 }
9137
9138 else if (strcmp(arg, "-16") == 0)
9139 {
9140 #ifdef SUPPORT_PCRE2_16
9141 test_mode = PCRE16_MODE;
9142 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
9143 (void)pcre2_set_bsr_16(pat_context16, 999);
9144 (void)pcre2_set_newline_16(pat_context16, 999);
9145 #else
9146 fprintf(stderr,
9147 "** This version of PCRE2 was built without 16-bit support\n");
9148 exit(1);
9149 #endif
9150 }
9151
9152 else if (strcmp(arg, "-32") == 0)
9153 {
9154 #ifdef SUPPORT_PCRE2_32
9155 test_mode = PCRE32_MODE;
9156 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
9157 (void)pcre2_set_bsr_32(pat_context32, 999);
9158 (void)pcre2_set_newline_32(pat_context32, 999);
9159 #else
9160 fprintf(stderr,
9161 "** This version of PCRE2 was built without 32-bit support\n");
9162 exit(1);
9163 #endif
9164 }
9165
9166 /* Set quiet (suppress the version heading line) */
9167
9168 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
9169
9170 /* Set system stack size; the argument is a number of MiB */
9171
9172 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
9173 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
9174 {
9175 #if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
9176 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
9177 exit(1);
9178 #else
9179 int rc;
9180 uint32_t stack_size;
9181 struct rlimit rlim;
9182 if (U32OVERFLOW(uli))
9183 {
9184 fprintf(stderr, "** Argument for -S is too big\n");
9185 exit(1);
9186 }
9187 stack_size = (uint32_t)uli;
9188 getrlimit(RLIMIT_STACK, &rlim);
9189 rlim.rlim_cur = stack_size * 1024 * 1024;
9190 if (rlim.rlim_cur > rlim.rlim_max)
9191 {
9192 fprintf(stderr,
9193 "pcre2test: requested stack size %luMiB is greater than hard limit ",
9194 (unsigned long int)stack_size);
9195 if (rlim.rlim_max % (1024*1024) == 0) fprintf(stderr, "%luMiB\n",
9196 (unsigned long int)(rlim.rlim_max/(1024 * 1024)));
9197 else if (rlim.rlim_max % 1024 == 0) fprintf(stderr, "%luKiB\n",
9198 (unsigned long int)(rlim.rlim_max/1024));
9199 else fprintf(stderr, "%lu bytes\n", (unsigned long int)(rlim.rlim_max));
9200 exit(1);
9201 }
9202 rc = setrlimit(RLIMIT_STACK, &rlim);
9203 if (rc != 0)
9204 {
9205 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
9206 (unsigned long int)stack_size, strerror(errno));
9207 exit(1);
9208 }
9209 op++;
9210 argc--;
9211 #endif
9212 }
9213
9214 /* Set some common pattern and subject controls */
9215
9216 else if (strcmp(arg, "-AC") == 0)
9217 {
9218 def_patctl.options |= PCRE2_AUTO_CALLOUT;
9219 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
9220 }
9221 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
9222 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
9223 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
9224 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
9225 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
9226 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
9227 strcmp(arg, "-jitfast") == 0)
9228 {
9229 if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
9230 else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
9231 def_patctl.jit = JIT_DEFAULT; /* full & partial */
9232 #ifndef SUPPORT_JIT
9233 fprintf(stderr, "** Warning: JIT support is not available: "
9234 "-jit[fast|verify] calls functions that do nothing.\n");
9235 #endif
9236 }
9237
9238 /* Set timing parameters */
9239
9240 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
9241 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
9242 {
9243 int both = arg[2] == 0;
9244 showtotaltimes = arg[1] == 'T';
9245 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
9246 {
9247 if (uli == 0)
9248 {
9249 fprintf(stderr, "** Argument for %s must not be zero\n", arg);
9250 exit(1);
9251 }
9252 if (U32OVERFLOW(uli))
9253 {
9254 fprintf(stderr, "** Argument for %s is too big\n", arg);
9255 exit(1);
9256 }
9257 timeitm = (int)uli;
9258 op++;
9259 argc--;
9260 }
9261 else timeitm = LOOPREPEAT;
9262 if (both) timeit = timeitm;
9263 }
9264
9265 /* Give help */
9266
9267 else if (strcmp(arg, "-help") == 0 ||
9268 strcmp(arg, "--help") == 0)
9269 {
9270 usage();
9271 goto EXIT;
9272 }
9273
9274 /* Show version */
9275
9276 else if (memcmp(arg, "-v", 2) == 0 ||
9277 strcmp(arg, "--version") == 0)
9278 {
9279 print_version(stdout, FALSE);
9280 goto EXIT;
9281 }
9282
9283 /* The following options save their data for processing once we know what
9284 the running mode is. */
9285
9286 else if (strcmp(arg, "-error") == 0)
9287 {
9288 arg_error = argv[op+1];
9289 goto CHECK_VALUE_EXISTS;
9290 }
9291
9292 else if (strcmp(arg, "-subject") == 0)
9293 {
9294 arg_subject = argv[op+1];
9295 goto CHECK_VALUE_EXISTS;
9296 }
9297
9298 else if (strcmp(arg, "-pattern") == 0)
9299 {
9300 arg_pattern = argv[op+1];
9301 CHECK_VALUE_EXISTS:
9302 if (argc <= 2)
9303 {
9304 fprintf(stderr, "** Missing value for %s\n", arg);
9305 yield = 1;
9306 goto EXIT;
9307 }
9308 op++;
9309 argc--;
9310 }
9311
9312 /* Unrecognized option */
9313
9314 else
9315 {
9316 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
9317 usage();
9318 yield = 1;
9319 goto EXIT;
9320 }
9321 op++;
9322 argc--;
9323 }
9324
9325 /* If -error was present, get the error numbers, show the messages, and exit.
9326 We wait to do this until we know which mode we are in. */
9327
9328 if (arg_error != NULL)
9329 {
9330 int len;
9331 int errcode;
9332 char *endptr;
9333
9334 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
9335 least 128 code units, because it is used for retrieving error messages. */
9336
9337 #ifdef SUPPORT_PCRE2_16
9338 if (test_mode == PCRE16_MODE)
9339 {
9340 pbuffer16_size = 256;
9341 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
9342 if (pbuffer16 == NULL)
9343 {
9344 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
9345 pbuffer16_size);
9346 yield = 1;
9347 goto EXIT;
9348 }
9349 }
9350 #endif
9351
9352 #ifdef SUPPORT_PCRE2_32
9353 if (test_mode == PCRE32_MODE)
9354 {
9355 pbuffer32_size = 512;
9356 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
9357 if (pbuffer32 == NULL)
9358 {
9359 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
9360 pbuffer32_size);
9361 yield = 1;
9362 goto EXIT;
9363 }
9364 }
9365 #endif
9366
9367 /* Loop along a list of error numbers. */
9368
9369 for (;;)
9370 {
9371 errcode = strtol(arg_error, &endptr, 10);
9372 if (*endptr != 0 && *endptr != CHAR_COMMA)
9373 {
9374 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
9375 yield = 1;
9376 goto EXIT;
9377 }
9378 printf("Error %d: ", errcode);
9379 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
9380 if (len < 0)
9381 {
9382 switch (len)
9383 {
9384 case PCRE2_ERROR_BADDATA:
9385 printf("PCRE2_ERROR_BADDATA (unknown error number)");
9386 break;
9387
9388 case PCRE2_ERROR_NOMEMORY:
9389 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
9390 break;
9391
9392 default:
9393 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
9394 break;
9395 }
9396 }
9397 else
9398 {
9399 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
9400 }
9401 printf("\n");
9402 if (*endptr == 0) goto EXIT;
9403 arg_error = endptr + 1;
9404 }
9405 /* Control never reaches here */
9406 } /* End of -error handling */
9407
9408 /* Initialize things that cannot be done until we know which test mode we are
9409 running in. Exercise the general context copying and match data size functions,
9410 which are not otherwise used. */
9411
9412 code_unit_size = test_mode/8;
9413 max_oveccount = DEFAULT_OVECCOUNT;
9414
9415 /* Use macros to save a lot of duplication. */
9416
/* CREATECONTEXTS sets up every context object for the code unit width selected
by BITS, which must be #defined (8, 16 or 32) before the macro is expanded. The
general context is created first, with my_malloc and my_free as its memory
functions, because each of the other creation calls takes it as an argument.
A copy of the general context is made, and each default_xxx context is
immediately followed by a copy of itself. The last line creates the match data
block, sized by max_oveccount. There is no trailing semicolon: the place where
the macro is used supplies it. NOTE(review): G() is defined outside this
section; presumably it pastes its two arguments into one identifier - confirm. */
9417 #define CREATECONTEXTS \
9418 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
9419 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
9420 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
9421 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
9422 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
9423 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
9424 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
9425 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
9426 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
9427
/* CONTEXTTESTS calls several context setter functions, each with the value 0,
and the match data size function, so that these functions are executed at
least once; every result is discarded with a (void) cast. It refers to
pat_context, dat_context and match_data for the width selected by BITS, and in
this function it is always expanded immediately after CREATECONTEXTS. There is
no trailing semicolon: the place where the macro is used supplies it. */
9428 #define CONTEXTTESTS \
9429 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
9430 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
9431 (void)G(pcre2_set_max_pattern_compiled_length_,BITS)(G(pat_context,BITS), 0); \
9432 (void)G(pcre2_set_max_varlookbehind_,BITS)(G(pat_context,BITS), 0); \
9433 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
9434 (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
9435
9436 /* Call the appropriate functions for the current mode, and exercise some
9437 functions that are not otherwise called. */
9438
9439 #ifdef SUPPORT_PCRE2_8
9440 #undef BITS
9441 #define BITS 8
9442 if (test_mode == PCRE8_MODE)
9443 {
9444 CREATECONTEXTS;
9445 CONTEXTTESTS;
9446 }
9447 #endif
9448
9449 #ifdef SUPPORT_PCRE2_16
9450 #undef BITS
9451 #define BITS 16
9452 if (test_mode == PCRE16_MODE)
9453 {
9454 CREATECONTEXTS;
9455 CONTEXTTESTS;
9456 }
9457 #endif
9458
9459 #ifdef SUPPORT_PCRE2_32
9460 #undef BITS
9461 #define BITS 32
9462 if (test_mode == PCRE32_MODE)
9463 {
9464 CREATECONTEXTS;
9465 CONTEXTTESTS;
9466 }
9467 #endif
9468
9469 /* Set a default parentheses nest limit that is large enough to run the
9470 standard tests (this also exercises the function). */
9471
9472 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
9473
9474 /* Handle command line modifier settings, sending any error messages to
9475 stderr. We need to know the mode before modifying the context, and it is tidier
9476 to do them all in the same way. */
9477
9478 outfile = stderr;
9479 if ((arg_pattern != NULL &&
9480 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
9481 (arg_subject != NULL &&
9482 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
9483 {
9484 yield = 1;
9485 goto EXIT;
9486 }
9487
9488 /* Sort out the input and output files, defaulting to stdin/stdout. */
9489
9490 infile = stdin;
9491 outfile = stdout;
9492
9493 if (argc > 1 && strcmp(argv[op], "-") != 0)
9494 {
9495 infile = fopen(argv[op], INPUT_MODE);
9496 if (infile == NULL)
9497 {
9498 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9499 yield = 1;
9500 goto EXIT;
9501 }
9502 }
9503
9504 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9505 if (INTERACTIVE(infile)) using_history();
9506 #endif
9507
9508 if (argc > 2)
9509 {
9510 outfile = fopen(argv[op+1], OUTPUT_MODE);
9511 if (outfile == NULL)
9512 {
9513 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9514 yield = 1;
9515 goto EXIT;
9516 }
9517 }
9518
9519 /* Output a heading line unless quiet, then process input lines. */
9520
9521 if (!quiet) print_version(outfile, TRUE);
9522
9523 SET(compiled_code, NULL);
9524
9525 #ifdef SUPPORT_PCRE2_8
9526 preg.re_pcre2_code = NULL;
9527 preg.re_match_data = NULL;
9528 #endif
9529
9530 while (notdone)
9531 {
9532 uint8_t *p;
9533 int rc = PR_OK;
9534 BOOL expectdata = TEST(compiled_code, !=, NULL);
9535 #ifdef SUPPORT_PCRE2_8
9536 expectdata |= preg.re_pcre2_code != NULL;
9537 #endif
9538
9539 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
9540 break;
9541 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9542 fflush(outfile);
9543 p = buffer;
9544
9545 /* If we have a pattern set up for testing, or we are skipping after a
9546 compile failure, a blank line terminates this test. */
9547
9548 if (expectdata || skipping)
9549 {
9550 while (isspace(*p)) p++;
9551 if (*p == 0)
9552 {
9553 #ifdef SUPPORT_PCRE2_8
9554 if (preg.re_pcre2_code != NULL)
9555 {
9556 regfree(&preg);
9557 preg.re_pcre2_code = NULL;
9558 preg.re_match_data = NULL;
9559 }
9560 #endif /* SUPPORT_PCRE2_8 */
9561 if (TEST(compiled_code, !=, NULL))
9562 {
9563 SUB1(pcre2_code_free, compiled_code);
9564 SET(compiled_code, NULL);
9565 }
9566 skipping = FALSE;
9567 setlocale(LC_CTYPE, "C");
9568 }
9569
9570 /* Otherwise, if we are not skipping, and the line is not a data comment
9571 line starting with "\=" followed by white space, process a data line. */
9572
9573 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9574 {
9575 rc = process_data();
9576 }
9577 }
9578
9579 /* We do not have a pattern set up for testing. Lines starting with # are
9580 either comments or special commands. Blank lines are ignored. Otherwise, the
9581 line must start with a valid delimiter. It is then processed as a pattern
9582 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9583 valgrind, make the unused part of the buffer undefined, to catch overruns. */
9584
9585 else if (*p == '#')
9586 {
9587 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9588 rc = process_command();
9589 }
9590
9591 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9592 {
9593 rc = process_pattern();
9594 dfa_matched = 0;
9595 }
9596
9597 else
9598 {
9599 while (isspace(*p)) p++;
9600 if (*p != 0)
9601 {
9602 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9603 *buffer);
9604 rc = PR_SKIP;
9605 }
9606 }
9607
9608 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9609 else if (rc == PR_ABEND)
9610 {
9611 fprintf(outfile, "** pcre2test run abandoned\n");
9612 yield = 1;
9613 goto EXIT;
9614 }
9615 }
9616
9617 /* Finish off a normal run. */
9618
9619 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9620
9621 if (showtotaltimes)
9622 {
9623 const char *pad = "";
9624 fprintf(outfile, "--------------------------------------\n");
9625 if (timeit > 0)
9626 {
9627 fprintf(outfile, "Total compile time %8.2f microseconds\n",
9628 ((1000000 / CLOCKS_PER_SEC) * (double)total_compile_time) / timeit);
9629 if (total_jit_compile_time > 0)
9630 fprintf(outfile, "Total JIT compile %8.2f microseconds\n",
9631 ((1000000 / CLOCKS_PER_SEC) * (double)total_jit_compile_time) / \
9632 timeit);
9633 pad = " ";
9634 }
9635 fprintf(outfile, "Total match time %s%8.2f microseconds\n", pad,
9636 ((1000000 / CLOCKS_PER_SEC) * (double)total_match_time) / timeitm);
9637 }
9638
9639
9640 EXIT:
9641
9642 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9643 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9644 #endif
9645
9646 if (infile != NULL && infile != stdin) fclose(infile);
9647 if (outfile != NULL && outfile != stdout) fclose(outfile);
9648
9649 free(buffer);
9650 free(dbuffer);
9651 free(pbuffer8);
9652 free(dfa_workspace);
9653 free(tables3);
9654 PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
9655 PCRE2_MATCH_DATA_FREE(match_data);
9656 SUB1(pcre2_code_free, compiled_code);
9657
9658 while(patstacknext-- > 0)
9659 {
9660 SET(compiled_code, patstack[patstacknext]);
9661 SUB1(pcre2_code_free, compiled_code);
9662 }
9663
9664 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9665 if (jit_stack != NULL)
9666 {
9667 PCRE2_JIT_STACK_FREE(jit_stack);
9668 }
9669
/* FREECONTEXTS releases the eight context objects that CREATECONTEXTS sets up
for the width selected by BITS (the match data block is freed separately,
above). Unlike CREATECONTEXTS and CONTEXTTESTS, its last line already ends
with a semicolon, so "FREECONTEXTS;" expands to an extra empty statement,
which is harmless. It is expanded below for every supported width without
testing test_mode, and EXIT can be reached before CREATECONTEXTS has run, so
the pointers may never have been assigned by CREATECONTEXTS. NOTE(review):
this presumably relies on those pointers starting out as NULL and on the pcre2
xxx_free() functions doing nothing when given NULL - confirm. */
9670 #define FREECONTEXTS \
9671 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9672 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9673 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9674 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9675 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9676 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9677 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9678 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9679
9680 #ifdef SUPPORT_PCRE2_8
9681 #undef BITS
9682 #define BITS 8
9683 if (preg.re_pcre2_code != NULL) regfree(&preg);
9684 FREECONTEXTS;
9685 #endif
9686
9687 #ifdef SUPPORT_PCRE2_16
9688 #undef BITS
9689 #define BITS 16
9690 free(pbuffer16);
9691 FREECONTEXTS;
9692 #endif
9693
9694 #ifdef SUPPORT_PCRE2_32
9695 #undef BITS
9696 #define BITS 32
9697 free(pbuffer32);
9698 FREECONTEXTS;
9699 #endif
9700
9701 #if defined(__VMS)
9702 yield = SS$_NORMAL; /* Return values via DCL symbols */
9703 #endif
9704
9705 return yield;
9706 }
9707
9708 /* End of pcre2test.c */
9709