xref: /aosp_15_r20/external/pcre/src/pcre2_jit_test.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55    \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (Next Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82    ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89    \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Forward declarations of the test suites invoked by main(). Their
   definitions are not part of this portion of the file; main() combines
   their int results with bitwise OR to form the process exit status. */
95 static int regression_tests(void);
96 static int invalid_utf8_regression_tests(void);
97 static int invalid_utf16_regression_tests(void);
98 static int invalid_utf32_regression_tests(void);
99 
/* Program entry point.

   Queries the library for JIT support using whichever code unit width is
   compiled in (8-bit preferred, then 16-bit, then 32-bit). If JIT support
   is not reported, prints a message and returns 1. Otherwise runs all the
   regression suites and returns the bitwise OR of their results. */
int main(void)
{
	/* PCRE2_CONFIG_JIT stores a uint32_t through the "where" pointer, so
	   the receiving variable must have exactly that type. It stays 0 if
	   none of the SUPPORT_PCRE2_* branches below is compiled in. */
	uint32_t jit = 0;
#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}
	/* Bitwise OR rather than logical OR: every suite is always executed,
	   and the results are merged into a single exit status. */
	return regression_tests()
		| invalid_utf8_regression_tests()
		| invalid_utf16_regression_tests()
		| invalid_utf32_regression_tests();
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Shorthand names for the combinations of PCRE2 compile options used in the
   compile_options column of the test table below. The letters encode the
   options: C = CASELESS, M = MULTILINE, U = UTF, P = UCP. */
126 #define MU	(PCRE2_MULTILINE | PCRE2_UTF)
127 #define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
128 #define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
129 #define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
130 #define M	(PCRE2_MULTILINE)
131 #define MP	(PCRE2_MULTILINE | PCRE2_UCP)
132 #define U	(PCRE2_UTF)
133 #define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)
134 
/* The "newline" column of the test table packs two settings into one int:
   the low 16 bits hold a PCRE2_NEWLINE_* value and the bits above them hold
   a PCRE2_BSR_* value. BSR(x) moves a BSR value into the upper position so
   that it can be OR-ed with a newline value; A is shorthand for the
   ANYCRLF newline convention. */
135 #define BSR(x)	((x) << 16)
136 #define A	PCRE2_NEWLINE_ANYCRLF
137 
/* Accessors that split a packed "newline" value back into its two parts. */
138 #define GET_NEWLINE(x)	((x) & 0xffff)
139 #define GET_BSR(x)	((x) >> 16)
140 
/* Layout of the "start_offset" column of the test table: the low 16 bits
   (OFFSET_MASK) carry a numeric offset, and the F_* bits above them are
   per-test flags that the table OR-s in (e.g. "1 | F_NOMATCH").
   The code that interprets the flags is outside this portion of the file,
   so the notes below are assumptions to confirm against the test driver:
     F_NO8 / F_NO16 / F_NO32 - presumably exclude a test for one code unit
                               width.
     F_NOMATCH               - presumably marks a test expected not to match.
     F_PROPERTY              - in the table it appears mostly on patterns
                               using \p, \P or \X; presumably the test needs
                               Unicode property support.
   NOTE(review): F_NO16 and F_NO32 have the same value (0x020000), so the two
   flags cannot be told apart; 0x400000 is unused if they should differ.
   Confirm whether the sharing is intentional before changing it. */
141 #define OFFSET_MASK	0x00ffff
142 #define F_NO8		0x010000
143 #define F_NO16		0x020000
144 #define F_NO32		0x020000
145 #define F_NOMATCH	0x040000
146 #define F_DIFF		0x080000
147 #define F_FORCECONV	0x100000
148 #define F_PROPERTY	0x200000
149 
/* One row of the regression test table. The table initializes the fields
   positionally, so their order must not change. */
150 struct regression_test_case {
	/* PCRE2 compile option bits (the table uses MU, CMUP, ... shorthands). */
151 	uint32_t compile_options;
	/* PCRE2_NEWLINE_* value in the low 16 bits, optionally OR-ed with
	   BSR(PCRE2_BSR_*); split with GET_NEWLINE() and GET_BSR(). */
152 	int newline;
	/* Match-time option bits; the table passes values such as
	   PCRE2_NOTBOL, PCRE2_NOTEOL and PCRE2_NO_UTF_CHECK. */
153 	int match_options;
	/* Numeric offset in the OFFSET_MASK bits, OR-ed with F_* test flags. */
154 	int start_offset;
	/* Pattern and subject as C strings; non-ASCII characters in the table
	   are written as byte escapes. */
155 	const char *pattern;
156 	const char *input;
157 };
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 	{ 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
202 	{ 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
203 
204 	/* Assertions. */
205 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
206 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
207 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
208 	{ MP, A, 0, 0, "\\B", "_\xa1" },
209 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
210 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
211 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
212 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
213 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
214 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
215 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
216 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
217 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
218 	{ 0, 0, 0, 0, "^ab", "ab" },
219 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
220 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
221 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
222 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
223 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
224 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
225 	{ 0, 0, 0, 0, "ab$", "ab" },
226 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
227 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
228 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
229 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
230 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
231 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
232 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
233 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
234 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
235 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
236 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
237 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
238 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
239 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
240 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
241 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
242 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
243 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
249 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
250 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
251 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
252 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
253 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
254 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
255 	{ M, A, 0, 0, "\\Aa", "aaa" },
256 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
257 	{ M, A, 0, 1, "\\Ga", "aaa" },
258 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
259 	{ M, A, 0, 0, "a\\z", "aaa" },
260 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
261 
262 	/* Brackets and alternatives. */
263 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
264 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
265 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
266 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
267 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
268 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
269 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
271 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
272 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
273 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
274 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
275 	{ CM, A, 0, 0, "ab|cd", "CD" },
276 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
277 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
278 	{ 0, A, 0, 0, "(a|)b*+a", "a" },
279 	{ 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
280 
281 	/* Greedy and non-greedy ? operators. */
282 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
283 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
284 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
285 	{ MU, A, 0, 0, "(a)?a", "manm" },
286 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
287 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
288 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
289 
290 	/* Greedy and non-greedy + operators */
291 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
292 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
293 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
294 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
295 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
296 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
297 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
298 	{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
299 
300 	/* Greedy and non-greedy * operators */
301 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
302 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
303 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
304 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
305 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
306 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
307 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
308 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
309 
310 	/* Combining ? + * operators */
311 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
312 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
313 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
314 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
315 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
316 
317 	/* Single character iterators. */
318 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
319 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
320 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
321 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
322 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
323 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
324 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
325 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
326 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
327 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
328 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
329 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
330 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
331 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
332 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
333 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
334 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
335 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
336 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
337 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
338 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
339 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
340 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
341 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
342 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
343 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
344 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
345 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
346 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
347 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
348 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
349 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
350 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
351 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
352 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
353 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
354 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
355 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
356 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
357 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
358 	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
359 	{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
360 	{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
361 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
362 	{ 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
363 
364 	/* Bracket repeats with limit. */
365 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
366 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
367 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
368 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
369 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
370 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
371 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
372 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
373 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
374 
375 	/* Basic character sets. */
376 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
377 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
378 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
379 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
380 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
381 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
382 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
383 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
384 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
385 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
386 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
387 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
388 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
389 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
390 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
391 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
392 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
393 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
394 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
395 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
396 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
397 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
398 	{ CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
399 	{ M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
400 
401 	/* Unicode properties. */
402 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
403 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
404 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
405 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
406 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
407 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
408 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
409 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
410 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
411 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
412 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
413 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
414 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
415 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
416 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
417 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
418 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
419 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
420 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
421 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
422 	{ MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
423 	{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
424 	{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
425 	{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
426 	{ MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
427 	{ MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
428 	{ MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
429 
430 	/* Possible empty brackets. */
431 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
432 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
433 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
434 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
435 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
436 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
437 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
438 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
439 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
440 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
441 
442 	/* Start offset. */
443 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
444 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
445 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
446 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
447 
448 	/* Newline. */
449 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
450 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
451 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
452 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
453 	{ MU, A, 0, 1, "^", "\r\n" },
454 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
455 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
456 
457 	/* Any character except newline or any newline. */
458 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
459 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
460 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
461 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
462 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
463 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
464 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
465 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
466 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
467 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
468 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
469 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
470 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
471 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
472 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
473 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
474 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
475 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
476 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
477 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
478 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
479 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
480 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
481 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
482 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
483 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
484 
485 	/* Atomic groups (no fallback from "next" direction). */
486 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
487 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
488 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
489 			"bababcdedefgheijijklmlmnop" },
490 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
491 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
492 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
493 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
494 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
495 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
496 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
497 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
498 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
499 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
500 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
501 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
502 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
503 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
504 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
505 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
506 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
507 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
508 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
509 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
510 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
511 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
512 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
513 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
514 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
515 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
516 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
517 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
518 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
519 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
520 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
521 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
522 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
523 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
524 	{ MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
525 
526 	/* Possessive quantifiers. */
527 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
528 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
529 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
530 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
531 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
532 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
533 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
534 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
535 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
536 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
537 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
538 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
539 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
540 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
541 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
542 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
543 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
544 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
545 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
546 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
547 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
548 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
549 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
550 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
551 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
552 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
553 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
554 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
555 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
556 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
557 	{ MU, A, 0, 0, "(A)*+$", "ABC" },
558 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
559 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
560 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
561 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
562 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
563 
564 	/* Back references. */
565 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
566 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
567 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
568 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
569 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
570 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
571 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
572 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
573 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
574 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
575 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
576 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
577 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
578 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
579 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
580 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
581 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
582 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
583 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
584 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
585 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
586 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
587 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
588 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
589 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
590 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
591 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
592 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
593 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
594 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
595 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
596 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
597 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
598 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
599 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
600 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
601 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
602 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
603 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
604 	{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
605 	{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
606 
607 	/* Assertions. */
608 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
609 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
610 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
611 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
612 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
613 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
614 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
615 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
616 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
617 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
618 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
619 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
620 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
621 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
622 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
623 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
624 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
625 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
626 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
627 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
628 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
629 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
630 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
631 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
632 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
633 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
634 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
635 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
636 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
637 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
638 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
639 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
640 	{ MU, A, 0, 0, "a(?=)b", "ab" },
641 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
642 	{ MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
643 
644 	/* Not empty, ACCEPT, FAIL */
645 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
646 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
647 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
648 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
649 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
650 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
651 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
652 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
653 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
654 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
655 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
656 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
657 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
658 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
659 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
660 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
661 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
662 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
663 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
664 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
665 	{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
666 
667 	/* Conditional blocks. */
668 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
669 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
670 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
671 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
672 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
673 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
674 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
675 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
676 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
677 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
678 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
679 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
680 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
681 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
682 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
683 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
684 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
685 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
686 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
687 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
688 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
689 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
690 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
691 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
692 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
693 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
694 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
695 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
696 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
697 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
698 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
699 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
700 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
701 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
702 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
703 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
704 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
705 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
706 
707 	/* Set start of match. */
708 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
709 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
710 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
711 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
712 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
713 
714 	/* First line. */
715 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
716 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
717 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
718 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
719 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
720 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
721 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
722 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
723 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
724 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
725 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
726 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
727 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
728 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
729 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
730 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
731 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
732 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
733 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
734 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
735 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
736 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
737 
738 	/* Recurse. */
739 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
740 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
741 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
742 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
743 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
744 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
745 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
746 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
747 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
748 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
749 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
750 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
751 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
752 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
753 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
754 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
755 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
756 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
757 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
758 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
759 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
760 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
761 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
762 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
763 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
764 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
765 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
766 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
767 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
768 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
769 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
770 	{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
771 
772 	/* 16 bit specific tests. */
773 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
774 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
775 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
776 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
777 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
778 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
779 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
780 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
781 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
782 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
783 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
784 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
785 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
786 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
787 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
788 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
789 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
790 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
791 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
792 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
793 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
794 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
795 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
796 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
797 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
798 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
799 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
800 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
801 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
802 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
803 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
804 
805 	/* Partial matching. */
806 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
807 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
808 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
809 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
810 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
811 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
812 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
813 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
814 
815 	/* (*MARK) verb. */
816 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
817 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
818 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
819 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
820 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
821 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
822 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
823 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
824 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
825 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
826 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
827 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
828 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
829 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
830 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
831 
832 	/* (*COMMIT) verb. */
833 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
834 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
835 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
836 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
837 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
838 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
839 
840 	/* (*PRUNE) verb. */
841 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
842 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
843 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
844 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
845 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
846 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
847 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
848 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
849 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
850 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
851 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
852 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
853 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
854 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
855 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
856 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
857 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
858 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
859 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
860 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
861 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
862 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
863 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
864 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
865 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
866 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
867 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
868 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
869 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
870 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
871 
872 	/* (*SKIP) verb. */
873 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
874 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
875 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
876 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
877 
878 	/* (*THEN) verb. */
879 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
880 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
881 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
882 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
883 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
884 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
885 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
886 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
887 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
888 	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
889 	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
890 	{ MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
891 
892 	/* Recurse and control verbs. */
893 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
894 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
895 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
896 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
897 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
898 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
899 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
900 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
901 
902 #ifdef SUPPORT_UNICODE
903 	/* Script runs and iterations. */
904 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
905 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
906 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
907 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
908 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
909 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
910 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
911 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
912 #endif /* SUPPORT_UNICODE */
913 
914 	/* Deep recursion. */
915 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
916 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
917 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
918 
919 	/* Deep recursion: Stack limit reached. */
920 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
921 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
922 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
923 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
924 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
925 
926 	{ 0, 0, 0, 0, NULL, NULL }
927 };
928 
929 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)930 static pcre2_jit_stack_8* callback8(void *arg)
931 {
932 	return (pcre2_jit_stack_8 *)arg;
933 }
934 #endif
935 
936 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)937 static pcre2_jit_stack_16* callback16(void *arg)
938 {
939 	return (pcre2_jit_stack_16 *)arg;
940 }
941 #endif
942 
943 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)944 static pcre2_jit_stack_32* callback32(void *arg)
945 {
946 	return (pcre2_jit_stack_32 *)arg;
947 }
948 #endif
949 
950 #ifdef SUPPORT_PCRE2_8
951 static pcre2_jit_stack_8 *stack8;
952 
getstack8(void)953 static pcre2_jit_stack_8 *getstack8(void)
954 {
955 	if (!stack8)
956 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
957 	return stack8;
958 }
959 
/* Attaches or releases the shared 8-bit JIT stack.
   - mcontext != NULL: assigns the stack returned by getstack8() to the match
     context, with callback8 as the stack callback.
   - mcontext == NULL: frees the shared stack if one exists and resets stack8
     to NULL, so a later getstack8() creates a fresh one. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
971 #endif /* SUPPORT_PCRE2_8 */
972 
973 #ifdef SUPPORT_PCRE2_16
974 static pcre2_jit_stack_16 *stack16;
975 
getstack16(void)976 static pcre2_jit_stack_16 *getstack16(void)
977 {
978 	if (!stack16)
979 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
980 	return stack16;
981 }
982 
/* Attaches or releases the shared 16-bit JIT stack.
   - mcontext != NULL: assigns the stack returned by getstack16() to the match
     context, with callback16 as the stack callback.
   - mcontext == NULL: frees the shared stack if one exists and resets stack16
     to NULL, so a later getstack16() creates a fresh one. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
994 #endif /* SUPPORT_PCRE2_16 */
995 
996 #ifdef SUPPORT_PCRE2_32
997 static pcre2_jit_stack_32 *stack32;
998 
getstack32(void)999 static pcre2_jit_stack_32 *getstack32(void)
1000 {
1001 	if (!stack32)
1002 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
1003 	return stack32;
1004 }
1005 
/* Attaches or releases the shared 32-bit JIT stack.
   - mcontext != NULL: assigns the stack returned by getstack32() to the match
     context, with callback32 as the stack callback.
   - mcontext == NULL: frees the shared stack if one exists and resets stack32
     to NULL, so a later getstack32() creates a fresh one. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
1017 #endif /* SUPPORT_PCRE2_32 */
1018 
1019 #ifdef SUPPORT_PCRE2_16
1020 
/* Decodes the NUL-terminated byte string `input` as UTF-8 and stores it in
   `output` as UTF-16 code units. max_length is the capacity of `output` in
   code units including the terminator: nothing is written when it is 0,
   otherwise the output is always NUL-terminated and conversion stops as soon
   as the next character would not leave room for the terminator. Returns the
   number of code units stored, terminator excluded.

   When offsetmap is non-NULL, the entry at each output index where a
   character starts receives the byte offset of that character in `input`,
   and the entry at the returned length receives the offset of the first
   unconverted input byte. The entry belonging to the low half of a surrogate
   pair is skipped and left untouched.

   No validation is performed: every byte below 0xc0 is passed through as a
   character of its own and continuation bytes are used unchecked.
   NOTE(review): a lead byte of 0xf8 or above matches no branch, so it emits
   a zero unit without consuming any input, and a multi-byte lead whose
   continuation bytes are missing steps past the terminator. Inputs are
   presumably trusted, well-formed test data - confirm. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 src = input;
	PCRE2_UCHAR16 *dst = output;
	unsigned int cp;

	if (max_length == 0)
		return 0;

	while (*src && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(src - input);

		cp = 0;
		if (src[0] < 0xc0) {
			/* ASCII, or a stray continuation byte taken literally. */
			cp = src[0];
			src += 1;
		} else if ((src[0] & 0x20) == 0) {
			/* 110xxxxx: two-byte sequence. */
			cp = ((src[0] & 0x1f) << 6) | (src[1] & 0x3f);
			src += 2;
		} else if ((src[0] & 0x10) == 0) {
			/* 1110xxxx: three-byte sequence. */
			cp = ((src[0] & 0x0f) << 12) | ((src[1] & 0x3f) << 6) | (src[2] & 0x3f);
			src += 3;
		} else if ((src[0] & 0x08) == 0) {
			/* 11110xxx: four-byte sequence. */
			cp = ((src[0] & 0x07) << 18) | ((src[1] & 0x3f) << 12) | ((src[2] & 0x3f) << 6) | (src[3] & 0x3f);
			src += 4;
		}

		if (cp >= 65536) {
			if (max_length <= 2) {
				/* No room for a surrogate pair plus the terminator. */
				*dst = '\0';
				return (int)(dst - output);
			}

			cp -= 0x10000;
			*dst++ = 0xd800 | ((cp >> 10) & 0x3ff);
			*dst++ = 0xdc00 | (cp & 0x3ff);
			max_length -= 2;
			/* Keep offsetmap indices aligned with output indices. */
			if (offsetmap)
				offsetmap++;
			continue;
		}

		*dst++ = cp;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(src - input);
	*dst = '\0';
	return (int)(dst - output);
}
1068 
/* Widens the NUL-terminated 8-bit string `input` into `output`, one 16-bit
   unit per input byte and with no decoding. Nothing is written when
   max_length is 0; otherwise copying stops once only one slot remains and the
   output is NUL-terminated. Returns the number of units copied, terminator
   excluded. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int copied = 0;

	if (max_length == 0)
		return 0;

	for (; input[copied] && max_length > 1; max_length--) {
		output[copied] = input[copied];
		copied++;
	}

	output[copied] = '\0';
	return copied;
}
1084 
1085 #define REGTEST_MAX_LENGTH16 4096
1086 static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1087 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1088 
1089 #endif /* SUPPORT_PCRE2_16 */
1090 
1091 #ifdef SUPPORT_PCRE2_32
1092 
/* Decodes the NUL-terminated byte string `input` as UTF-8 and stores one
   32-bit unit per decoded character in `output`. max_length is the capacity
   of `output` in units including the terminator: nothing is written when it
   is 0, otherwise at most max_length - 1 characters are stored followed by a
   zero terminator. Returns the number of units stored, terminator excluded.

   When offsetmap is non-NULL, entry i receives the byte offset in `input` of
   the character stored at output index i, and the entry at the returned
   length receives the offset of the first unconverted input byte.

   No validation is performed: every byte below 0xc0 is passed through as a
   character of its own and continuation bytes are used unchecked.
   NOTE(review): a lead byte of 0xf8 or above matches no branch, so it emits
   a zero unit without consuming any input, and a multi-byte lead whose
   continuation bytes are missing steps past the terminator. Inputs are
   presumably trusted, well-formed test data - confirm. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 src = input;
	PCRE2_UCHAR32 *dst = output;
	unsigned int cp;

	if (max_length == 0)
		return 0;

	while (*src && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(src - input);

		cp = 0;
		if (src[0] < 0xc0) {
			/* ASCII, or a stray continuation byte taken literally. */
			cp = src[0];
			src += 1;
		} else if ((src[0] & 0x20) == 0) {
			/* 110xxxxx: two-byte sequence. */
			cp = ((src[0] & 0x1f) << 6) | (src[1] & 0x3f);
			src += 2;
		} else if ((src[0] & 0x10) == 0) {
			/* 1110xxxx: three-byte sequence. */
			cp = ((src[0] & 0x0f) << 12) | ((src[1] & 0x3f) << 6) | (src[2] & 0x3f);
			src += 3;
		} else if ((src[0] & 0x08) == 0) {
			/* 11110xxx: four-byte sequence. */
			cp = ((src[0] & 0x07) << 18) | ((src[1] & 0x3f) << 12) | ((src[2] & 0x3f) << 6) | (src[3] & 0x3f);
			src += 4;
		}

		*dst++ = cp;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(src - input);
	*dst = 0;
	return (int)(dst - output);
}
1128 
/* Widens the NUL-terminated 8-bit string `input` into `output`, one 32-bit
   unit per input byte and with no decoding. Nothing is written when
   max_length is 0; otherwise copying stops once only one slot remains and the
   output is NUL-terminated. Returns the number of units copied, terminator
   excluded. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int copied = 0;

	if (max_length == 0)
		return 0;

	for (; input[copied] && max_length > 1; max_length--) {
		output[copied] = input[copied];
		copied++;
	}

	output[copied] = '\0';
	return copied;
}
1144 
1145 #define REGTEST_MAX_LENGTH32 4096
1146 static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1147 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1148 
1149 #endif /* SUPPORT_PCRE2_32 */
1150 
/* Returns 1 when every byte of the NUL-terminated string `input` is in the
   range 0..127 (an empty string qualifies), and 0 as soon as a byte above
   127 is found. Bytes are examined as unsigned char so the result does not
   depend on whether plain char is signed. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	/* The cast keeps the const qualifier of `input` instead of dropping it. */
	for (ptr = (const unsigned char *)input; *ptr; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
1161 
/* NOTE(review): presumably sizes the match output vectors used by
   regression_tests() - confirm against its uses. */
#define OVECTOR_SIZE 15
1163 
regression_tests(void)1164 static int regression_tests(void)
1165 {
1166 	struct regression_test_case *current = regression_test_cases;
1167 	int error;
1168 	PCRE2_SIZE err_offs;
1169 	int is_successful;
1170 	int is_ascii;
1171 	int total = 0;
1172 	int successful = 0;
1173 	int successful_row = 0;
1174 	int counter = 0;
1175 	int jit_compile_mode;
1176 	int utf = 0;
1177 	uint32_t disabled_options = 0;
1178 	int i;
1179 #ifdef SUPPORT_PCRE2_8
1180 	pcre2_code_8 *re8;
1181 	pcre2_compile_context_8 *ccontext8;
1182 	pcre2_match_data_8 *mdata8_1;
1183 	pcre2_match_data_8 *mdata8_2;
1184 	pcre2_match_context_8 *mcontext8;
1185 	PCRE2_SIZE *ovector8_1 = NULL;
1186 	PCRE2_SIZE *ovector8_2 = NULL;
1187 	int return_value8[2];
1188 #endif
1189 #ifdef SUPPORT_PCRE2_16
1190 	pcre2_code_16 *re16;
1191 	pcre2_compile_context_16 *ccontext16;
1192 	pcre2_match_data_16 *mdata16_1;
1193 	pcre2_match_data_16 *mdata16_2;
1194 	pcre2_match_context_16 *mcontext16;
1195 	PCRE2_SIZE *ovector16_1 = NULL;
1196 	PCRE2_SIZE *ovector16_2 = NULL;
1197 	int return_value16[2];
1198 	int length16;
1199 #endif
1200 #ifdef SUPPORT_PCRE2_32
1201 	pcre2_code_32 *re32;
1202 	pcre2_compile_context_32 *ccontext32;
1203 	pcre2_match_data_32 *mdata32_1;
1204 	pcre2_match_data_32 *mdata32_2;
1205 	pcre2_match_context_32 *mcontext32;
1206 	PCRE2_SIZE *ovector32_1 = NULL;
1207 	PCRE2_SIZE *ovector32_2 = NULL;
1208 	int return_value32[2];
1209 	int length32;
1210 #endif
1211 
1212 #if defined SUPPORT_PCRE2_8
1213 	PCRE2_UCHAR8 cpu_info[128];
1214 #elif defined SUPPORT_PCRE2_16
1215 	PCRE2_UCHAR16 cpu_info[128];
1216 #elif defined SUPPORT_PCRE2_32
1217 	PCRE2_UCHAR32 cpu_info[128];
1218 #endif
1219 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1220 	int return_value;
1221 #endif
1222 
1223 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1224 	utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1225 	still considered successful from pcre2_jit_test point of view. */
1226 
1227 #if defined SUPPORT_PCRE2_8
1228 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1229 #elif defined SUPPORT_PCRE2_16
1230 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1231 #elif defined SUPPORT_PCRE2_32
1232 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1233 #endif
1234 
1235 	printf("Running JIT regression tests\n");
1236 	printf("  target CPU of SLJIT compiler: ");
1237 	for (i = 0; cpu_info[i]; i++)
1238 		printf("%c", (char)(cpu_info[i]));
1239 	printf("\n");
1240 
1241 #if defined SUPPORT_PCRE2_8
1242 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1243 #elif defined SUPPORT_PCRE2_16
1244 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1245 #elif defined SUPPORT_PCRE2_32
1246 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1247 #endif
1248 
1249 	if (!utf)
1250 		disabled_options |= PCRE2_UTF;
1251 #ifdef SUPPORT_PCRE2_8
1252 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1253 #endif
1254 #ifdef SUPPORT_PCRE2_16
1255 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1256 #endif
1257 #ifdef SUPPORT_PCRE2_32
1258 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1259 #endif
1260 
1261 	while (current->pattern) {
1262 		/* printf("\nPattern: %s :\n", current->pattern); */
1263 		total++;
1264 		is_ascii = 0;
1265 		if (!(current->start_offset & F_PROPERTY))
1266 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1267 
1268 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1269 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1270 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1271 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1272 		else
1273 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1274 		error = 0;
1275 #ifdef SUPPORT_PCRE2_8
1276 		re8 = NULL;
1277 		ccontext8 = pcre2_compile_context_create_8(NULL);
1278 		if (ccontext8) {
1279 			if (GET_NEWLINE(current->newline))
1280 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1281 			if (GET_BSR(current->newline))
1282 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1283 
1284 			if (!(current->start_offset & F_NO8)) {
1285 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1286 					current->compile_options & ~disabled_options,
1287 					&error, &err_offs, ccontext8);
1288 
1289 				if (!re8 && (utf || is_ascii))
1290 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1291 			}
1292 			pcre2_compile_context_free_8(ccontext8);
1293 		}
1294 		else
1295 			printf("\n8 bit: Cannot allocate compile context\n");
1296 #endif
1297 #ifdef SUPPORT_PCRE2_16
1298 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1299 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1300 		else
1301 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1302 
1303 		re16 = NULL;
1304 		ccontext16 = pcre2_compile_context_create_16(NULL);
1305 		if (ccontext16) {
1306 			if (GET_NEWLINE(current->newline))
1307 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1308 			if (GET_BSR(current->newline))
1309 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1310 
1311 			if (!(current->start_offset & F_NO16)) {
1312 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1313 					current->compile_options & ~disabled_options,
1314 					&error, &err_offs, ccontext16);
1315 
1316 				if (!re16 && (utf || is_ascii))
1317 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1318 			}
1319 			pcre2_compile_context_free_16(ccontext16);
1320 		}
1321 		else
1322 			printf("\n16 bit: Cannot allocate compile context\n");
1323 #endif
1324 #ifdef SUPPORT_PCRE2_32
1325 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1326 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1327 		else
1328 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1329 
1330 		re32 = NULL;
1331 		ccontext32 = pcre2_compile_context_create_32(NULL);
1332 		if (ccontext32) {
1333 			if (GET_NEWLINE(current->newline))
1334 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1335 			if (GET_BSR(current->newline))
1336 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1337 
1338 			if (!(current->start_offset & F_NO32)) {
1339 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1340 					current->compile_options & ~disabled_options,
1341 					&error, &err_offs, ccontext32);
1342 
1343 				if (!re32 && (utf || is_ascii))
1344 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1345 			}
1346 			pcre2_compile_context_free_32(ccontext32);
1347 		}
1348 		else
1349 			printf("\n32 bit: Cannot allocate compile context\n");
1350 #endif
1351 
1352 		counter++;
1353 		if ((counter & 0x3) != 0) {
1354 #ifdef SUPPORT_PCRE2_8
1355 			setstack8(NULL);
1356 #endif
1357 #ifdef SUPPORT_PCRE2_16
1358 			setstack16(NULL);
1359 #endif
1360 #ifdef SUPPORT_PCRE2_32
1361 			setstack32(NULL);
1362 #endif
1363 		}
1364 
1365 #ifdef SUPPORT_PCRE2_8
1366 		return_value8[0] = -1000;
1367 		return_value8[1] = -1000;
1368 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1369 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1370 		mcontext8 = pcre2_match_context_create_8(NULL);
1371 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1372 			printf("\n8 bit: Cannot allocate match data\n");
1373 			pcre2_match_data_free_8(mdata8_1);
1374 			pcre2_match_data_free_8(mdata8_2);
1375 			pcre2_match_context_free_8(mcontext8);
1376 			pcre2_code_free_8(re8);
1377 			re8 = NULL;
1378 		} else {
1379 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1380 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1381 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1382 				ovector8_1[i] = (PCRE2_SIZE)(-2);
1383 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1384 				ovector8_2[i] = (PCRE2_SIZE)(-2);
1385 			pcre2_set_match_limit_8(mcontext8, 10000000);
1386 		}
1387 		if (re8) {
1388 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1389 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1390 
1391 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1392 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1393 			} else if ((counter & 0x1) != 0) {
1394 				setstack8(mcontext8);
1395 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1396 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1397 			} else {
1398 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1399 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1400 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1401 			}
1402 		}
1403 #endif
1404 
1405 #ifdef SUPPORT_PCRE2_16
1406 		return_value16[0] = -1000;
1407 		return_value16[1] = -1000;
1408 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1409 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1410 		mcontext16 = pcre2_match_context_create_16(NULL);
1411 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1412 			printf("\n16 bit: Cannot allocate match data\n");
1413 			pcre2_match_data_free_16(mdata16_1);
1414 			pcre2_match_data_free_16(mdata16_2);
1415 			pcre2_match_context_free_16(mcontext16);
1416 			pcre2_code_free_16(re16);
1417 			re16 = NULL;
1418 		} else {
1419 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1420 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1421 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1422 				ovector16_1[i] = (PCRE2_SIZE)(-2);
1423 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1424 				ovector16_2[i] = (PCRE2_SIZE)(-2);
1425 			pcre2_set_match_limit_16(mcontext16, 10000000);
1426 		}
1427 		if (re16) {
1428 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1429 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1430 			else
1431 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1432 
1433 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1434 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1435 
1436 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1437 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1438 			} else if ((counter & 0x1) != 0) {
1439 				setstack16(mcontext16);
1440 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1441 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1442 			} else {
1443 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1444 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1445 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1446 			}
1447 		}
1448 #endif
1449 
1450 #ifdef SUPPORT_PCRE2_32
1451 		return_value32[0] = -1000;
1452 		return_value32[1] = -1000;
1453 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1454 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1455 		mcontext32 = pcre2_match_context_create_32(NULL);
1456 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1457 			printf("\n32 bit: Cannot allocate match data\n");
1458 			pcre2_match_data_free_32(mdata32_1);
1459 			pcre2_match_data_free_32(mdata32_2);
1460 			pcre2_match_context_free_32(mcontext32);
1461 			pcre2_code_free_32(re32);
1462 			re32 = NULL;
1463 		} else {
1464 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1465 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1466 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1467 				ovector32_1[i] = (PCRE2_SIZE)(-2);
1468 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1469 				ovector32_2[i] = (PCRE2_SIZE)(-2);
1470 			pcre2_set_match_limit_32(mcontext32, 10000000);
1471 		}
1472 		if (re32) {
1473 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1474 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1475 			else
1476 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1477 
1478 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1479 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1480 
1481 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1482 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1483 			} else if ((counter & 0x1) != 0) {
1484 				setstack32(mcontext32);
1485 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1486 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1487 			} else {
1488 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1489 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1490 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1491 			}
1492 		}
1493 #endif
1494 
1495 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1496 			return_value8[0], return_value16[0], return_value32[0],
1497 			(int)ovector8_1[0], (int)ovector8_1[1],
1498 			(int)ovector16_1[0], (int)ovector16_1[1],
1499 			(int)ovector32_1[0], (int)ovector32_1[1],
1500 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1501 
1502 		/* If F_DIFF is set, just run the test, but do not compare the results.
1503 		Segfaults can still be captured. */
1504 
1505 		is_successful = 1;
1506 		if (!(current->start_offset & F_DIFF)) {
1507 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1508 			if (!(current->start_offset & F_FORCECONV)) {
1509 
1510 				/* All results must be the same. */
1511 #ifdef SUPPORT_PCRE2_8
1512 				if ((return_value = return_value8[0]) != return_value8[1]) {
1513 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1514 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1515 					is_successful = 0;
1516 				} else
1517 #endif
1518 #ifdef SUPPORT_PCRE2_16
1519 				if ((return_value = return_value16[0]) != return_value16[1]) {
1520 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1521 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1522 					is_successful = 0;
1523 				} else
1524 #endif
1525 #ifdef SUPPORT_PCRE2_32
1526 				if ((return_value = return_value32[0]) != return_value32[1]) {
1527 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1528 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1529 					is_successful = 0;
1530 				} else
1531 #endif
1532 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1533 				if (return_value8[0] != return_value16[0]) {
1534 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1535 						return_value8[0], return_value16[0],
1536 						total, current->pattern, current->input);
1537 					is_successful = 0;
1538 				} else
1539 #endif
1540 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1541 				if (return_value8[0] != return_value32[0]) {
1542 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1543 						return_value8[0], return_value32[0],
1544 						total, current->pattern, current->input);
1545 					is_successful = 0;
1546 				} else
1547 #endif
1548 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1549 				if (return_value16[0] != return_value32[0]) {
1550 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1551 						return_value16[0], return_value32[0],
1552 						total, current->pattern, current->input);
1553 					is_successful = 0;
1554 				} else
1555 #endif
1556 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1557 					if (return_value == PCRE2_ERROR_PARTIAL) {
1558 						return_value = 2;
1559 					} else {
1560 						return_value *= 2;
1561 					}
1562 #ifdef SUPPORT_PCRE2_8
1563 					return_value8[0] = return_value;
1564 #endif
1565 #ifdef SUPPORT_PCRE2_16
1566 					return_value16[0] = return_value;
1567 #endif
1568 #ifdef SUPPORT_PCRE2_32
1569 					return_value32[0] = return_value;
1570 #endif
1571 					/* Transform back the results. */
1572 					if (current->compile_options & PCRE2_UTF) {
1573 #ifdef SUPPORT_PCRE2_16
1574 						for (i = 0; i < return_value; ++i) {
1575 							if (ovector16_1[i] != PCRE2_UNSET)
1576 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1577 							if (ovector16_2[i] != PCRE2_UNSET)
1578 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1579 						}
1580 #endif
1581 #ifdef SUPPORT_PCRE2_32
1582 						for (i = 0; i < return_value; ++i) {
1583 							if (ovector32_1[i] != PCRE2_UNSET)
1584 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1585 							if (ovector32_2[i] != PCRE2_UNSET)
1586 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1587 						}
1588 #endif
1589 					}
1590 
1591 					for (i = 0; i < return_value; ++i) {
1592 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1593 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1594 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1595 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1596 								total, current->pattern, current->input);
1597 							is_successful = 0;
1598 						}
1599 #endif
1600 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1601 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1602 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1603 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1604 								total, current->pattern, current->input);
1605 							is_successful = 0;
1606 						}
1607 #endif
1608 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1609 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1610 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1611 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1612 								total, current->pattern, current->input);
1613 							is_successful = 0;
1614 						}
1615 #endif
1616 					}
1617 				}
1618 			} else
1619 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1620 			{
1621 #ifdef SUPPORT_PCRE2_8
1622 				if (return_value8[0] != return_value8[1]) {
1623 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1624 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1625 					is_successful = 0;
1626 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1627 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1628 						return_value8[0] = 2;
1629 					else
1630 						return_value8[0] *= 2;
1631 
1632 					for (i = 0; i < return_value8[0]; ++i)
1633 						if (ovector8_1[i] != ovector8_2[i]) {
1634 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1635 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1636 							is_successful = 0;
1637 						}
1638 				}
1639 #endif
1640 
1641 #ifdef SUPPORT_PCRE2_16
1642 				if (return_value16[0] != return_value16[1]) {
1643 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1644 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1645 					is_successful = 0;
1646 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1647 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1648 						return_value16[0] = 2;
1649 					else
1650 						return_value16[0] *= 2;
1651 
1652 					for (i = 0; i < return_value16[0]; ++i)
1653 						if (ovector16_1[i] != ovector16_2[i]) {
1654 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1655 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1656 							is_successful = 0;
1657 						}
1658 				}
1659 #endif
1660 
1661 #ifdef SUPPORT_PCRE2_32
1662 				if (return_value32[0] != return_value32[1]) {
1663 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1664 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1665 					is_successful = 0;
1666 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1667 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1668 						return_value32[0] = 2;
1669 					else
1670 						return_value32[0] *= 2;
1671 
1672 					for (i = 0; i < return_value32[0]; ++i)
1673 						if (ovector32_1[i] != ovector32_2[i]) {
1674 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1675 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1676 							is_successful = 0;
1677 						}
1678 				}
1679 #endif
1680 			}
1681 		}
1682 
1683 		if (is_successful) {
1684 #ifdef SUPPORT_PCRE2_8
1685 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1686 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1687 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1688 						total, current->pattern, current->input);
1689 					is_successful = 0;
1690 				}
1691 
1692 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1693 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1694 						total, current->pattern, current->input);
1695 					is_successful = 0;
1696 				}
1697 			}
1698 #endif
1699 #ifdef SUPPORT_PCRE2_16
1700 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1701 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1702 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1703 						total, current->pattern, current->input);
1704 					is_successful = 0;
1705 				}
1706 
1707 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1708 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1709 						total, current->pattern, current->input);
1710 					is_successful = 0;
1711 				}
1712 			}
1713 #endif
1714 #ifdef SUPPORT_PCRE2_32
1715 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1716 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1717 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1718 						total, current->pattern, current->input);
1719 					is_successful = 0;
1720 				}
1721 
1722 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1723 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1724 						total, current->pattern, current->input);
1725 					is_successful = 0;
1726 				}
1727 			}
1728 #endif
1729 		}
1730 
1731 		if (is_successful) {
1732 #ifdef SUPPORT_PCRE2_8
1733 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1734 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1735 					total, current->pattern, current->input);
1736 				is_successful = 0;
1737 			}
1738 #endif
1739 #ifdef SUPPORT_PCRE2_16
1740 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1741 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1742 					total, current->pattern, current->input);
1743 				is_successful = 0;
1744 			}
1745 #endif
1746 #ifdef SUPPORT_PCRE2_32
1747 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1748 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1749 					total, current->pattern, current->input);
1750 				is_successful = 0;
1751 			}
1752 #endif
1753 		}
1754 
1755 #ifdef SUPPORT_PCRE2_8
1756 		pcre2_code_free_8(re8);
1757 		pcre2_match_data_free_8(mdata8_1);
1758 		pcre2_match_data_free_8(mdata8_2);
1759 		pcre2_match_context_free_8(mcontext8);
1760 #endif
1761 #ifdef SUPPORT_PCRE2_16
1762 		pcre2_code_free_16(re16);
1763 		pcre2_match_data_free_16(mdata16_1);
1764 		pcre2_match_data_free_16(mdata16_2);
1765 		pcre2_match_context_free_16(mcontext16);
1766 #endif
1767 #ifdef SUPPORT_PCRE2_32
1768 		pcre2_code_free_32(re32);
1769 		pcre2_match_data_free_32(mdata32_1);
1770 		pcre2_match_data_free_32(mdata32_2);
1771 		pcre2_match_context_free_32(mcontext32);
1772 #endif
1773 
1774 		if (is_successful) {
1775 			successful++;
1776 			successful_row++;
1777 			printf(".");
1778 			if (successful_row >= 60) {
1779 				successful_row = 0;
1780 				printf("\n");
1781 			}
1782 		} else
1783 			successful_row = 0;
1784 
1785 		fflush(stdout);
1786 		current++;
1787 	}
1788 #ifdef SUPPORT_PCRE2_8
1789 	setstack8(NULL);
1790 #endif
1791 #ifdef SUPPORT_PCRE2_16
1792 	setstack16(NULL);
1793 #endif
1794 #ifdef SUPPORT_PCRE2_32
1795 	setstack32(NULL);
1796 #endif
1797 
1798 	if (total == successful) {
1799 		printf("\nAll JIT regression tests are successfully passed.\n");
1800 		return 0;
1801 	} else {
1802 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1803 		return 1;
1804 	}
1805 }
1806 
1807 #if defined SUPPORT_UNICODE
1808 
/* Checks the outcome of a single match attempt against the expected one.

A negative match_start means that no match is expected, so result must
be -1. Otherwise result must be greater than zero, and the first ovector
pair must be equal to match_start and match_end. The ovector is only read
in the latter case.

pattern_index and type are only used for the diagnostic message.

Returns 0 when the outcome is the expected one. Otherwise a diagnostic
is printed and 1 is returned. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int failed = 1;

	if (match_start < 0) {
		if (result == -1)
			failed = 0;
		else
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
	} else if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
	} else if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
	} else if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
	} else {
		failed = 0;
	}

	return failed;
}
1839 
1840 #endif /* SUPPORT_UNICODE */
1841 
1842 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1843 
1844 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
1845 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
1846 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1847 
/* Describes one entry of the invalid_utf8_regression_test_cases table. The
members are listed in the order used by the positional initializers of
that table. */
struct invalid_utf8_regression_test_case {
	/* The table entries use UDA, optionally combined with PCRE2_CASELESS. */
	uint32_t compile_options;
	/* The table entries use CI or CPI. */
	int jit_compile_options;
	/* NOTE(review): presumably the offset in input where matching starts;
	confirm against the code which runs the table. */
	int start_offset;
	/* NOTE(review): presumably the number of code units dropped from the
	start and from the end of input before matching; confirm against
	the code which runs the table. */
	int skip_left;
	int skip_right;
	/* The table entries set both members to -1 or both to non-negative
	offsets. NOTE(review): presumably the expected first ovector pair,
	where -1 stands for an expected match failure; confirm against the
	code which runs the table. */
	int match_start;
	int match_end;
	/* Up to two patterns. The table entries set the second one to NULL
	when only one pattern is given. */
	const char *pattern[2];
	/* Subject string. The table entries contain raw bytes written with
	hexadecimal escapes. */
	const char *input;
};
1859 
1860 static const char invalid_utf8_newline_cr;
1861 
1862 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1863 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1864 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1865 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1866 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1867 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1868 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1869 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1870 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1871 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1872 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1873 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1874 	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1875 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1876 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1877 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1878 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1879 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1880 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1881 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1882 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1883 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1884 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1885 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1886 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1887 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1888 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1889 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1890 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1891 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1892 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1893 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1894 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1895 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1896 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1897 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1898 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1899 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1900 	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1901 
1902 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1903 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1904 	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1905 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1906 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1907 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1908 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1909 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1910 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1911 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1912 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1913 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1914 	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1915 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1916 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1917 	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1918 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1919 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1920 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1921 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1922 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1923 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1924 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1925 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1926 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1927 
1928 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1929 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1930 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1931 	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1932 	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1933 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1934 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1935 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1936 
1937 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1938 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1939 	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1940 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1941 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1942 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1943 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1944 
1945 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1946 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1947 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1948 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1949 
1950 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1951 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1952 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1953 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1954 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1955 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1956 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1957 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1958 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1959 
1960 	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1961 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1962 	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1963 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1964 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1965 	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1966 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1967 	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1968 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1969 
1970 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1971 	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1972 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1973 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1974 
1975 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1976 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1977 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1978 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1979 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1980 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1981 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1982 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1983 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1984 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1985 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1986 
1987 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1988 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1989 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1990 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1991 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1992 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1993 
1994 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1995 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1996 	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1997 	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1998 
1999 	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
2000 	{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
2001 	{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
2002 	{ PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
2003 
2004 	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
2005 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
2006 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
2007 
2008 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
2009 
2010 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2011 };
2012 
2013 #undef UDA
2014 #undef CI
2015 #undef CPI
2016 
/* Runs one pattern of an invalid-UTF8 regression test case through the JIT.

   current        the test case (options, offsets, patterns and input)
   pattern_index  position of the test case in the table; diagnostics only
   i              selects current->pattern[i]
   ccontext       compile context handed to pcre2_compile_8()
   mdata          match data block whose ovector is checked after each match

   Returns 1 when pattern[i] is NULL (nothing to run) or when every requested
   match mode passes its result check; returns 0 when the pattern cannot be
   compiled, cannot be JIT compiled, or a result check fails. */
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	const PCRE2_UCHAR8 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((const PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not a fixed 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto release;
	}

	/* skip_left / skip_right trim the input at both ends. start_offset is
	   expressed against the untrimmed input, so it is rebased here. */
	subject = (const PCRE2_UCHAR8*)(current->input + current->skip_left);
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, subject, length, start, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	passed = 1;

release:
	pcre2_code_free_8(code);
	return passed;
}
2067 
invalid_utf8_regression_tests(void)2068 static int invalid_utf8_regression_tests(void)
2069 {
2070 	const struct invalid_utf8_regression_test_case *current;
2071 	pcre2_compile_context_8 *ccontext;
2072 	pcre2_match_data_8 *mdata;
2073 	int total = 0, successful = 0;
2074 	int result;
2075 
2076 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2077 
2078 	ccontext = pcre2_compile_context_create_8(NULL);
2079 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2080 	mdata = pcre2_match_data_create_8(4, NULL);
2081 
2082 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2083 		/* printf("\nPattern: %s :\n", current->pattern); */
2084 		total++;
2085 
2086 		result = 1;
2087 		if (current->pattern[1] != &invalid_utf8_newline_cr)
2088 		{
2089 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2090 				result = 0;
2091 			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2092 				result = 0;
2093 		} else {
2094 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2095 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2096 				result = 0;
2097 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2098 		}
2099 
2100 		if (result) {
2101 			successful++;
2102 		}
2103 
2104 		printf(".");
2105 		if ((total % 60) == 0)
2106 			printf("\n");
2107 	}
2108 
2109 	if ((total % 60) != 0)
2110 		printf("\n");
2111 
2112 	pcre2_match_data_free_8(mdata);
2113 	pcre2_compile_context_free_8(ccontext);
2114 
2115 	if (total == successful) {
2116 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2117 		return 0;
2118 	} else {
2119 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2120 		return 1;
2121 	}
2122 }
2123 
2124 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2125 
/* Stub for builds without Unicode support or without the 8-bit library:
   there is nothing to run, so it returns 0. */
static int invalid_utf8_regression_tests(void)
{
	return 0;
}
2130 
2131 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2132 
2133 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2134 
/* Shorthands for the option combinations used in the invalid UTF-16 test
   table: UDA is a set of compile options, CI and CPI are sets of JIT compile
   options. They are removed again with #undef after the table. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2138 
2139 struct invalid_utf16_regression_test_case {
2140 	uint32_t compile_options;
2141 	int jit_compile_options;
2142 	int start_offset;
2143 	int skip_left;
2144 	int skip_right;
2145 	int match_start;
2146 	int match_end;
2147 	const PCRE2_UCHAR16 *pattern[2];
2148 	const PCRE2_UCHAR16 *input;
2149 };
2150 
2151 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2152 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2153 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2154 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2155 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2156 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2157 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2158 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2159 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2160 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2161 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2162 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2163 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2164 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2165 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2166 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2167 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2168 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2169 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2170 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2171 
2172 static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
2173 	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2174 	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2175 	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2176 	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2177 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2178 	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
2179 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
2180 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2181 	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
2182 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2183 
2184 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2185 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2186 	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2187 	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2188 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2189 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2190 	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2191 	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2192 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2193 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2194 
2195 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2196 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2197 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2198 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2199 
2200 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2201 	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2202 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2203 	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2204 	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2205 	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2206 
2207 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2208 	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2209 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2210 
2211 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
2212 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2213 
2214 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2215 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2216 	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2217 	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2218 
2219 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2220 };
2221 
2222 #undef UDA
2223 #undef CI
2224 #undef CPI
2225 
/* Runs one pattern of an invalid-UTF16 regression test case through the JIT.

   current        the test case (options, offsets, patterns and input)
   pattern_index  position of the test case in the table; diagnostics only
   i              selects current->pattern[i]
   ccontext       compile context handed to pcre2_compile_16()
   mdata          match data block whose ovector is checked after each match

   Returns 1 when pattern[i] is NULL (nothing to run) or when every requested
   match mode passes its result check; returns 0 when the pattern cannot be
   compiled, cannot be JIT compiled, or a result check fails. */
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	const PCRE2_UCHAR16 *input, *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not a fixed 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto release;
	}

	/* Count the code units in front of the terminating zero. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* skip_left / skip_right trim the input at both ends. start_offset is
	   expressed against the untrimmed input, so it is rebased here. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, subject, length, start, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	passed = 1;

release:
	pcre2_code_free_16(code);
	return passed;
}
2283 
invalid_utf16_regression_tests(void)2284 static int invalid_utf16_regression_tests(void)
2285 {
2286 	const struct invalid_utf16_regression_test_case *current;
2287 	pcre2_compile_context_16 *ccontext;
2288 	pcre2_match_data_16 *mdata;
2289 	int total = 0, successful = 0;
2290 	int result;
2291 
2292 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2293 
2294 	ccontext = pcre2_compile_context_create_16(NULL);
2295 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2296 	mdata = pcre2_match_data_create_16(4, NULL);
2297 
2298 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2299 		/* printf("\nPattern: %s :\n", current->pattern); */
2300 		total++;
2301 
2302 		result = 1;
2303 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2304 			result = 0;
2305 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2306 			result = 0;
2307 
2308 		if (result) {
2309 			successful++;
2310 		}
2311 
2312 		printf(".");
2313 		if ((total % 60) == 0)
2314 			printf("\n");
2315 	}
2316 
2317 	if ((total % 60) != 0)
2318 		printf("\n");
2319 
2320 	pcre2_match_data_free_16(mdata);
2321 	pcre2_compile_context_free_16(ccontext);
2322 
2323 	if (total == successful) {
2324 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2325 		return 0;
2326 	} else {
2327 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2328 		return 1;
2329 	}
2330 }
2331 
2332 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2333 
/* Stub for builds without Unicode support or without the 16-bit library:
   there is nothing to run, so it returns 0. */
static int invalid_utf16_regression_tests(void)
{
	return 0;
}
2338 
2339 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2340 
2341 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2342 
/* Shorthands for the option combinations used in the invalid UTF-32 test
   table: UDA is a set of compile options, CI and CPI are sets of JIT compile
   options. They are removed again with #undef after the table. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2346 
2347 struct invalid_utf32_regression_test_case {
2348 	uint32_t compile_options;
2349 	int jit_compile_options;
2350 	int start_offset;
2351 	int skip_left;
2352 	int skip_right;
2353 	int match_start;
2354 	int match_end;
2355 	const PCRE2_UCHAR32 *pattern[2];
2356 	const PCRE2_UCHAR32 *input;
2357 };
2358 
2359 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2360 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2361 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2362 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2363 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2364 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2365 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2366 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2367 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2368 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2369 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2370 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2371 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2372 
2373 static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
2374 	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2375 	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
2376 	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
2377 	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
2378 	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2379 	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2380 
2381 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2382 	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2383 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
2384 	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2385 	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2386 
2387 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
2388 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
2389 
2390 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2391 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2392 	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
2393 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2394 	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2395 	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
2396 
2397 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2398 	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
2399 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2400 	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
2401 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
2402 
2403 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
2404 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
2405 
2406 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2407 };
2408 
2409 #undef UDA
2410 #undef CI
2411 #undef CPI
2412 
/* Runs one pattern of an invalid-UTF32 regression test case through the JIT.

   current        the test case (options, offsets, patterns and input)
   pattern_index  position of the test case in the table; diagnostics only
   i              selects current->pattern[i]
   ccontext       compile context handed to pcre2_compile_32()
   mdata          match data block whose ovector is checked after each match

   Returns 1 when pattern[i] is NULL (nothing to run) or when every requested
   match mode passes its result check; returns 0 when the pattern cannot be
   compiled, cannot be JIT compiled, or a result check fails. */
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	const PCRE2_UCHAR32 *input, *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not a fixed 0. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto release;
	}

	/* Count the code units in front of the terminating zero. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* skip_left / skip_right trim the input at both ends. start_offset is
	   expressed against the untrimmed input, so it is rebased here. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, subject, length, start, 0, mdata, NULL);

		/* A non-zero return from the checker is treated as a failure. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto release;
	}

	passed = 1;

release:
	pcre2_code_free_32(code);
	return passed;
}
2470 
invalid_utf32_regression_tests(void)2471 static int invalid_utf32_regression_tests(void)
2472 {
2473 	const struct invalid_utf32_regression_test_case *current;
2474 	pcre2_compile_context_32 *ccontext;
2475 	pcre2_match_data_32 *mdata;
2476 	int total = 0, successful = 0;
2477 	int result;
2478 
2479 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2480 
2481 	ccontext = pcre2_compile_context_create_32(NULL);
2482 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2483 	mdata = pcre2_match_data_create_32(4, NULL);
2484 
2485 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2486 		/* printf("\nPattern: %s :\n", current->pattern); */
2487 		total++;
2488 
2489 		result = 1;
2490 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2491 			result = 0;
2492 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2493 			result = 0;
2494 
2495 		if (result) {
2496 			successful++;
2497 		}
2498 
2499 		printf(".");
2500 		if ((total % 60) == 0)
2501 			printf("\n");
2502 	}
2503 
2504 	if ((total % 60) != 0)
2505 		printf("\n");
2506 
2507 	pcre2_match_data_free_32(mdata);
2508 	pcre2_compile_context_free_32(ccontext);
2509 
2510 	if (total == successful) {
2511 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2512 		return 0;
2513 	} else {
2514 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2515 		return 1;
2516 	}
2517 }
2518 
2519 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2520 
/* Stub for builds without Unicode support or without the 32-bit library:
   there is nothing to run, so it returns 0. */
static int invalid_utf32_regression_tests(void)
{
	return 0;
}
2525 
2526 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2527 
2528 /* End of pcre2_jit_test.c */
2529