xref: /aosp_15_r20/external/pcre/testdata/testinput12 (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi# This set of tests is for UTF-16 and UTF-32 support, including Unicode
2*22dc650dSSadaf Ebrahimi# properties. It is relevant only to the 16-bit and 32-bit libraries. The
3*22dc650dSSadaf Ebrahimi# output is different for each library, so there are separate output files.
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi/���xxx/IB,utf,no_utf_check
6*22dc650dSSadaf Ebrahimi
7*22dc650dSSadaf Ebrahimi/abc/utf
8*22dc650dSSadaf Ebrahimi    �]
9*22dc650dSSadaf Ebrahimi
10*22dc650dSSadaf Ebrahimi# Check maximum character size
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi/\x{ffff}/IB,utf
13*22dc650dSSadaf Ebrahimi
14*22dc650dSSadaf Ebrahimi/\x{10000}/IB,utf
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf
17*22dc650dSSadaf Ebrahimi
18*22dc650dSSadaf Ebrahimi/\x{1000}/IB,utf
19*22dc650dSSadaf Ebrahimi
20*22dc650dSSadaf Ebrahimi/\x{10000}/IB,utf
21*22dc650dSSadaf Ebrahimi
22*22dc650dSSadaf Ebrahimi/\x{100000}/IB,utf
23*22dc650dSSadaf Ebrahimi
24*22dc650dSSadaf Ebrahimi/\x{10ffff}/IB,utf
25*22dc650dSSadaf Ebrahimi
26*22dc650dSSadaf Ebrahimi/[\x{ff}]/IB,utf
27*22dc650dSSadaf Ebrahimi
28*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf
29*22dc650dSSadaf Ebrahimi
30*22dc650dSSadaf Ebrahimi/\x80/IB,utf
31*22dc650dSSadaf Ebrahimi
32*22dc650dSSadaf Ebrahimi/\xff/IB,utf
33*22dc650dSSadaf Ebrahimi
34*22dc650dSSadaf Ebrahimi/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
35*22dc650dSSadaf Ebrahimi    \x{D55c}\x{ad6d}\x{C5B4}
36*22dc650dSSadaf Ebrahimi
37*22dc650dSSadaf Ebrahimi/\x{65e5}\x{672c}\x{8a9e}/IB,utf
38*22dc650dSSadaf Ebrahimi    \x{65e5}\x{672c}\x{8a9e}
39*22dc650dSSadaf Ebrahimi
40*22dc650dSSadaf Ebrahimi/\x{80}/IB,utf
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi/\x{084}/IB,utf
43*22dc650dSSadaf Ebrahimi
44*22dc650dSSadaf Ebrahimi/\x{104}/IB,utf
45*22dc650dSSadaf Ebrahimi
46*22dc650dSSadaf Ebrahimi/\x{861}/IB,utf
47*22dc650dSSadaf Ebrahimi
48*22dc650dSSadaf Ebrahimi/\x{212ab}/IB,utf
49*22dc650dSSadaf Ebrahimi
50*22dc650dSSadaf Ebrahimi/[^ab\xC0-\xF0]/IB,utf
51*22dc650dSSadaf Ebrahimi    \x{f1}
52*22dc650dSSadaf Ebrahimi    \x{bf}
53*22dc650dSSadaf Ebrahimi    \x{100}
54*22dc650dSSadaf Ebrahimi    \x{1000}
55*22dc650dSSadaf Ebrahimi\= Expect no match
56*22dc650dSSadaf Ebrahimi    \x{c0}
57*22dc650dSSadaf Ebrahimi    \x{f0}
58*22dc650dSSadaf Ebrahimi
59*22dc650dSSadaf Ebrahimi/Ā{3,4}/IB,utf
60*22dc650dSSadaf Ebrahimi  \x{100}\x{100}\x{100}\x{100\x{100}
61*22dc650dSSadaf Ebrahimi
62*22dc650dSSadaf Ebrahimi/(\x{100}+|x)/IB,utf
63*22dc650dSSadaf Ebrahimi
64*22dc650dSSadaf Ebrahimi/(\x{100}*a|x)/IB,utf
65*22dc650dSSadaf Ebrahimi
66*22dc650dSSadaf Ebrahimi/(\x{100}{0,2}a|x)/IB,utf
67*22dc650dSSadaf Ebrahimi
68*22dc650dSSadaf Ebrahimi/(\x{100}{1,2}a|x)/IB,utf
69*22dc650dSSadaf Ebrahimi
70*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf
71*22dc650dSSadaf Ebrahimi
72*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}*/IB,utf
73*22dc650dSSadaf Ebrahimi
74*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}+/IB,utf
75*22dc650dSSadaf Ebrahimi
76*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB
77*22dc650dSSadaf Ebrahimi
78*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf
79*22dc650dSSadaf Ebrahimi    \x{100}
80*22dc650dSSadaf Ebrahimi    Z\x{100}
81*22dc650dSSadaf Ebrahimi    \x{100}Z
82*22dc650dSSadaf Ebrahimi
83*22dc650dSSadaf Ebrahimi/[\xff]/IB,utf
84*22dc650dSSadaf Ebrahimi    >\x{ff}<
85*22dc650dSSadaf Ebrahimi
86*22dc650dSSadaf Ebrahimi/[^\xff]/IB,utf
87*22dc650dSSadaf Ebrahimi
88*22dc650dSSadaf Ebrahimi/\x{100}abc(xyz(?1))/IB,utf
89*22dc650dSSadaf Ebrahimi
90*22dc650dSSadaf Ebrahimi/\777/I,utf
91*22dc650dSSadaf Ebrahimi  \x{1ff}
92*22dc650dSSadaf Ebrahimi  \777
93*22dc650dSSadaf Ebrahimi
94*22dc650dSSadaf Ebrahimi/\x{100}+\x{200}/IB,utf
95*22dc650dSSadaf Ebrahimi
96*22dc650dSSadaf Ebrahimi/\x{100}+X/IB,utf
97*22dc650dSSadaf Ebrahimi
98*22dc650dSSadaf Ebrahimi/^[\QĀ\E-\QŐ\E/B,utf
99*22dc650dSSadaf Ebrahimi
100*22dc650dSSadaf Ebrahimi/X/utf
101*22dc650dSSadaf Ebrahimi    XX\x{d800}\=no_utf_check
102*22dc650dSSadaf Ebrahimi    XX\x{da00}\=no_utf_check
103*22dc650dSSadaf Ebrahimi    XX\x{dc00}\=no_utf_check
104*22dc650dSSadaf Ebrahimi    XX\x{de00}\=no_utf_check
105*22dc650dSSadaf Ebrahimi    XX\x{dfff}\=no_utf_check
106*22dc650dSSadaf Ebrahimi\= Expect UTF error
107*22dc650dSSadaf Ebrahimi    XX\x{d800}
108*22dc650dSSadaf Ebrahimi    XX\x{da00}
109*22dc650dSSadaf Ebrahimi    XX\x{dc00}
110*22dc650dSSadaf Ebrahimi    XX\x{de00}
111*22dc650dSSadaf Ebrahimi    XX\x{dfff}
112*22dc650dSSadaf Ebrahimi    XX\x{110000}
113*22dc650dSSadaf Ebrahimi    XX\x{d800}\x{1234}
114*22dc650dSSadaf Ebrahimi\= Expect no match
115*22dc650dSSadaf Ebrahimi    XX\x{d800}\=offset=3
116*22dc650dSSadaf Ebrahimi
117*22dc650dSSadaf Ebrahimi/(?<=.)X/utf
118*22dc650dSSadaf Ebrahimi    XX\x{d800}\=offset=3
119*22dc650dSSadaf Ebrahimi
120*22dc650dSSadaf Ebrahimi/(*UTF16)\x{11234}/
121*22dc650dSSadaf Ebrahimi  abcd\x{11234}pqr
122*22dc650dSSadaf Ebrahimi
123*22dc650dSSadaf Ebrahimi/(*UTF)\x{11234}/I
124*22dc650dSSadaf Ebrahimi  abcd\x{11234}pqr
125*22dc650dSSadaf Ebrahimi
126*22dc650dSSadaf Ebrahimi/(*UTF-32)\x{11234}/
127*22dc650dSSadaf Ebrahimi  abcd\x{11234}pqr
128*22dc650dSSadaf Ebrahimi
129*22dc650dSSadaf Ebrahimi/(*UTF-32)\x{112}/
130*22dc650dSSadaf Ebrahimi  abcd\x{11234}pqr
131*22dc650dSSadaf Ebrahimi
132*22dc650dSSadaf Ebrahimi/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
133*22dc650dSSadaf Ebrahimi
134*22dc650dSSadaf Ebrahimi/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
135*22dc650dSSadaf Ebrahimi
136*22dc650dSSadaf Ebrahimi/\h/I,utf
137*22dc650dSSadaf Ebrahimi    ABC\x{09}
138*22dc650dSSadaf Ebrahimi    ABC\x{20}
139*22dc650dSSadaf Ebrahimi    ABC\x{a0}
140*22dc650dSSadaf Ebrahimi    ABC\x{1680}
141*22dc650dSSadaf Ebrahimi    ABC\x{180e}
142*22dc650dSSadaf Ebrahimi    ABC\x{2000}
143*22dc650dSSadaf Ebrahimi    ABC\x{202f}
144*22dc650dSSadaf Ebrahimi    ABC\x{205f}
145*22dc650dSSadaf Ebrahimi    ABC\x{3000}
146*22dc650dSSadaf Ebrahimi
147*22dc650dSSadaf Ebrahimi/\v/I,utf
148*22dc650dSSadaf Ebrahimi    ABC\x{0a}
149*22dc650dSSadaf Ebrahimi    ABC\x{0b}
150*22dc650dSSadaf Ebrahimi    ABC\x{0c}
151*22dc650dSSadaf Ebrahimi    ABC\x{0d}
152*22dc650dSSadaf Ebrahimi    ABC\x{85}
153*22dc650dSSadaf Ebrahimi    ABC\x{2028}
154*22dc650dSSadaf Ebrahimi
155*22dc650dSSadaf Ebrahimi/\h*A/I,utf
156*22dc650dSSadaf Ebrahimi    CDBABC
157*22dc650dSSadaf Ebrahimi    \x{2000}ABC
158*22dc650dSSadaf Ebrahimi
159*22dc650dSSadaf Ebrahimi/\R*A/I,bsr=unicode,utf
160*22dc650dSSadaf Ebrahimi    CDBABC
161*22dc650dSSadaf Ebrahimi    \x{2028}A
162*22dc650dSSadaf Ebrahimi
163*22dc650dSSadaf Ebrahimi/\v+A/I,utf
164*22dc650dSSadaf Ebrahimi
165*22dc650dSSadaf Ebrahimi/\s?xxx\s/I,utf
166*22dc650dSSadaf Ebrahimi
167*22dc650dSSadaf Ebrahimi/\sxxx\s/I,utf,tables=2
168*22dc650dSSadaf Ebrahimi    AB\x{85}xxx\x{a0}XYZ
169*22dc650dSSadaf Ebrahimi    AB\x{a0}xxx\x{85}XYZ
170*22dc650dSSadaf Ebrahimi
171*22dc650dSSadaf Ebrahimi/\S \S/I,utf,tables=2
172*22dc650dSSadaf Ebrahimi    \x{a2} \x{84}
173*22dc650dSSadaf Ebrahimi    A Z
174*22dc650dSSadaf Ebrahimi
175*22dc650dSSadaf Ebrahimi/a+/utf
176*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=1
177*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=2
178*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=3
179*22dc650dSSadaf Ebrahimi\= Expect no match
180*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=4
181*22dc650dSSadaf Ebrahimi\= Expect bad offset error
182*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=5
183*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=6
184*22dc650dSSadaf Ebrahimi
185*22dc650dSSadaf Ebrahimi/\x{1234}+/Ii,utf
186*22dc650dSSadaf Ebrahimi
187*22dc650dSSadaf Ebrahimi/\x{1234}+?/Ii,utf
188*22dc650dSSadaf Ebrahimi
189*22dc650dSSadaf Ebrahimi/\x{1234}++/Ii,utf
190*22dc650dSSadaf Ebrahimi
191*22dc650dSSadaf Ebrahimi/\x{1234}{2}/Ii,utf
192*22dc650dSSadaf Ebrahimi
193*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB,utf
194*22dc650dSSadaf Ebrahimi
195*22dc650dSSadaf Ebrahimi/X+\x{200}/IB,utf
196*22dc650dSSadaf Ebrahimi
197*22dc650dSSadaf Ebrahimi/\R/I,utf
198*22dc650dSSadaf Ebrahimi
199*22dc650dSSadaf Ebrahimi# Check bad offset
200*22dc650dSSadaf Ebrahimi
201*22dc650dSSadaf Ebrahimi/a/utf
202*22dc650dSSadaf Ebrahimi\= Expect bad UTF-16 offset, or no match in 32-bit
203*22dc650dSSadaf Ebrahimi    \x{10000}\=offset=1
204*22dc650dSSadaf Ebrahimi    \x{10000}ab\=offset=1
205*22dc650dSSadaf Ebrahimi\= Expect 16-bit match, 32-bit no match
206*22dc650dSSadaf Ebrahimi    \x{10000}ab\=offset=2
207*22dc650dSSadaf Ebrahimi\= Expect no match
208*22dc650dSSadaf Ebrahimi    \x{10000}ab\=offset=3
209*22dc650dSSadaf Ebrahimi\= Expect no match in 16-bit, bad offset in 32-bit
210*22dc650dSSadaf Ebrahimi    \x{10000}ab\=offset=4
211*22dc650dSSadaf Ebrahimi\= Expect bad offset
212*22dc650dSSadaf Ebrahimi    \x{10000}ab\=offset=5
213*22dc650dSSadaf Ebrahimi
214*22dc650dSSadaf Ebrahimi/�/utf
215*22dc650dSSadaf Ebrahimi
216*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf
217*22dc650dSSadaf Ebrahimi    a\x{C4}\x{C4}
218*22dc650dSSadaf Ebrahimi
219*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf,tables=2
220*22dc650dSSadaf Ebrahimi    a\x{C4}\x{C4}
221*22dc650dSSadaf Ebrahimi
222*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf
223*22dc650dSSadaf Ebrahimi    !\x{C4}
224*22dc650dSSadaf Ebrahimi
225*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf,tables=2
226*22dc650dSSadaf Ebrahimi    !\x{C4}
227*22dc650dSSadaf Ebrahimi
228*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf
229*22dc650dSSadaf Ebrahimi    !\x{A1}
230*22dc650dSSadaf Ebrahimi
231*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf,tables=2
232*22dc650dSSadaf Ebrahimi    !\x{A1}
233*22dc650dSSadaf Ebrahimi
234*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf
235*22dc650dSSadaf Ebrahimi    X\x20\x{A0}\x{A0}
236*22dc650dSSadaf Ebrahimi
237*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf,tables=2
238*22dc650dSSadaf Ebrahimi    X\x20\x{A0}\x{A0}
239*22dc650dSSadaf Ebrahimi
240*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf
241*22dc650dSSadaf Ebrahimi    X\x{A0}\x{A0}
242*22dc650dSSadaf Ebrahimi
243*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf,tables=2
244*22dc650dSSadaf Ebrahimi    X\x{A0}\x{A0}
245*22dc650dSSadaf Ebrahimi
246*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf
247*22dc650dSSadaf Ebrahimi    \x{a0}\x20!
248*22dc650dSSadaf Ebrahimi
249*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf,tables=2
250*22dc650dSSadaf Ebrahimi    \x{a0}\x20!
251*22dc650dSSadaf Ebrahimi
252*22dc650dSSadaf Ebrahimi/(*UTF)abc/never_utf
253*22dc650dSSadaf Ebrahimi
254*22dc650dSSadaf Ebrahimi/abc/utf,never_utf
255*22dc650dSSadaf Ebrahimi
256*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
257*22dc650dSSadaf Ebrahimi
258*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
259*22dc650dSSadaf Ebrahimi
260*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IB,utf
261*22dc650dSSadaf Ebrahimi
262*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IBi,utf
263*22dc650dSSadaf Ebrahimi
264*22dc650dSSadaf Ebrahimi/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
265*22dc650dSSadaf Ebrahimi    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
266*22dc650dSSadaf Ebrahimi    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
267*22dc650dSSadaf Ebrahimi
268*22dc650dSSadaf Ebrahimi/[ⱥ]/Bi,utf
269*22dc650dSSadaf Ebrahimi
270*22dc650dSSadaf Ebrahimi/[^ⱥ]/Bi,utf
271*22dc650dSSadaf Ebrahimi
272*22dc650dSSadaf Ebrahimi/[[:blank:]]/B,ucp
273*22dc650dSSadaf Ebrahimi
274*22dc650dSSadaf Ebrahimi/\x{212a}+/Ii,utf
275*22dc650dSSadaf Ebrahimi    KKkk\x{212a}
276*22dc650dSSadaf Ebrahimi
277*22dc650dSSadaf Ebrahimi/s+/Ii,utf
278*22dc650dSSadaf Ebrahimi    SSss\x{17f}
279*22dc650dSSadaf Ebrahimi
280*22dc650dSSadaf Ebrahimi# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
281*22dc650dSSadaf Ebrahimi
282*22dc650dSSadaf Ebrahimi/\x{110000}/utf
283*22dc650dSSadaf Ebrahimi
284*22dc650dSSadaf Ebrahimi/\o{4200000}/utf
285*22dc650dSSadaf Ebrahimi
286*22dc650dSSadaf Ebrahimi/\x{100}*A/IB,utf
287*22dc650dSSadaf Ebrahimi    A
288*22dc650dSSadaf Ebrahimi
289*22dc650dSSadaf Ebrahimi/\x{100}*\d(?R)/IB,utf
290*22dc650dSSadaf Ebrahimi
291*22dc650dSSadaf Ebrahimi/[Z\x{100}]/IB,utf
292*22dc650dSSadaf Ebrahimi    Z\x{100}
293*22dc650dSSadaf Ebrahimi    \x{100}
294*22dc650dSSadaf Ebrahimi    \x{100}Z
295*22dc650dSSadaf Ebrahimi
296*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IB,utf
297*22dc650dSSadaf Ebrahimi
298*22dc650dSSadaf Ebrahimi/[z\Qa-d]Ā\E]/IB,utf
299*22dc650dSSadaf Ebrahimi    \x{100}
300*22dc650dSSadaf Ebrahimi    Ā
301*22dc650dSSadaf Ebrahimi
302*22dc650dSSadaf Ebrahimi/[ab\x{100}]abc(xyz(?1))/IB,utf
303*22dc650dSSadaf Ebrahimi
304*22dc650dSSadaf Ebrahimi/\x{100}*\s/IB,utf
305*22dc650dSSadaf Ebrahimi
306*22dc650dSSadaf Ebrahimi/\x{100}*\d/IB,utf
307*22dc650dSSadaf Ebrahimi
308*22dc650dSSadaf Ebrahimi/\x{100}*\w/IB,utf
309*22dc650dSSadaf Ebrahimi
310*22dc650dSSadaf Ebrahimi/\x{100}*\D/IB,utf
311*22dc650dSSadaf Ebrahimi
312*22dc650dSSadaf Ebrahimi/\x{100}*\S/IB,utf
313*22dc650dSSadaf Ebrahimi
314*22dc650dSSadaf Ebrahimi/\x{100}*\W/IB,utf
315*22dc650dSSadaf Ebrahimi
316*22dc650dSSadaf Ebrahimi/[\x{105}-\x{109}]/IBi,utf
317*22dc650dSSadaf Ebrahimi    \x{104}
318*22dc650dSSadaf Ebrahimi    \x{105}
319*22dc650dSSadaf Ebrahimi    \x{109}
320*22dc650dSSadaf Ebrahimi\= Expect no match
321*22dc650dSSadaf Ebrahimi    \x{100}
322*22dc650dSSadaf Ebrahimi    \x{10a}
323*22dc650dSSadaf Ebrahimi
324*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf
325*22dc650dSSadaf Ebrahimi    Z
326*22dc650dSSadaf Ebrahimi    z
327*22dc650dSSadaf Ebrahimi    \x{39c}
328*22dc650dSSadaf Ebrahimi    \x{178}
329*22dc650dSSadaf Ebrahimi    |
330*22dc650dSSadaf Ebrahimi    \x{80}
331*22dc650dSSadaf Ebrahimi    \x{ff}
332*22dc650dSSadaf Ebrahimi    \x{100}
333*22dc650dSSadaf Ebrahimi    \x{101}
334*22dc650dSSadaf Ebrahimi\= Expect no match
335*22dc650dSSadaf Ebrahimi    \x{102}
336*22dc650dSSadaf Ebrahimi    Y
337*22dc650dSSadaf Ebrahimi    y
338*22dc650dSSadaf Ebrahimi
339*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf
340*22dc650dSSadaf Ebrahimi
341*22dc650dSSadaf Ebrahimi/\x{3a3}B/IBi,utf
342*22dc650dSSadaf Ebrahimi
343*22dc650dSSadaf Ebrahimi/./utf
344*22dc650dSSadaf Ebrahimi    \x{110000}
345*22dc650dSSadaf Ebrahimi
346*22dc650dSSadaf Ebrahimi/(*UTF)ab������z/B
347*22dc650dSSadaf Ebrahimi
348*22dc650dSSadaf Ebrahimi/ab������z/utf
349*22dc650dSSadaf Ebrahimi
350*22dc650dSSadaf Ebrahimi/[\W\p{Any}]/B
351*22dc650dSSadaf Ebrahimi    abc
352*22dc650dSSadaf Ebrahimi    123
353*22dc650dSSadaf Ebrahimi
354*22dc650dSSadaf Ebrahimi/[\W\pL]/B
355*22dc650dSSadaf Ebrahimi    abc
356*22dc650dSSadaf Ebrahimi    \x{100}
357*22dc650dSSadaf Ebrahimi    \x{308}
358*22dc650dSSadaf Ebrahimi\= Expect no match
359*22dc650dSSadaf Ebrahimi    123
360*22dc650dSSadaf Ebrahimi
361*22dc650dSSadaf Ebrahimi/[\s[:^ascii:]]/B,ucp
362*22dc650dSSadaf Ebrahimi
363*22dc650dSSadaf Ebrahimi/\pP/ucp
364*22dc650dSSadaf Ebrahimi    \x{7fffffff}
365*22dc650dSSadaf Ebrahimi
366*22dc650dSSadaf Ebrahimi# A special extra option allows escaped surrogate code points in 32-bit mode,
367*22dc650dSSadaf Ebrahimi# but subjects containing them must not be UTF-checked. These patterns give
368*22dc650dSSadaf Ebrahimi# errors in 16-bit mode.
369*22dc650dSSadaf Ebrahimi
370*22dc650dSSadaf Ebrahimi/\x{d800}/I,utf,allow_surrogate_escapes
371*22dc650dSSadaf Ebrahimi    \x{d800}\=no_utf_check
372*22dc650dSSadaf Ebrahimi
373*22dc650dSSadaf Ebrahimi/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
374*22dc650dSSadaf Ebrahimi    \x{dfff}\x{df01}\=no_utf_check
375*22dc650dSSadaf Ebrahimi
376*22dc650dSSadaf Ebrahimi# This has different starting code units in 8-bit mode.
377*22dc650dSSadaf Ebrahimi
378*22dc650dSSadaf Ebrahimi/^[^ab]/IB,utf
379*22dc650dSSadaf Ebrahimi    c
380*22dc650dSSadaf Ebrahimi    \x{ff}
381*22dc650dSSadaf Ebrahimi    \x{100}
382*22dc650dSSadaf Ebrahimi\= Expect no match
383*22dc650dSSadaf Ebrahimi    aaa
384*22dc650dSSadaf Ebrahimi
385*22dc650dSSadaf Ebrahimi# Offsets are different in 8-bit mode.
386*22dc650dSSadaf Ebrahimi
387*22dc650dSSadaf Ebrahimi/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
388*22dc650dSSadaf Ebrahimi    123abcáyzabcdef789abcሴqr
389*22dc650dSSadaf Ebrahimi
390*22dc650dSSadaf Ebrahimi# A few script run tests in non-UTF mode (but they need Unicode support)
391*22dc650dSSadaf Ebrahimi
392*22dc650dSSadaf Ebrahimi/^(*script_run:.{4})/
393*22dc650dSSadaf Ebrahimi    \x{3041}\x{30a1}\x{3007}\x{3007}   Hiragana Katakana Han Han
394*22dc650dSSadaf Ebrahimi    \x{30a1}\x{3041}\x{3007}\x{3007}   Katakana Hiragana Han Han
395*22dc650dSSadaf Ebrahimi    \x{1100}\x{2e80}\x{2e80}\x{1101}   Hangul Han Han Hangul
396*22dc650dSSadaf Ebrahimi
397*22dc650dSSadaf Ebrahimi/^(*sr:.*)/utf,allow_surrogate_escapes
398*22dc650dSSadaf Ebrahimi    \x{2e80}\x{3105}\x{2e80}\x{30a1}   Han Bopomofo Han Katakana
399*22dc650dSSadaf Ebrahimi    \x{d800}\x{dfff}                   Surrogates (Unknown) \=no_utf_check
400*22dc650dSSadaf Ebrahimi
401*22dc650dSSadaf Ebrahimi/(?(n/utf
402*22dc650dSSadaf Ebrahimi
403*22dc650dSSadaf Ebrahimi/(?(á/utf
404*22dc650dSSadaf Ebrahimi
405*22dc650dSSadaf Ebrahimi# Invalid UTF-16/32 tests.
406*22dc650dSSadaf Ebrahimi
407*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf
408*22dc650dSSadaf Ebrahimi    abcd\x{df00}wxzy\x{df00}pqrs
409*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x{df00}pqrs
410*22dc650dSSadaf Ebrahimi
411*22dc650dSSadaf Ebrahimi/abc/match_invalid_utf
412*22dc650dSSadaf Ebrahimi    ab\x{df00}ab\=ph
413*22dc650dSSadaf Ebrahimi\= Expect no match
414*22dc650dSSadaf Ebrahimi    ab\x{df00}cdef\=ph
415*22dc650dSSadaf Ebrahimi
416*22dc650dSSadaf Ebrahimi/.a/match_invalid_utf
417*22dc650dSSadaf Ebrahimi    ab\=ph
418*22dc650dSSadaf Ebrahimi    ab\=ps
419*22dc650dSSadaf Ebrahimi\= Expect no match
420*22dc650dSSadaf Ebrahimi    b\x{df00}\=ph
421*22dc650dSSadaf Ebrahimi    b\x{df00}\=ps
422*22dc650dSSadaf Ebrahimi
423*22dc650dSSadaf Ebrahimi/.a$/match_invalid_utf
424*22dc650dSSadaf Ebrahimi    ab\=ph
425*22dc650dSSadaf Ebrahimi    ab\=ps
426*22dc650dSSadaf Ebrahimi\= Expect no match
427*22dc650dSSadaf Ebrahimi    b\x{df00}\=ph
428*22dc650dSSadaf Ebrahimi    b\x{df00}\=ps
429*22dc650dSSadaf Ebrahimi
430*22dc650dSSadaf Ebrahimi/ab$/match_invalid_utf
431*22dc650dSSadaf Ebrahimi    ab\x{df00}cdeab
432*22dc650dSSadaf Ebrahimi\= Expect no match
433*22dc650dSSadaf Ebrahimi    ab\x{df00}cde
434*22dc650dSSadaf Ebrahimi
435*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf
436*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x{df00}pqrs
437*22dc650dSSadaf Ebrahimi
438*22dc650dSSadaf Ebrahimi/(?<=x)../g,match_invalid_utf
439*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x{df00}pqrs
440*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x{df00}xpqrs
441*22dc650dSSadaf Ebrahimi
442*22dc650dSSadaf Ebrahimi/X$/match_invalid_utf
443*22dc650dSSadaf Ebrahimi\= Expect no match
444*22dc650dSSadaf Ebrahimi    X\x{df00}
445*22dc650dSSadaf Ebrahimi
446*22dc650dSSadaf Ebrahimi/(?<=..)X/match_invalid_utf,aftertext
447*22dc650dSSadaf Ebrahimi    AB\x{df00}AQXYZ
448*22dc650dSSadaf Ebrahimi    AB\x{df00}AQXYZ\=offset=5
449*22dc650dSSadaf Ebrahimi    AB\x{df00}\x{df00}AXYZXC\=offset=5
450*22dc650dSSadaf Ebrahimi\= Expect no match
451*22dc650dSSadaf Ebrahimi    AB\x{df00}XYZ
452*22dc650dSSadaf Ebrahimi    AB\x{df00}XYZ\=offset=3
453*22dc650dSSadaf Ebrahimi    AB\x{df00}AXYZ
454*22dc650dSSadaf Ebrahimi    AB\x{df00}AXYZ\=offset=4
455*22dc650dSSadaf Ebrahimi    AB\x{df00}\x{df00}AXYZ\=offset=5
456*22dc650dSSadaf Ebrahimi
457*22dc650dSSadaf Ebrahimi/.../match_invalid_utf
458*22dc650dSSadaf Ebrahimi\= Expect no match
459*22dc650dSSadaf Ebrahimi    A\x{d800}B
460*22dc650dSSadaf Ebrahimi    A\x{110000}B
461*22dc650dSSadaf Ebrahimi
462*22dc650dSSadaf Ebrahimi/aa/utf,ucp,match_invalid_utf,global
463*22dc650dSSadaf Ebrahimi    aa\x{d800}aa
464*22dc650dSSadaf Ebrahimi
465*22dc650dSSadaf Ebrahimi/aa/utf,ucp,match_invalid_utf,global
466*22dc650dSSadaf Ebrahimi    \x{d800}aa
467*22dc650dSSadaf Ebrahimi
468*22dc650dSSadaf Ebrahimi/A\z/utf,match_invalid_utf
469*22dc650dSSadaf Ebrahimi    A\x{df00}\n
470*22dc650dSSadaf Ebrahimi
471*22dc650dSSadaf Ebrahimi# ----------------------------------------------------
472*22dc650dSSadaf Ebrahimi
473*22dc650dSSadaf Ebrahimi/(*UTF)(?=\x{123})/I
474*22dc650dSSadaf Ebrahimi
475*22dc650dSSadaf Ebrahimi/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
476*22dc650dSSadaf Ebrahimi
477*22dc650dSSadaf Ebrahimi/[\xff\x{ffff}]/I,utf
478*22dc650dSSadaf Ebrahimi
479*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I,utf
480*22dc650dSSadaf Ebrahimi
481*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I
482*22dc650dSSadaf Ebrahimi
483*22dc650dSSadaf Ebrahimi/[Ss]/I
484*22dc650dSSadaf Ebrahimi
485*22dc650dSSadaf Ebrahimi/[Ss]/I,utf
486*22dc650dSSadaf Ebrahimi
487*22dc650dSSadaf Ebrahimi/(?:\x{ff}|\x{3000})/I,utf
488*22dc650dSSadaf Ebrahimi
489*22dc650dSSadaf Ebrahimi# ----------------------------------------------------
490*22dc650dSSadaf Ebrahimi# UCP and casing tests
491*22dc650dSSadaf Ebrahimi
492*22dc650dSSadaf Ebrahimi/\x{120}/i,I
493*22dc650dSSadaf Ebrahimi
494*22dc650dSSadaf Ebrahimi/\x{c1}/i,I,ucp
495*22dc650dSSadaf Ebrahimi
496*22dc650dSSadaf Ebrahimi/[\x{120}\x{121}]/iB,ucp
497*22dc650dSSadaf Ebrahimi
498*22dc650dSSadaf Ebrahimi/[ab\x{120}]+/iB,ucp
499*22dc650dSSadaf Ebrahimi    aABb\x{121}\x{120}
500*22dc650dSSadaf Ebrahimi
501*22dc650dSSadaf Ebrahimi/\x{c1}/i,no_start_optimize
502*22dc650dSSadaf Ebrahimi\= Expect no match
503*22dc650dSSadaf Ebrahimi    \x{e1}
504*22dc650dSSadaf Ebrahimi
505*22dc650dSSadaf Ebrahimi/\x{120}\x{c1}/i,ucp,no_start_optimize
506*22dc650dSSadaf Ebrahimi    \x{121}\x{e1}
507*22dc650dSSadaf Ebrahimi
508*22dc650dSSadaf Ebrahimi/\x{120}\x{c1}/i,ucp
509*22dc650dSSadaf Ebrahimi    \x{121}\x{e1}
510*22dc650dSSadaf Ebrahimi
511*22dc650dSSadaf Ebrahimi/[^\x{120}]/i,no_start_optimize
512*22dc650dSSadaf Ebrahimi    \x{121}
513*22dc650dSSadaf Ebrahimi
514*22dc650dSSadaf Ebrahimi/[^\x{120}]/i,ucp,no_start_optimize
515*22dc650dSSadaf Ebrahimi\= Expect no match
516*22dc650dSSadaf Ebrahimi    \x{121}
517*22dc650dSSadaf Ebrahimi
518*22dc650dSSadaf Ebrahimi/[^\x{120}]/i
519*22dc650dSSadaf Ebrahimi    \x{121}
520*22dc650dSSadaf Ebrahimi
521*22dc650dSSadaf Ebrahimi/[^\x{120}]/i,ucp
522*22dc650dSSadaf Ebrahimi\= Expect no match
523*22dc650dSSadaf Ebrahimi    \x{121}
524*22dc650dSSadaf Ebrahimi
525*22dc650dSSadaf Ebrahimi/\x{120}{2}/i,ucp
526*22dc650dSSadaf Ebrahimi    \x{121}\x{121}
527*22dc650dSSadaf Ebrahimi
528*22dc650dSSadaf Ebrahimi/[^\x{120}]{2}/i,ucp
529*22dc650dSSadaf Ebrahimi\= Expect no match
530*22dc650dSSadaf Ebrahimi    \x{121}\x{121}
531*22dc650dSSadaf Ebrahimi
532*22dc650dSSadaf Ebrahimi/\x{c1}+\x{e1}/iB,ucp
533*22dc650dSSadaf Ebrahimi    \x{c1}\x{c1}\x{c1}
534*22dc650dSSadaf Ebrahimi
535*22dc650dSSadaf Ebrahimi/\x{c1}+\x{e1}/iIB,ucp
536*22dc650dSSadaf Ebrahimi    \x{c1}\x{c1}\x{c1}
537*22dc650dSSadaf Ebrahimi    \x{e1}\x{e1}\x{e1}
538*22dc650dSSadaf Ebrahimi
539*22dc650dSSadaf Ebrahimi/a|\x{c1}/iI,ucp
540*22dc650dSSadaf Ebrahimi    \x{e1}xxx
541*22dc650dSSadaf Ebrahimi
542*22dc650dSSadaf Ebrahimi/\x{c1}|\x{e1}/iI,ucp
543*22dc650dSSadaf Ebrahimi
544*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
545*22dc650dSSadaf Ebrahimi    X\x{e1}Y
546*22dc650dSSadaf Ebrahimi
547*22dc650dSSadaf Ebrahimi/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
548*22dc650dSSadaf Ebrahimi    X\x{121}Y
549*22dc650dSSadaf Ebrahimi
550*22dc650dSSadaf Ebrahimi/s/i,ucp
551*22dc650dSSadaf Ebrahimi    \x{17f}
552*22dc650dSSadaf Ebrahimi
553*22dc650dSSadaf Ebrahimi/s/i,utf
554*22dc650dSSadaf Ebrahimi    \x{17f}
555*22dc650dSSadaf Ebrahimi
556*22dc650dSSadaf Ebrahimi/[^s]/i,ucp
557*22dc650dSSadaf Ebrahimi\= Expect no match
558*22dc650dSSadaf Ebrahimi    \x{17f}
559*22dc650dSSadaf Ebrahimi
560*22dc650dSSadaf Ebrahimi/[^s]/i,utf
561*22dc650dSSadaf Ebrahimi\= Expect no match
562*22dc650dSSadaf Ebrahimi    \x{17f}
563*22dc650dSSadaf Ebrahimi
564*22dc650dSSadaf Ebrahimi# ----------------------------------------------------
565*22dc650dSSadaf Ebrahimi
566*22dc650dSSadaf Ebrahimi# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
567*22dc650dSSadaf Ebrahimi# fails in 16-bit mode, but is OK for 32-bit.
568*22dc650dSSadaf Ebrahimi
569*22dc650dSSadaf Ebrahimi/\x{802a0000}*/
570*22dc650dSSadaf Ebrahimi    \x{802a0000}\x{802a0000}
571*22dc650dSSadaf Ebrahimi
572*22dc650dSSadaf Ebrahimi# UTF matching without UTF, check invalid UTF characters
573*22dc650dSSadaf Ebrahimi/\X++/
574*22dc650dSSadaf Ebrahimi    a\x{110000}\x{ffffffff}
575*22dc650dSSadaf Ebrahimi
576*22dc650dSSadaf Ebrahimi# This used to loop in 32-bit mode; it will fail in 16-bit mode.
577*22dc650dSSadaf Ebrahimi/[\x{ffffffff}]/caseless,ucp
578*22dc650dSSadaf Ebrahimi    \x{ffffffff}xyz
579*22dc650dSSadaf Ebrahimi
580*22dc650dSSadaf Ebrahimi# These are 32-bit tests for handling 0xffffffff when in UCP caseless mode. They
581*22dc650dSSadaf Ebrahimi# will give errors in 16-bit mode.
582*22dc650dSSadaf Ebrahimi
583*22dc650dSSadaf Ebrahimi/k*\x{ffffffff}/caseless,ucp
584*22dc650dSSadaf Ebrahimi    \x{ffffffff}
585*22dc650dSSadaf Ebrahimi
586*22dc650dSSadaf Ebrahimi/k+\x{ffffffff}/caseless,ucp,no_start_optimize
587*22dc650dSSadaf Ebrahimi    K\x{ffffffff}
588*22dc650dSSadaf Ebrahimi\= Expect no match
589*22dc650dSSadaf Ebrahimi    \x{ffffffff}\x{ffffffff}
590*22dc650dSSadaf Ebrahimi
591*22dc650dSSadaf Ebrahimi/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
592*22dc650dSSadaf Ebrahimi\= Expect no match
593*22dc650dSSadaf Ebrahimi    \x{ffffffff}\x{ffffffff}\x{ffffffff}
594*22dc650dSSadaf Ebrahimi
595*22dc650dSSadaf Ebrahimi/k\x{ffffffff}/caseless,ucp,no_start_optimize
596*22dc650dSSadaf Ebrahimi    K\x{ffffffff}
597*22dc650dSSadaf Ebrahimi\= Expect no match
598*22dc650dSSadaf Ebrahimi    \x{ffffffff}\x{ffffffff}\x{ffffffff}
599*22dc650dSSadaf Ebrahimi
600*22dc650dSSadaf Ebrahimi/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
601*22dc650dSSadaf Ebrahimi\= Expect no match
602*22dc650dSSadaf Ebrahimi    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
603*22dc650dSSadaf Ebrahimi
604*22dc650dSSadaf Ebrahimi# ---------------------------------------------------------
605*22dc650dSSadaf Ebrahimi
606*22dc650dSSadaf Ebrahimi# End of testinput12
607