1*22dc650dSSadaf Ebrahimi# This set of tests is for UTF-8 support and Unicode property support, with 2*22dc650dSSadaf Ebrahimi# relevance only for the 8-bit library. 3*22dc650dSSadaf Ebrahimi 4*22dc650dSSadaf Ebrahimi#newline_default lf any anycrlf 5*22dc650dSSadaf Ebrahimi 6*22dc650dSSadaf Ebrahimi# The next 5 patterns have UTF-8 errors 7*22dc650dSSadaf Ebrahimi 8*22dc650dSSadaf Ebrahimi/[�]/utf 9*22dc650dSSadaf Ebrahimi 10*22dc650dSSadaf Ebrahimi/�/utf 11*22dc650dSSadaf Ebrahimi 12*22dc650dSSadaf Ebrahimi/���xxx/utf 13*22dc650dSSadaf Ebrahimi 14*22dc650dSSadaf Ebrahimi/Â��������/utf 15*22dc650dSSadaf Ebrahimi 16*22dc650dSSadaf Ebrahimi/Â��������/match_invalid_utf 17*22dc650dSSadaf Ebrahimi 18*22dc650dSSadaf Ebrahimi# Now test subjects 19*22dc650dSSadaf Ebrahimi 20*22dc650dSSadaf Ebrahimi/badutf/utf 21*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 22*22dc650dSSadaf Ebrahimi X\xdf 23*22dc650dSSadaf Ebrahimi XX\xef 24*22dc650dSSadaf Ebrahimi XXX\xef\x80 25*22dc650dSSadaf Ebrahimi X\xf7 26*22dc650dSSadaf Ebrahimi XX\xf7\x80 27*22dc650dSSadaf Ebrahimi XXX\xf7\x80\x80 28*22dc650dSSadaf Ebrahimi \xfb 29*22dc650dSSadaf Ebrahimi \xfb\x80 30*22dc650dSSadaf Ebrahimi \xfb\x80\x80 31*22dc650dSSadaf Ebrahimi \xfb\x80\x80\x80 32*22dc650dSSadaf Ebrahimi \xfd 33*22dc650dSSadaf Ebrahimi \xfd\x80 34*22dc650dSSadaf Ebrahimi \xfd\x80\x80 35*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80 36*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80\x80 37*22dc650dSSadaf Ebrahimi \xdf\x7f 38*22dc650dSSadaf Ebrahimi \xef\x7f\x80 39*22dc650dSSadaf Ebrahimi \xef\x80\x7f 40*22dc650dSSadaf Ebrahimi \xf7\x7f\x80\x80 41*22dc650dSSadaf Ebrahimi \xf7\x80\x7f\x80 42*22dc650dSSadaf Ebrahimi \xf7\x80\x80\x7f 43*22dc650dSSadaf Ebrahimi \xfb\x7f\x80\x80\x80 44*22dc650dSSadaf Ebrahimi \xfb\x80\x7f\x80\x80 45*22dc650dSSadaf Ebrahimi \xfb\x80\x80\x7f\x80 46*22dc650dSSadaf Ebrahimi \xfb\x80\x80\x80\x7f 47*22dc650dSSadaf Ebrahimi \xfd\x7f\x80\x80\x80\x80 48*22dc650dSSadaf Ebrahimi \xfd\x80\x7f\x80\x80\x80 49*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x7f\x80\x80 50*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80\x7f\x80 51*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80\x80\x7f 52*22dc650dSSadaf Ebrahimi \xed\xa0\x80 53*22dc650dSSadaf Ebrahimi \xc0\x8f 54*22dc650dSSadaf Ebrahimi \xe0\x80\x8f 55*22dc650dSSadaf Ebrahimi \xf0\x80\x80\x8f 56*22dc650dSSadaf Ebrahimi \xf8\x80\x80\x80\x8f 57*22dc650dSSadaf Ebrahimi \xfc\x80\x80\x80\x80\x8f 58*22dc650dSSadaf Ebrahimi \x80 59*22dc650dSSadaf Ebrahimi \xfe 60*22dc650dSSadaf Ebrahimi \xff 61*22dc650dSSadaf Ebrahimi 62*22dc650dSSadaf Ebrahimi/badutf/utf 63*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 64*22dc650dSSadaf Ebrahimi XX\xfb\x80\x80\x80\x80 65*22dc650dSSadaf Ebrahimi XX\xfd\x80\x80\x80\x80\x80 66*22dc650dSSadaf Ebrahimi XX\xf7\xbf\xbf\xbf 67*22dc650dSSadaf Ebrahimi 68*22dc650dSSadaf Ebrahimi/shortutf/utf 69*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 70*22dc650dSSadaf Ebrahimi XX\xdf\=ph 71*22dc650dSSadaf Ebrahimi XX\xef\=ph 72*22dc650dSSadaf Ebrahimi XX\xef\x80\=ph 73*22dc650dSSadaf Ebrahimi \xf7\=ph 74*22dc650dSSadaf Ebrahimi \xf7\x80\=ph 75*22dc650dSSadaf Ebrahimi \xf7\x80\x80\=ph 76*22dc650dSSadaf Ebrahimi \xfb\=ph 77*22dc650dSSadaf Ebrahimi \xfb\x80\=ph 78*22dc650dSSadaf Ebrahimi \xfb\x80\x80\=ph 79*22dc650dSSadaf Ebrahimi \xfb\x80\x80\x80\=ph 80*22dc650dSSadaf Ebrahimi \xfd\=ph 81*22dc650dSSadaf Ebrahimi \xfd\x80\=ph 82*22dc650dSSadaf Ebrahimi \xfd\x80\x80\=ph 83*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80\=ph 84*22dc650dSSadaf Ebrahimi \xfd\x80\x80\x80\x80\=ph 85*22dc650dSSadaf Ebrahimi 86*22dc650dSSadaf Ebrahimi/anything/utf 87*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 88*22dc650dSSadaf Ebrahimi X\xc0\x80 89*22dc650dSSadaf Ebrahimi XX\xc1\x8f 90*22dc650dSSadaf Ebrahimi XXX\xe0\x9f\x80 91*22dc650dSSadaf Ebrahimi \xf0\x8f\x80\x80 92*22dc650dSSadaf Ebrahimi \xf8\x87\x80\x80\x80 93*22dc650dSSadaf Ebrahimi \xfc\x83\x80\x80\x80\x80 94*22dc650dSSadaf Ebrahimi \xfe\x80\x80\x80\x80\x80 95*22dc650dSSadaf Ebrahimi \xff\x80\x80\x80\x80\x80 96*22dc650dSSadaf Ebrahimi \xf8\x88\x80\x80\x80 97*22dc650dSSadaf Ebrahimi \xf9\x87\x80\x80\x80 98*22dc650dSSadaf Ebrahimi \xfc\x84\x80\x80\x80\x80 99*22dc650dSSadaf Ebrahimi \xfd\x83\x80\x80\x80\x80 100*22dc650dSSadaf Ebrahimi\= Expect no match 101*22dc650dSSadaf Ebrahimi \xc3\x8f 102*22dc650dSSadaf Ebrahimi \xe0\xaf\x80 103*22dc650dSSadaf Ebrahimi \xe1\x80\x80 104*22dc650dSSadaf Ebrahimi \xf0\x9f\x80\x80 105*22dc650dSSadaf Ebrahimi \xf1\x8f\x80\x80 106*22dc650dSSadaf Ebrahimi \xf8\x88\x80\x80\x80\=no_utf_check 107*22dc650dSSadaf Ebrahimi \xf9\x87\x80\x80\x80\=no_utf_check 108*22dc650dSSadaf Ebrahimi \xfc\x84\x80\x80\x80\x80\=no_utf_check 109*22dc650dSSadaf Ebrahimi \xfd\x83\x80\x80\x80\x80\=no_utf_check 110*22dc650dSSadaf Ebrahimi 111*22dc650dSSadaf Ebrahimi# Similar tests with offsets 112*22dc650dSSadaf Ebrahimi 113*22dc650dSSadaf Ebrahimi/badutf/utf 114*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 115*22dc650dSSadaf Ebrahimi X\xdfabcd 116*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=1 117*22dc650dSSadaf Ebrahimi\= Expect no match 118*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=2 119*22dc650dSSadaf Ebrahimi 120*22dc650dSSadaf Ebrahimi/(?<=x)badutf/utf 121*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 122*22dc650dSSadaf Ebrahimi X\xdfabcd 123*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=1 124*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=2 125*22dc650dSSadaf Ebrahimi X\xdfabcd\xdf\=offset=3 126*22dc650dSSadaf Ebrahimi\= Expect no match 127*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=3 128*22dc650dSSadaf Ebrahimi 129*22dc650dSSadaf Ebrahimi/(?<=xx)badutf/utf 130*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 131*22dc650dSSadaf Ebrahimi X\xdfabcd 132*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=1 133*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=2 134*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=3 135*22dc650dSSadaf Ebrahimi 136*22dc650dSSadaf Ebrahimi/(?<=xxxx)badutf/utf 137*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 138*22dc650dSSadaf Ebrahimi X\xdfabcd 139*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=1 140*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=2 141*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=3 142*22dc650dSSadaf Ebrahimi X\xdfabc\xdf\=offset=6 143*22dc650dSSadaf Ebrahimi X\xdfabc\xdf\=offset=7 144*22dc650dSSadaf Ebrahimi\= Expect no match 145*22dc650dSSadaf Ebrahimi X\xdfabcd\=offset=6 146*22dc650dSSadaf Ebrahimi 147*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf 148*22dc650dSSadaf Ebrahimi 149*22dc650dSSadaf Ebrahimi/\x{1000}/IB,utf 150*22dc650dSSadaf Ebrahimi 151*22dc650dSSadaf Ebrahimi/\x{10000}/IB,utf 152*22dc650dSSadaf Ebrahimi 153*22dc650dSSadaf Ebrahimi/\x{100000}/IB,utf 154*22dc650dSSadaf Ebrahimi 155*22dc650dSSadaf Ebrahimi/\x{10ffff}/IB,utf 156*22dc650dSSadaf Ebrahimi 157*22dc650dSSadaf Ebrahimi/[\x{ff}]/IB,utf 158*22dc650dSSadaf Ebrahimi 159*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf 160*22dc650dSSadaf Ebrahimi 161*22dc650dSSadaf Ebrahimi/\x80/IB,utf 162*22dc650dSSadaf Ebrahimi 163*22dc650dSSadaf Ebrahimi/\xff/IB,utf 164*22dc650dSSadaf Ebrahimi 165*22dc650dSSadaf Ebrahimi/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 166*22dc650dSSadaf Ebrahimi \x{D55c}\x{ad6d}\x{C5B4} 167*22dc650dSSadaf Ebrahimi 168*22dc650dSSadaf Ebrahimi/\x{65e5}\x{672c}\x{8a9e}/IB,utf 169*22dc650dSSadaf Ebrahimi \x{65e5}\x{672c}\x{8a9e} 170*22dc650dSSadaf Ebrahimi 171*22dc650dSSadaf Ebrahimi/\x{80}/IB,utf 172*22dc650dSSadaf Ebrahimi 173*22dc650dSSadaf Ebrahimi/\x{084}/IB,utf 174*22dc650dSSadaf Ebrahimi 175*22dc650dSSadaf Ebrahimi/\x{104}/IB,utf 176*22dc650dSSadaf Ebrahimi 177*22dc650dSSadaf Ebrahimi/\x{861}/IB,utf 178*22dc650dSSadaf Ebrahimi 179*22dc650dSSadaf Ebrahimi/\x{212ab}/IB,utf 180*22dc650dSSadaf Ebrahimi 181*22dc650dSSadaf Ebrahimi/[^ab\xC0-\xF0]/IB,utf 182*22dc650dSSadaf Ebrahimi \x{f1} 183*22dc650dSSadaf Ebrahimi \x{bf} 184*22dc650dSSadaf Ebrahimi \x{100} 185*22dc650dSSadaf Ebrahimi \x{1000} 186*22dc650dSSadaf Ebrahimi\= Expect no match 187*22dc650dSSadaf Ebrahimi \x{c0} 188*22dc650dSSadaf Ebrahimi \x{f0} 189*22dc650dSSadaf Ebrahimi 190*22dc650dSSadaf Ebrahimi/Ā{3,4}/IB,utf 191*22dc650dSSadaf Ebrahimi \x{100}\x{100}\x{100}\x{100\x{100} 192*22dc650dSSadaf Ebrahimi 193*22dc650dSSadaf Ebrahimi/(\x{100}+|x)/IB,utf 194*22dc650dSSadaf Ebrahimi 195*22dc650dSSadaf Ebrahimi/(\x{100}*a|x)/IB,utf 196*22dc650dSSadaf Ebrahimi 197*22dc650dSSadaf Ebrahimi/(\x{100}{0,2}a|x)/IB,utf 198*22dc650dSSadaf Ebrahimi 199*22dc650dSSadaf Ebrahimi/(\x{100}{1,2}a|x)/IB,utf 200*22dc650dSSadaf Ebrahimi 201*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf 202*22dc650dSSadaf Ebrahimi 203*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}*/IB,utf 204*22dc650dSSadaf Ebrahimi 205*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}+/IB,utf 206*22dc650dSSadaf Ebrahimi 207*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB 208*22dc650dSSadaf Ebrahimi 209*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf 210*22dc650dSSadaf Ebrahimi \x{100} 211*22dc650dSSadaf Ebrahimi Z\x{100} 212*22dc650dSSadaf Ebrahimi \x{100}Z 213*22dc650dSSadaf Ebrahimi 214*22dc650dSSadaf Ebrahimi/[\xff]/IB,utf 215*22dc650dSSadaf Ebrahimi >\x{ff}< 216*22dc650dSSadaf Ebrahimi 217*22dc650dSSadaf Ebrahimi/[^\xff]/IB,utf 218*22dc650dSSadaf Ebrahimi 219*22dc650dSSadaf Ebrahimi/\x{100}abc(xyz(?1))/IB,utf 220*22dc650dSSadaf Ebrahimi 221*22dc650dSSadaf Ebrahimi/\777/I,utf 222*22dc650dSSadaf Ebrahimi \x{1ff} 223*22dc650dSSadaf Ebrahimi \777 224*22dc650dSSadaf Ebrahimi 225*22dc650dSSadaf Ebrahimi/\x{100}+\x{200}/IB,utf 226*22dc650dSSadaf Ebrahimi 227*22dc650dSSadaf Ebrahimi/\x{100}+X/IB,utf 228*22dc650dSSadaf Ebrahimi 229*22dc650dSSadaf Ebrahimi/^[\QĀ\E-\QŐ\E/B,utf 230*22dc650dSSadaf Ebrahimi 231*22dc650dSSadaf Ebrahimi# This tests the stricter UTF-8 check according to RFC 3629. 232*22dc650dSSadaf Ebrahimi 233*22dc650dSSadaf Ebrahimi/X/utf 234*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors 235*22dc650dSSadaf Ebrahimi \x{d800} 236*22dc650dSSadaf Ebrahimi \x{da00} 237*22dc650dSSadaf Ebrahimi \x{dfff} 238*22dc650dSSadaf Ebrahimi \x{110000} 239*22dc650dSSadaf Ebrahimi \x{2000000} 240*22dc650dSSadaf Ebrahimi \x{7fffffff} 241*22dc650dSSadaf Ebrahimi\= Expect no match 242*22dc650dSSadaf Ebrahimi \x{d800}\=no_utf_check 243*22dc650dSSadaf Ebrahimi \x{da00}\=no_utf_check 244*22dc650dSSadaf Ebrahimi \x{dfff}\=no_utf_check 245*22dc650dSSadaf Ebrahimi \x{110000}\=no_utf_check 246*22dc650dSSadaf Ebrahimi \x{2000000}\=no_utf_check 247*22dc650dSSadaf Ebrahimi \x{7fffffff}\=no_utf_check 248*22dc650dSSadaf Ebrahimi 249*22dc650dSSadaf Ebrahimi/(*UTF8)\x{1234}/ 250*22dc650dSSadaf Ebrahimi abcd\x{1234}pqr 251*22dc650dSSadaf Ebrahimi 252*22dc650dSSadaf Ebrahimi/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I 253*22dc650dSSadaf Ebrahimi 254*22dc650dSSadaf Ebrahimi/\h/I,utf 255*22dc650dSSadaf Ebrahimi ABC\x{09} 256*22dc650dSSadaf Ebrahimi ABC\x{20} 257*22dc650dSSadaf Ebrahimi ABC\x{a0} 258*22dc650dSSadaf Ebrahimi ABC\x{1680} 259*22dc650dSSadaf Ebrahimi ABC\x{180e} 260*22dc650dSSadaf Ebrahimi ABC\x{2000} 261*22dc650dSSadaf Ebrahimi ABC\x{202f} 262*22dc650dSSadaf Ebrahimi ABC\x{205f} 263*22dc650dSSadaf Ebrahimi ABC\x{3000} 264*22dc650dSSadaf Ebrahimi 265*22dc650dSSadaf Ebrahimi/\v/I,utf 266*22dc650dSSadaf Ebrahimi ABC\x{0a} 267*22dc650dSSadaf Ebrahimi ABC\x{0b} 268*22dc650dSSadaf Ebrahimi ABC\x{0c} 269*22dc650dSSadaf Ebrahimi ABC\x{0d} 270*22dc650dSSadaf Ebrahimi ABC\x{85} 271*22dc650dSSadaf Ebrahimi ABC\x{2028} 272*22dc650dSSadaf Ebrahimi 273*22dc650dSSadaf Ebrahimi/\h*A/I,utf 274*22dc650dSSadaf Ebrahimi CDBABC 275*22dc650dSSadaf Ebrahimi 276*22dc650dSSadaf Ebrahimi/\v+A/I,utf 277*22dc650dSSadaf Ebrahimi 278*22dc650dSSadaf Ebrahimi/\s?xxx\s/I,utf 279*22dc650dSSadaf Ebrahimi 280*22dc650dSSadaf Ebrahimi/\sxxx\s/I,utf,tables=2 281*22dc650dSSadaf Ebrahimi AB\x{85}xxx\x{a0}XYZ 282*22dc650dSSadaf Ebrahimi AB\x{a0}xxx\x{85}XYZ 283*22dc650dSSadaf Ebrahimi 284*22dc650dSSadaf Ebrahimi/\S \S/I,utf,tables=2 285*22dc650dSSadaf Ebrahimi \x{a2} \x{84} 286*22dc650dSSadaf Ebrahimi A Z 287*22dc650dSSadaf Ebrahimi 288*22dc650dSSadaf Ebrahimi/a+/utf 289*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=1 290*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=3 291*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=4 292*22dc650dSSadaf Ebrahimi\= Expect bad offset value 293*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=6 294*22dc650dSSadaf Ebrahimi\= Expect bad UTF-8 offset 295*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=2 296*22dc650dSSadaf Ebrahimi\= Expect no match 297*22dc650dSSadaf Ebrahimi a\x{123}aa\=offset=5 298*22dc650dSSadaf Ebrahimi 299*22dc650dSSadaf Ebrahimi/\x{1234}+/Ii,utf 300*22dc650dSSadaf Ebrahimi 301*22dc650dSSadaf Ebrahimi/\x{1234}+?/Ii,utf 302*22dc650dSSadaf Ebrahimi 303*22dc650dSSadaf Ebrahimi/\x{1234}++/Ii,utf 304*22dc650dSSadaf Ebrahimi 305*22dc650dSSadaf Ebrahimi/\x{1234}{2}/Ii,utf 306*22dc650dSSadaf Ebrahimi 307*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB,utf 308*22dc650dSSadaf Ebrahimi 309*22dc650dSSadaf Ebrahimi/X+\x{200}/IB,utf 310*22dc650dSSadaf Ebrahimi 311*22dc650dSSadaf Ebrahimi/\R/I,utf 312*22dc650dSSadaf Ebrahimi 313*22dc650dSSadaf Ebrahimi/\777/IB,utf 314*22dc650dSSadaf Ebrahimi 315*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf 316*22dc650dSSadaf Ebrahimi a\x{C4}\x{C4} 317*22dc650dSSadaf Ebrahimi 318*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf,tables=2 319*22dc650dSSadaf Ebrahimi a\x{C4}\x{C4} 320*22dc650dSSadaf Ebrahimi 321*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf 322*22dc650dSSadaf Ebrahimi !\x{C4} 323*22dc650dSSadaf Ebrahimi 324*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf,tables=2 325*22dc650dSSadaf Ebrahimi !\x{C4} 326*22dc650dSSadaf Ebrahimi 327*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf 328*22dc650dSSadaf Ebrahimi !\x{A1} 329*22dc650dSSadaf Ebrahimi 330*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf,tables=2 331*22dc650dSSadaf Ebrahimi !\x{A1} 332*22dc650dSSadaf Ebrahimi 333*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf 334*22dc650dSSadaf Ebrahimi X\x20\x{A0}\x{A0} 335*22dc650dSSadaf Ebrahimi 336*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf,tables=2 337*22dc650dSSadaf Ebrahimi X\x20\x{A0}\x{A0} 338*22dc650dSSadaf Ebrahimi 339*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf 340*22dc650dSSadaf Ebrahimi X\x{A0}\x{A0} 341*22dc650dSSadaf Ebrahimi 342*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf,tables=2 343*22dc650dSSadaf Ebrahimi X\x{A0}\x{A0} 344*22dc650dSSadaf Ebrahimi 345*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf 346*22dc650dSSadaf Ebrahimi \x{a0}\x20! 347*22dc650dSSadaf Ebrahimi 348*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf,tables=2 349*22dc650dSSadaf Ebrahimi \x{a0}\x20! 350*22dc650dSSadaf Ebrahimi 351*22dc650dSSadaf Ebrahimi/A/utf 352*22dc650dSSadaf Ebrahimi \x{ff000041} 353*22dc650dSSadaf Ebrahimi \x{7f000041} 354*22dc650dSSadaf Ebrahimi 355*22dc650dSSadaf Ebrahimi/(*UTF8)abc/never_utf 356*22dc650dSSadaf Ebrahimi 357*22dc650dSSadaf Ebrahimi/abc/utf,never_utf 358*22dc650dSSadaf Ebrahimi 359*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 360*22dc650dSSadaf Ebrahimi 361*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 362*22dc650dSSadaf Ebrahimi 363*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IB,utf 364*22dc650dSSadaf Ebrahimi 365*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IBi,utf 366*22dc650dSSadaf Ebrahimi 367*22dc650dSSadaf Ebrahimi/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 368*22dc650dSSadaf Ebrahimi \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 369*22dc650dSSadaf Ebrahimi \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 370*22dc650dSSadaf Ebrahimi 371*22dc650dSSadaf Ebrahimi/[ⱥ]/Bi,utf 372*22dc650dSSadaf Ebrahimi 373*22dc650dSSadaf Ebrahimi/[^ⱥ]/Bi,utf 374*22dc650dSSadaf Ebrahimi 375*22dc650dSSadaf Ebrahimi/\h/I 376*22dc650dSSadaf Ebrahimi 377*22dc650dSSadaf Ebrahimi/\v/I 378*22dc650dSSadaf Ebrahimi 379*22dc650dSSadaf Ebrahimi/\R/I 380*22dc650dSSadaf Ebrahimi 381*22dc650dSSadaf Ebrahimi/[[:blank:]]/B,ucp 382*22dc650dSSadaf Ebrahimi 383*22dc650dSSadaf Ebrahimi/\x{212a}+/Ii,utf 384*22dc650dSSadaf Ebrahimi KKkk\x{212a} 385*22dc650dSSadaf Ebrahimi 386*22dc650dSSadaf Ebrahimi/s+/Ii,utf 387*22dc650dSSadaf Ebrahimi SSss\x{17f} 388*22dc650dSSadaf Ebrahimi 389*22dc650dSSadaf Ebrahimi/\x{100}*A/IB,utf 390*22dc650dSSadaf Ebrahimi A 391*22dc650dSSadaf Ebrahimi 392*22dc650dSSadaf Ebrahimi/\x{100}*\d(?R)/IB,utf 393*22dc650dSSadaf Ebrahimi 394*22dc650dSSadaf Ebrahimi/[Z\x{100}]/IB,utf 395*22dc650dSSadaf Ebrahimi Z\x{100} 396*22dc650dSSadaf Ebrahimi \x{100} 397*22dc650dSSadaf Ebrahimi \x{100}Z 398*22dc650dSSadaf Ebrahimi 399*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IB,utf 400*22dc650dSSadaf Ebrahimi 401*22dc650dSSadaf Ebrahimi/[z\Qa-d]Ā\E]/IB,utf 402*22dc650dSSadaf Ebrahimi \x{100} 403*22dc650dSSadaf Ebrahimi Ā 404*22dc650dSSadaf Ebrahimi 405*22dc650dSSadaf Ebrahimi/[ab\x{100}]abc(xyz(?1))/IB,utf 406*22dc650dSSadaf Ebrahimi 407*22dc650dSSadaf Ebrahimi/\x{100}*\s/IB,utf 408*22dc650dSSadaf Ebrahimi 409*22dc650dSSadaf Ebrahimi/\x{100}*\d/IB,utf 410*22dc650dSSadaf Ebrahimi 411*22dc650dSSadaf Ebrahimi/\x{100}*\w/IB,utf 412*22dc650dSSadaf Ebrahimi 413*22dc650dSSadaf Ebrahimi/\x{100}*\D/IB,utf 414*22dc650dSSadaf Ebrahimi 415*22dc650dSSadaf Ebrahimi/\x{100}*\S/IB,utf 416*22dc650dSSadaf Ebrahimi 417*22dc650dSSadaf Ebrahimi/\x{100}*\W/IB,utf 418*22dc650dSSadaf Ebrahimi 419*22dc650dSSadaf Ebrahimi/[\x{105}-\x{109}]/IBi,utf 420*22dc650dSSadaf Ebrahimi \x{104} 421*22dc650dSSadaf Ebrahimi \x{105} 422*22dc650dSSadaf Ebrahimi \x{109} 423*22dc650dSSadaf Ebrahimi\= Expect no match 424*22dc650dSSadaf Ebrahimi \x{100} 425*22dc650dSSadaf Ebrahimi \x{10a} 426*22dc650dSSadaf Ebrahimi 427*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf 428*22dc650dSSadaf Ebrahimi Z 429*22dc650dSSadaf Ebrahimi z 430*22dc650dSSadaf Ebrahimi \x{39c} 431*22dc650dSSadaf Ebrahimi \x{178} 432*22dc650dSSadaf Ebrahimi | 433*22dc650dSSadaf Ebrahimi \x{80} 434*22dc650dSSadaf Ebrahimi \x{ff} 435*22dc650dSSadaf Ebrahimi \x{100} 436*22dc650dSSadaf Ebrahimi \x{101} 437*22dc650dSSadaf Ebrahimi\= Expect no match 438*22dc650dSSadaf Ebrahimi \x{102} 439*22dc650dSSadaf Ebrahimi Y 440*22dc650dSSadaf Ebrahimi y 441*22dc650dSSadaf Ebrahimi 442*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf 443*22dc650dSSadaf Ebrahimi 444*22dc650dSSadaf Ebrahimi/\x{3a3}B/IBi,utf 445*22dc650dSSadaf Ebrahimi 446*22dc650dSSadaf Ebrahimi/abc/utf,replace=� 447*22dc650dSSadaf Ebrahimi abc 448*22dc650dSSadaf Ebrahimi 449*22dc650dSSadaf Ebrahimi/(?<=(a)(?-1))x/I,utf 450*22dc650dSSadaf Ebrahimi a\x80zx\=offset=3 451*22dc650dSSadaf Ebrahimi 452*22dc650dSSadaf Ebrahimi/[\W\p{Any}]/B 453*22dc650dSSadaf Ebrahimi abc 454*22dc650dSSadaf Ebrahimi 123 455*22dc650dSSadaf Ebrahimi 456*22dc650dSSadaf Ebrahimi/[\W\pL]/B 457*22dc650dSSadaf Ebrahimi abc 458*22dc650dSSadaf Ebrahimi\= Expect no match 459*22dc650dSSadaf Ebrahimi 123 460*22dc650dSSadaf Ebrahimi 461*22dc650dSSadaf Ebrahimi/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf 462*22dc650dSSadaf Ebrahimi 463*22dc650dSSadaf Ebrahimi/[\s[:^ascii:]]/B,ucp 464*22dc650dSSadaf Ebrahimi 465*22dc650dSSadaf Ebrahimi# A special extra option allows excaped surrogate code points in 8-bit mode, 466*22dc650dSSadaf Ebrahimi# but subjects containing them must not be UTF-checked. 467*22dc650dSSadaf Ebrahimi 468*22dc650dSSadaf Ebrahimi/\x{d800}/I,utf,allow_surrogate_escapes 469*22dc650dSSadaf Ebrahimi \x{d800}\=no_utf_check 470*22dc650dSSadaf Ebrahimi 471*22dc650dSSadaf Ebrahimi/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 472*22dc650dSSadaf Ebrahimi \x{dfff}\x{df01}\=no_utf_check 473*22dc650dSSadaf Ebrahimi 474*22dc650dSSadaf Ebrahimi# This has different starting code units in 8-bit mode. 475*22dc650dSSadaf Ebrahimi 476*22dc650dSSadaf Ebrahimi/^[^ab]/IB,utf 477*22dc650dSSadaf Ebrahimi c 478*22dc650dSSadaf Ebrahimi \x{ff} 479*22dc650dSSadaf Ebrahimi \x{100} 480*22dc650dSSadaf Ebrahimi\= Expect no match 481*22dc650dSSadaf Ebrahimi aaa 482*22dc650dSSadaf Ebrahimi 483*22dc650dSSadaf Ebrahimi# Offsets are different in 8-bit mode. 484*22dc650dSSadaf Ebrahimi 485*22dc650dSSadaf Ebrahimi/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 486*22dc650dSSadaf Ebrahimi 123abcáyzabcdef789abcሴqr 487*22dc650dSSadaf Ebrahimi 488*22dc650dSSadaf Ebrahimi# Check name length with non-ASCII characters 489*22dc650dSSadaf Ebrahimi 490*22dc650dSSadaf Ebrahimi/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 491*22dc650dSSadaf Ebrahimi 492*22dc650dSSadaf Ebrahimi/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 493*22dc650dSSadaf Ebrahimi 494*22dc650dSSadaf Ebrahimi/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 495*22dc650dSSadaf Ebrahimi 496*22dc650dSSadaf Ebrahimi/(?(n/utf 497*22dc650dSSadaf Ebrahimi 498*22dc650dSSadaf Ebrahimi/(?(á/utf 499*22dc650dSSadaf Ebrahimi 500*22dc650dSSadaf Ebrahimi# Invalid UTF-8 tests 501*22dc650dSSadaf Ebrahimi 502*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf 503*22dc650dSSadaf Ebrahimi abcd\x80wxzy\x80pqrs 504*22dc650dSSadaf Ebrahimi abcd\x{80}wxzy\x80pqrs 505*22dc650dSSadaf Ebrahimi 506*22dc650dSSadaf Ebrahimi/abc/match_invalid_utf 507*22dc650dSSadaf Ebrahimi ab\x80ab\=ph 508*22dc650dSSadaf Ebrahimi\= Expect no match 509*22dc650dSSadaf Ebrahimi ab\x80cdef\=ph 510*22dc650dSSadaf Ebrahimi 511*22dc650dSSadaf Ebrahimi/.a/match_invalid_utf 512*22dc650dSSadaf Ebrahimi ab\=ph 513*22dc650dSSadaf Ebrahimi ab\=ps 514*22dc650dSSadaf Ebrahimi b\xf0\x91\x88b\=ph 515*22dc650dSSadaf Ebrahimi b\xf0\x91\x88b\=ps 516*22dc650dSSadaf Ebrahimi b\xf0\x91\x88\xb4a 517*22dc650dSSadaf Ebrahimi\= Expect no match 518*22dc650dSSadaf Ebrahimi b\x80\=ph 519*22dc650dSSadaf Ebrahimi b\x80\=ps 520*22dc650dSSadaf Ebrahimi b\xf0\x91\x88\=ph 521*22dc650dSSadaf Ebrahimi b\xf0\x91\x88\=ps 522*22dc650dSSadaf Ebrahimi 523*22dc650dSSadaf Ebrahimi/.a$/match_invalid_utf 524*22dc650dSSadaf Ebrahimi ab\=ph 525*22dc650dSSadaf Ebrahimi ab\=ps 526*22dc650dSSadaf Ebrahimi\= Expect no match 527*22dc650dSSadaf Ebrahimi b\xf0\x91\x98\=ph 528*22dc650dSSadaf Ebrahimi b\xf0\x91\x98\=ps 529*22dc650dSSadaf Ebrahimi 530*22dc650dSSadaf Ebrahimi/ab$/match_invalid_utf 531*22dc650dSSadaf Ebrahimi ab\x80cdeab 532*22dc650dSSadaf Ebrahimi\= Expect no match 533*22dc650dSSadaf Ebrahimi ab\x80cde 534*22dc650dSSadaf Ebrahimi 535*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf 536*22dc650dSSadaf Ebrahimi abcd\x{80}wxzy\x80pqrs 537*22dc650dSSadaf Ebrahimi 538*22dc650dSSadaf Ebrahimi/(?<=x)../g,match_invalid_utf 539*22dc650dSSadaf Ebrahimi abcd\x{80}wxzy\x80pqrs 540*22dc650dSSadaf Ebrahimi abcd\x{80}wxzy\x80xpqrs 541*22dc650dSSadaf Ebrahimi 542*22dc650dSSadaf Ebrahimi/X$/match_invalid_utf 543*22dc650dSSadaf Ebrahimi\= Expect no match 544*22dc650dSSadaf Ebrahimi X\xc4 545*22dc650dSSadaf Ebrahimi 546*22dc650dSSadaf Ebrahimi/(?<=..)X/match_invalid_utf,aftertext 547*22dc650dSSadaf Ebrahimi AB\x80AQXYZ 548*22dc650dSSadaf Ebrahimi AB\x80AQXYZ\=offset=5 549*22dc650dSSadaf Ebrahimi AB\x80\x80AXYZXC\=offset=5 550*22dc650dSSadaf Ebrahimi\= Expect no match 551*22dc650dSSadaf Ebrahimi AB\x80XYZ 552*22dc650dSSadaf Ebrahimi AB\x80XYZ\=offset=3 553*22dc650dSSadaf Ebrahimi AB\xfeXYZ 554*22dc650dSSadaf Ebrahimi AB\xffXYZ\=offset=3 555*22dc650dSSadaf Ebrahimi AB\x80AXYZ 556*22dc650dSSadaf Ebrahimi AB\x80AXYZ\=offset=4 557*22dc650dSSadaf Ebrahimi AB\x80\x80AXYZ\=offset=5 558*22dc650dSSadaf Ebrahimi 559*22dc650dSSadaf Ebrahimi/.../match_invalid_utf 560*22dc650dSSadaf Ebrahimi AB\xc4CCC 561*22dc650dSSadaf Ebrahimi\= Expect no match 562*22dc650dSSadaf Ebrahimi A\x{d800}B 563*22dc650dSSadaf Ebrahimi A\x{110000}B 564*22dc650dSSadaf Ebrahimi A\xc4B 565*22dc650dSSadaf Ebrahimi 566*22dc650dSSadaf Ebrahimi/\bX/match_invalid_utf 567*22dc650dSSadaf Ebrahimi A\x80X 568*22dc650dSSadaf Ebrahimi 569*22dc650dSSadaf Ebrahimi/\BX/match_invalid_utf 570*22dc650dSSadaf Ebrahimi\= Expect no match 571*22dc650dSSadaf Ebrahimi A\x80X 572*22dc650dSSadaf Ebrahimi 573*22dc650dSSadaf Ebrahimi/(?<=...)X/match_invalid_utf 574*22dc650dSSadaf Ebrahimi AAA\x80BBBXYZ 575*22dc650dSSadaf Ebrahimi\= Expect no match 576*22dc650dSSadaf Ebrahimi AAA\x80BXYZ 577*22dc650dSSadaf Ebrahimi AAA\x80BBXYZ 578*22dc650dSSadaf Ebrahimi 579*22dc650dSSadaf Ebrahimi# ------------------------------------- 580*22dc650dSSadaf Ebrahimi 581*22dc650dSSadaf Ebrahimi/(*UTF)(?=\x{123})/I 582*22dc650dSSadaf Ebrahimi 583*22dc650dSSadaf Ebrahimi/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 584*22dc650dSSadaf Ebrahimi 585*22dc650dSSadaf Ebrahimi/[,]/BI,utf 586*22dc650dSSadaf Ebrahimi 587*22dc650dSSadaf Ebrahimi/[\x{fff4}-\x{ffff8}]/I,utf 588*22dc650dSSadaf Ebrahimi 589*22dc650dSSadaf Ebrahimi/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf 590*22dc650dSSadaf Ebrahimi 591*22dc650dSSadaf Ebrahimi/[\xff\x{ffff}]/I,utf 592*22dc650dSSadaf Ebrahimi 593*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I,utf 594*22dc650dSSadaf Ebrahimi abc\x{ff}def 595*22dc650dSSadaf Ebrahimi 596*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I 597*22dc650dSSadaf Ebrahimi abc\x{ff}def 598*22dc650dSSadaf Ebrahimi 599*22dc650dSSadaf Ebrahimi/[Ss]/I 600*22dc650dSSadaf Ebrahimi 601*22dc650dSSadaf Ebrahimi/[Ss]/I,utf 602*22dc650dSSadaf Ebrahimi 603*22dc650dSSadaf Ebrahimi/(?:\x{ff}|\x{3000})/I,utf 604*22dc650dSSadaf Ebrahimi 605*22dc650dSSadaf Ebrahimi/x/utf 606*22dc650dSSadaf Ebrahimi abxyz 607*22dc650dSSadaf Ebrahimi \x80\=startchar 608*22dc650dSSadaf Ebrahimi abc\x80\=startchar 609*22dc650dSSadaf Ebrahimi abc\x80\=startchar,offset=3 610*22dc650dSSadaf Ebrahimi 611*22dc650dSSadaf Ebrahimi/\x{c1}+\x{e1}/iIB,ucp 612*22dc650dSSadaf Ebrahimi \x{c1}\x{c1}\x{c1} 613*22dc650dSSadaf Ebrahimi \x{e1}\x{e1}\x{e1} 614*22dc650dSSadaf Ebrahimi 615*22dc650dSSadaf Ebrahimi/a|\x{c1}/iI,ucp 616*22dc650dSSadaf Ebrahimi \x{e1}xxx 617*22dc650dSSadaf Ebrahimi 618*22dc650dSSadaf Ebrahimi/a|\x{c1}/iI,utf 619*22dc650dSSadaf Ebrahimi \x{e1}xxx 620*22dc650dSSadaf Ebrahimi 621*22dc650dSSadaf Ebrahimi/\x{c1}|\x{e1}/iI,ucp 622*22dc650dSSadaf Ebrahimi 623*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 624*22dc650dSSadaf Ebrahimi X\x{e1}Y 625*22dc650dSSadaf Ebrahimi 626*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended 627*22dc650dSSadaf Ebrahimi X\x{c1}Y 628*22dc650dSSadaf Ebrahimi 629*22dc650dSSadaf Ebrahimi# Without UTF or UCP characters > 127 have only one case in the default locale. 630*22dc650dSSadaf Ebrahimi 631*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/replace=>\U$1<,substitute_extended 632*22dc650dSSadaf Ebrahimi X\x{e1}Y 633*22dc650dSSadaf Ebrahimi 634*22dc650dSSadaf Ebrahimi/A/utf,match_invalid_utf,caseless 635*22dc650dSSadaf Ebrahimi \xe5A 636*22dc650dSSadaf Ebrahimi 637*22dc650dSSadaf Ebrahimi/\bch\b/utf,match_invalid_utf 638*22dc650dSSadaf Ebrahimi qchq\=ph 639*22dc650dSSadaf Ebrahimi qchq\=ps 640*22dc650dSSadaf Ebrahimi 641*22dc650dSSadaf Ebrahimi/line1\nbreak/firstline,utf,match_invalid_utf 642*22dc650dSSadaf Ebrahimi line1\nbreak 643*22dc650dSSadaf Ebrahimi line0\nline1\nbreak 644*22dc650dSSadaf Ebrahimi 645*22dc650dSSadaf Ebrahimi/A\z/utf,match_invalid_utf 646*22dc650dSSadaf Ebrahimi A\x80\x42\n 647*22dc650dSSadaf Ebrahimi 648*22dc650dSSadaf Ebrahimi# End of testinput10 649