1# This set of tests checks the API, internals, and non-Perl stuff for UTF 2# support, including Unicode properties. However, tests that give different 3# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and 4# 12). 5 6#newline_default lf any anycrlf 7 8# PCRE2 and Perl disagree about the characteristics of certain Unicode 9# characters. For example, 061C was considered by Perl to be Arabic, though 10# it was not listed as such in the Unicode Scripts.txt file for Unicode 8. 11# However, it *is* in that file for Unicode 10, but when I came to re-check, 12# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. 13 14# 2066-2069 are graphic and printable according to Perl, though they are 15# actually "isolate" control characters. That is why the following tests are 16# here rather than in test 4. 17 18/^[\p{Arabic}]/utf 19 \x{061c} 20 21/^[[:graph:]]+$/utf,ucp 22\= Expect no match 23 \x{61c} 24 \x{2066} 25 \x{2067} 26 \x{2068} 27 \x{2069} 28 29/^[[:print:]]+$/utf,ucp 30\= Expect no match 31 \x{61c} 32 \x{2066} 33 \x{2067} 34 \x{2068} 35 \x{2069} 36 37/^[[:^graph:]]+$/utf,ucp 38 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} 39 \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} 40 41/^[[:^print:]]+$/utf,ucp 42 \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} 43 \x{2068}\x{2069} 44 45# Perl does not consider U+180e to be a space character. It is true that it 46# does not appear in the Unicode PropList.txt file as such, but in many other 47# sources it is listed as a space, and has been treated as such in PCRE for 48# a long time. 49 50/^>[[:blank:]]*/utf,ucp 51 >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 52 53/^A\s+Z/utf,ucp 54 A\x{85}\x{180e}\x{2005}Z 55 56/^A[\s]+Z/utf,ucp 57 A\x{2005}Z 58 A\x{85}\x{2005}Z 59 60/^[[:graph:]]+$/utf,ucp 61\= Expect no match 62 \x{180e} 63 64/^[[:print:]]+$/utf,ucp 65 \x{180e} 66 67/^[[:^graph:]]+$/utf,ucp 68 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} 69 70/^[[:^print:]]+$/utf,ucp 71\= Expect no match 72 \x{180e} 73 74# End of U+180E tests. 75 76# --------------------------------------------------------------------- 77 78/\x{110000}/IB,utf 79 80/\o{4200000}/IB,utf 81 82/\x{ffffffff}/utf 83 84/\o{37777777777}/utf 85 86/\x{100000000}/utf 87 88/\o{77777777777}/utf 89 90/\x{d800}/utf 91 92/\o{154000}/utf 93 94/\x{dfff}/utf 95 96/\o{157777}/utf 97 98/\x{d7ff}/utf 99 100/\o{153777}/utf 101 102/\x{e000}/utf 103 104/\o{170000}/utf 105 106/^\x{100}a\x{1234}/utf 107 \x{100}a\x{1234}bcd 108 109/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf 110 \x{0041}\x{2262}\x{0391}\x{002e} 111 112/.{3,5}X/IB,utf 113 \x{212ab}\x{212ab}\x{212ab}\x{861}X 114 115/.{3,5}?/IB,utf 116 \x{212ab}\x{212ab}\x{212ab}\x{861} 117 118/^[ab]/IB,utf 119 bar 120\= Expect no match 121 c 122 \x{ff} 123 \x{100} 124 125/\x{100}*(\d+|"(?1)")/utf 126 1234 127 "1234" 128 \x{100}1234 129 "\x{100}1234" 130 \x{100}\x{100}12ab 131 \x{100}\x{100}"12" 132\= Expect no match 133 \x{100}\x{100}abcd 134 135/\x{100}*/IB,utf 136 137/a\x{100}*/IB,utf 138 139/ab\x{100}*/IB,utf 140 141/[\x{200}-\x{100}]/utf 142 143/[Ā-Ą]/utf 144 \x{100} 145 \x{104} 146\= Expect no match 147 \x{105} 148 \x{ff} 149 150/[\xFF]/IB 151 >\xff< 152 153/[^\xFF]/IB 154 155/[Ä-Ü]/utf 156 Ö # Matches without Study 157 \x{d6} 158 159/[Ä-Ü]/utf 160 Ö <-- Same with Study 161 \x{d6} 162 163/[\x{c4}-\x{dc}]/utf 164 Ö # Matches without Study 165 \x{d6} 166 167/[\x{c4}-\x{dc}]/utf 168 Ö <-- Same with Study 169 \x{d6} 170 171/[^\x{100}]abc(xyz(?1))/IB,utf 172 173/(\x{100}(b(?2)c))?/IB,utf 174 175/(\x{100}(b(?2)c)){0,2}/IB,utf 176 177/(\x{100}(b(?1)c))?/IB,utf 178 179/(\x{100}(b(?1)c)){0,2}/IB,utf 180 181/\W/utf 182 A.B 183 A\x{100}B 184 185/\w/utf 186 \x{100}X 187 188# Use no_start_optimize because the first code unit is different in 8-bit from 189# the wider modes. 190 191/^\ሴ/IB,utf,no_start_optimize 192 193/()()()()()()()()()() 194 ()()()()()()()()()() 195 ()()()()()()()()()() 196 ()()()()()()()()()() 197 A (x) (?41) B/x,utf 198 AxxB 199 200/^[\x{100}\E-\Q\E\x{150}]/B,utf 201 202/^[\QĀ\E-\QŐ\E]/B,utf 203 204/^abc./gmx,newline=any,utf 205 abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK 206 207/abc.$/gmx,newline=any,utf 208 abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 209 210/^a\Rb/bsr=unicode,utf 211 a\nb 212 a\rb 213 a\r\nb 214 a\x0bb 215 a\x0cb 216 a\x{85}b 217 a\x{2028}b 218 a\x{2029}b 219\= Expect no match 220 a\n\rb 221 222/^a\R*b/bsr=unicode,utf 223 ab 224 a\nb 225 a\rb 226 a\r\nb 227 a\x0bb 228 a\x0c\x{2028}\x{2029}b 229 a\x{85}b 230 a\n\rb 231 a\n\r\x{85}\x0cb 232 233/^a\R+b/bsr=unicode,utf 234 a\nb 235 a\rb 236 a\r\nb 237 a\x0bb 238 a\x0c\x{2028}\x{2029}b 239 a\x{85}b 240 a\n\rb 241 a\n\r\x{85}\x0cb 242\= Expect no match 243 ab 244 245/^a\R{1,3}b/bsr=unicode,utf 246 a\nb 247 a\n\rb 248 a\n\r\x{85}b 249 a\r\n\r\nb 250 a\r\n\r\n\r\nb 251 a\n\r\n\rb 252 a\n\n\r\nb 253\= Expect no match 254 a\n\n\n\rb 255 a\r 256 257/\H\h\V\v/utf 258 X X\x0a 259 X\x09X\x0b 260\= Expect no match 261 \x{a0} X\x0a 262 263/\H*\h+\V?\v{3,4}/utf 264 \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a 265 \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a 266 \x09\x20\x{a0}\x0a\x0b\x0c 267\= Expect no match 268 \x09\x20\x{a0}\x0a\x0b 269 270/\H\h\V\v/utf 271 \x{3001}\x{3000}\x{2030}\x{2028} 272 X\x{180e}X\x{85} 273\= Expect no match 274 \x{2009} X\x0a 275 276/\H*\h+\V?\v{3,4}/utf 277 \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a 278 \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a 279 \x09\x20\x{202f}\x0a\x0b\x0c 280\= Expect no match 281 \x09\x{200a}\x{a0}\x{2028}\x0b 282 283/[\h]/B,utf 284 >\x{1680} 285 286/[\h]{3,}/B,utf 287 >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< 288 289/[\v]/B,utf 290 291/[\H]/B,utf 292 293/[\V]/B,utf 294 295/.*$/newline=any,utf 296 \x{1ec5} 297 298/a\Rb/I,bsr=anycrlf,utf 299 a\rb 300 a\nb 301 a\r\nb 302\= Expect no match 303 a\x{85}b 304 a\x0bb 305 306/a\Rb/I,bsr=unicode,utf 307 a\rb 308 a\nb 309 a\r\nb 310 a\x{85}b 311 a\x0bb 312 313/a\R?b/I,bsr=anycrlf,utf 314 a\rb 315 a\nb 316 a\r\nb 317\= Expect no match 318 a\x{85}b 319 a\x0bb 320 321/a\R?b/I,bsr=unicode,utf 322 a\rb 323 a\nb 324 a\r\nb 325 a\x{85}b 326 a\x0bb 327 328/.*a.*=.b.*/utf,newline=any 329 QQQ\x{2029}ABCaXYZ=!bPQR 330\= Expect no match 331 a\x{2029}b 332 \x61\xe2\x80\xa9\x62 333 334/[[:a\x{100}b:]]/utf 335 336/a[^]b/utf,allow_empty_class,match_unset_backref 337 a\x{1234}b 338 a\nb 339\= Expect no match 340 ab 341 342/a[^]+b/utf,allow_empty_class,match_unset_backref 343 aXb 344 a\nX\nX\x{1234}b 345\= Expect no match 346 ab 347 348/(\x{de})\1/ 349 \x{de}\x{de} 350 351/X/newline=any,utf,firstline 352 A\x{1ec5}ABCXYZ 353 354/Xa{2,4}b/utf 355 X\=ps 356 Xa\=ps 357 Xaa\=ps 358 Xaaa\=ps 359 Xaaaa\=ps 360 361/Xa{2,4}?b/utf 362 X\=ps 363 Xa\=ps 364 Xaa\=ps 365 Xaaa\=ps 366 Xaaaa\=ps 367 368/Xa{2,4}+b/utf 369 X\=ps 370 Xa\=ps 371 Xaa\=ps 372 Xaaa\=ps 373 Xaaaa\=ps 374 375/X\x{123}{2,4}b/utf 376 X\=ps 377 X\x{123}\=ps 378 X\x{123}\x{123}\=ps 379 X\x{123}\x{123}\x{123}\=ps 380 X\x{123}\x{123}\x{123}\x{123}\=ps 381 382/X\x{123}{2,4}?b/utf 383 X\=ps 384 X\x{123}\=ps 385 X\x{123}\x{123}\=ps 386 X\x{123}\x{123}\x{123}\=ps 387 X\x{123}\x{123}\x{123}\x{123}\=ps 388 389/X\x{123}{2,4}+b/utf 390 X\=ps 391 X\x{123}\=ps 392 X\x{123}\x{123}\=ps 393 X\x{123}\x{123}\x{123}\=ps 394 X\x{123}\x{123}\x{123}\x{123}\=ps 395 396/X\x{123}{2,4}b/utf 397\= Expect no match 398 Xx\=ps 399 X\x{123}x\=ps 400 X\x{123}\x{123}x\=ps 401 X\x{123}\x{123}\x{123}x\=ps 402 X\x{123}\x{123}\x{123}\x{123}x\=ps 403 404/X\x{123}{2,4}?b/utf 405\= Expect no match 406 Xx\=ps 407 X\x{123}x\=ps 408 X\x{123}\x{123}x\=ps 409 X\x{123}\x{123}\x{123}x\=ps 410 X\x{123}\x{123}\x{123}\x{123}x\=ps 411 412/X\x{123}{2,4}+b/utf 413\= Expect no match 414 Xx\=ps 415 X\x{123}x\=ps 416 X\x{123}\x{123}x\=ps 417 X\x{123}\x{123}\x{123}x\=ps 418 X\x{123}\x{123}\x{123}\x{123}x\=ps 419 420/X\d{2,4}b/utf 421 X\=ps 422 X3\=ps 423 X33\=ps 424 X333\=ps 425 X3333\=ps 426 427/X\d{2,4}?b/utf 428 X\=ps 429 X3\=ps 430 X33\=ps 431 X333\=ps 432 X3333\=ps 433 434/X\d{2,4}+b/utf 435 X\=ps 436 X3\=ps 437 X33\=ps 438 X333\=ps 439 X3333\=ps 440 441/X\D{2,4}b/utf 442 X\=ps 443 Xa\=ps 444 Xaa\=ps 445 Xaaa\=ps 446 Xaaaa\=ps 447 448/X\D{2,4}?b/utf 449 X\=ps 450 Xa\=ps 451 Xaa\=ps 452 Xaaa\=ps 453 Xaaaa\=ps 454 455/X\D{2,4}+b/utf 456 X\=ps 457 Xa\=ps 458 Xaa\=ps 459 Xaaa\=ps 460 Xaaaa\=ps 461 462/X\D{2,4}b/utf 463 X\=ps 464 X\x{123}\=ps 465 X\x{123}\x{123}\=ps 466 X\x{123}\x{123}\x{123}\=ps 467 X\x{123}\x{123}\x{123}\x{123}\=ps 468 469/X\D{2,4}?b/utf 470 X\=ps 471 X\x{123}\=ps 472 X\x{123}\x{123}\=ps 473 X\x{123}\x{123}\x{123}\=ps 474 X\x{123}\x{123}\x{123}\x{123}\=ps 475 476/X\D{2,4}+b/utf 477 X\=ps 478 X\x{123}\=ps 479 X\x{123}\x{123}\=ps 480 X\x{123}\x{123}\x{123}\=ps 481 X\x{123}\x{123}\x{123}\x{123}\=ps 482 483/X[abc]{2,4}b/utf 484 X\=ps 485 Xa\=ps 486 Xaa\=ps 487 Xaaa\=ps 488 Xaaaa\=ps 489 490/X[abc]{2,4}?b/utf 491 X\=ps 492 Xa\=ps 493 Xaa\=ps 494 Xaaa\=ps 495 Xaaaa\=ps 496 497/X[abc]{2,4}+b/utf 498 X\=ps 499 Xa\=ps 500 Xaa\=ps 501 Xaaa\=ps 502 Xaaaa\=ps 503 504/X[abc\x{123}]{2,4}b/utf 505 X\=ps 506 X\x{123}\=ps 507 X\x{123}\x{123}\=ps 508 X\x{123}\x{123}\x{123}\=ps 509 X\x{123}\x{123}\x{123}\x{123}\=ps 510 511/X[abc\x{123}]{2,4}?b/utf 512 X\=ps 513 X\x{123}\=ps 514 X\x{123}\x{123}\=ps 515 X\x{123}\x{123}\x{123}\=ps 516 X\x{123}\x{123}\x{123}\x{123}\=ps 517 518/X[abc\x{123}]{2,4}+b/utf 519 X\=ps 520 X\x{123}\=ps 521 X\x{123}\x{123}\=ps 522 X\x{123}\x{123}\x{123}\=ps 523 X\x{123}\x{123}\x{123}\x{123}\=ps 524 525/X[^a]{2,4}b/utf 526 X\=ps 527 Xz\=ps 528 Xzz\=ps 529 Xzzz\=ps 530 Xzzzz\=ps 531 532/X[^a]{2,4}?b/utf 533 X\=ps 534 Xz\=ps 535 Xzz\=ps 536 Xzzz\=ps 537 Xzzzz\=ps 538 539/X[^a]{2,4}+b/utf 540 X\=ps 541 Xz\=ps 542 Xzz\=ps 543 Xzzz\=ps 544 Xzzzz\=ps 545 546/X[^a]{2,4}b/utf 547 X\=ps 548 X\x{123}\=ps 549 X\x{123}\x{123}\=ps 550 X\x{123}\x{123}\x{123}\=ps 551 X\x{123}\x{123}\x{123}\x{123}\=ps 552 553/X[^a]{2,4}?b/utf 554 X\=ps 555 X\x{123}\=ps 556 X\x{123}\x{123}\=ps 557 X\x{123}\x{123}\x{123}\=ps 558 X\x{123}\x{123}\x{123}\x{123}\=ps 559 560/X[^a]{2,4}+b/utf 561 X\=ps 562 X\x{123}\=ps 563 X\x{123}\x{123}\=ps 564 X\x{123}\x{123}\x{123}\=ps 565 X\x{123}\x{123}\x{123}\x{123}\=ps 566 567/(Y)X\1{2,4}b/utf 568 YX\=ps 569 YXY\=ps 570 YXYY\=ps 571 YXYYY\=ps 572 YXYYYY\=ps 573 574/(Y)X\1{2,4}?b/utf 575 YX\=ps 576 YXY\=ps 577 YXYY\=ps 578 YXYYY\=ps 579 YXYYYY\=ps 580 581/(Y)X\1{2,4}+b/utf 582 YX\=ps 583 YXY\=ps 584 YXYY\=ps 585 YXYYY\=ps 586 YXYYYY\=ps 587 588/(\x{123})X\1{2,4}b/utf 589 \x{123}X\=ps 590 \x{123}X\x{123}\=ps 591 \x{123}X\x{123}\x{123}\=ps 592 \x{123}X\x{123}\x{123}\x{123}\=ps 593 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 594 595/(\x{123})X\1{2,4}?b/utf 596 \x{123}X\=ps 597 \x{123}X\x{123}\=ps 598 \x{123}X\x{123}\x{123}\=ps 599 \x{123}X\x{123}\x{123}\x{123}\=ps 600 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 601 602/(\x{123})X\1{2,4}+b/utf 603 \x{123}X\=ps 604 \x{123}X\x{123}\=ps 605 \x{123}X\x{123}\x{123}\=ps 606 \x{123}X\x{123}\x{123}\x{123}\=ps 607 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 608 609/\bthe cat\b/utf 610 the cat\=ps 611 the cat\=ph 612 613/abcd*/utf 614 xxxxabcd\=ps 615 xxxxabcd\=ph 616 617/abcd*/i,utf 618 xxxxabcd\=ps 619 xxxxabcd\=ph 620 XXXXABCD\=ps 621 XXXXABCD\=ph 622 623/abc\d*/utf 624 xxxxabc1\=ps 625 xxxxabc1\=ph 626 627/(a)bc\1*/utf 628 xxxxabca\=ps 629 xxxxabca\=ph 630 631/abc[de]*/utf 632 xxxxabcde\=ps 633 xxxxabcde\=ph 634 635/X\W{3}X/utf 636 X\=ps 637 638/\sxxx\s/utf,tables=2 639 AB\x{85}xxx\x{a0}XYZ 640 AB\x{a0}xxx\x{85}XYZ 641 642/\S \S/utf,tables=2 643 \x{a2} \x{84} 644 645'A#хц'Bx,newline=any,utf 646 647'A#хц 648 PQ'Bx,newline=any,utf 649 650/a+#хaa 651 z#XX?/Bx,newline=any,utf 652 653/a+#хaa 654 z#х?/Bx,newline=any,utf 655 656/\g{A}xxx#bXX(?'A'123) 657(?'A'456)/Bx,newline=any,utf 658 659/\g{A}xxx#bх(?'A'123) 660(?'A'456)/Bx,newline=any,utf 661 662/^\cģ/utf 663 664/(\R*)(.)/s,utf 665 \r\n 666 \r\r\n\n\r 667 \r\r\n\n\r\n 668 669/(\R)*(.)/s,utf 670 \r\n 671 \r\r\n\n\r 672 \r\r\n\n\r\n 673 674/[^\x{1234}]+/Ii,utf 675 676/[^\x{1234}]+?/Ii,utf 677 678/[^\x{1234}]++/Ii,utf 679 680/[^\x{1234}]{2}/Ii,utf 681 682/f.*/ 683 for\=ph 684 685/f.*/s 686 for\=ph 687 688/f.*/utf 689 for\=ph 690 691/f.*/s,utf 692 for\=ph 693 694/\x{d7ff}\x{e000}/utf 695 696/\x{d800}/utf 697 698/\x{dfff}/utf 699 700/\h+/utf 701 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 702 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 703 704/[\h\x{e000}]+/B,utf 705 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 706 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 707 708/\H+/utf 709 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 710 \x{2000}\x{200a}\x{1fff}\x{200b} 711 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 712 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 713 714/[\H\x{d7ff}]+/B,utf 715 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 716 \x{2000}\x{200a}\x{1fff}\x{200b} 717 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 718 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 719 720/\v+/utf 721 \x{2027}\x{2030}\x{2028}\x{2029} 722 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 723 724/[\v\x{e000}]+/B,utf 725 \x{2027}\x{2030}\x{2028}\x{2029} 726 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 727 728/\V+/utf 729 \x{2028}\x{2029}\x{2027}\x{2030} 730 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 731 732/[\V\x{d7ff}]+/B,utf 733 \x{2028}\x{2029}\x{2027}\x{2030} 734 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 735 736/\R+/bsr=unicode,utf 737 \x{2027}\x{2030}\x{2028}\x{2029} 738 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 739 740/(..)\1/utf 741 ab\=ps 742 aba\=ps 743 abab\=ps 744 745/(..)\1/i,utf 746 ab\=ps 747 abA\=ps 748 aBAb\=ps 749 750/(..)\1{2,}/utf 751 ab\=ps 752 aba\=ps 753 abab\=ps 754 ababa\=ps 755 ababab\=ps 756 ababab\=ph 757 abababa\=ps 758 abababa\=ph 759 760/(..)\1{2,}/i,utf 761 ab\=ps 762 aBa\=ps 763 aBAb\=ps 764 AbaBA\=ps 765 abABAb\=ps 766 aBAbaB\=ph 767 abABabA\=ps 768 abaBABa\=ph 769 770/(..)\1{2,}?x/i,utf 771 ab\=ps 772 abA\=ps 773 aBAb\=ps 774 abaBA\=ps 775 abAbaB\=ps 776 abaBabA\=ps 777 abAbABaBx\=ps 778 779/./utf,newline=crlf 780 \r\=ps 781 \r\=ph 782 783/.{2,3}/utf,newline=crlf 784 \r\=ps 785 \r\=ph 786 \r\r\=ps 787 \r\r\=ph 788 \r\r\r\=ps 789 \r\r\r\=ph 790 791/.{2,3}?/utf,newline=crlf 792 \r\=ps 793 \r\=ph 794 \r\r\=ps 795 \r\r\=ph 796 \r\r\r\=ps 797 \r\r\r\=ph 798 799/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf 800 801/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf 802 803/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf 804 805/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf 806 807/(?<=\x{1234}\x{1234})\bxy/I,utf 808 809/(?<!^)ETA/utf 810\= Expect no match 811 ETA 812 813/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref 814 815/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref 816 817/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref 818 819/^\u{0000000000010ffff}/utf,extra_alt_bsux 820 \x{10ffff} 821 822/\u{ 1bb1}/utf,extra_alt_bsux 823 u{ 1bb1} 824\= Expect no match 825 \x{1bb1} 826 827/\u/utf,alt_bsux 828 \\u 829 830/^a+[a\x{200}]/B,utf 831 aa 832 833/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf 834 835/[\p{L}]/IB 836 837/[\p{^L}]/IB 838 839/[\P{L}]/IB 840 841/[\P{^L}]/IB 842 843/[abc\p{L}\x{0660}]/IB,utf 844 845/[\p{Nd}]/IB,utf 846 1234 847 848/[\p{Nd}+-]+/IB,utf 849 1234 850 12-34 851 12+\x{661}-34 852\= Expect no match 853 abcd 854 855/(?:[\PPa*]*){8,}/ 856 857/[\P{Any}]/B 858 859/[\P{Any}\E]/B 860 861/(\P{Yi}+\277)/ 862 863/(\P{Yi}+\277)?/ 864 865/(?<=\P{Yi}{3}A)X/ 866 867/\p{Yi}+(\P{Yi}+)(?1)/ 868 869/(\P{Yi}{2}\277)?/ 870 871/[\P{Yi}A]/ 872 873/[\P{Yi}\P{Yi}\P{Yi}A]/ 874 875/[^\P{Yi}A]/ 876 877/[^\P{Yi}\P{Yi}\P{Yi}A]/ 878 879/(\P{Yi}*\277)*/ 880 881/(\P{Yi}*?\277)*/ 882 883/(\p{Yi}*+\277)*/ 884 885/(\P{Yi}?\277)*/ 886 887/(\P{Yi}??\277)*/ 888 889/(\p{Yi}?+\277)*/ 890 891/(\P{Yi}{0,3}\277)*/ 892 893/(\P{Yi}{0,3}?\277)*/ 894 895/(\p{Yi}{0,3}+\277)*/ 896 897/\p{Zl}{2,3}+/B,utf 898 899 \x{2028}\x{2028}\x{2028} 900 901/\p{Zl}/B,utf 902 903/\p{Lu}{3}+/B,utf 904 905/\pL{2}+/B,utf 906 907/\p{Cc}{2}+/B,utf 908 909/^\p{Cf}/utf 910 \x{180e} 911 \x{061c} 912 \x{2066} 913 \x{2067} 914 \x{2068} 915 \x{2069} 916 917/^\p{Cs}/utf 918 \x{dfff}\=no_utf_check 919\= Expect no match 920 \x{09f} 921 922/^\p{Mn}/utf 923 \x{1a1b} 924 925/^\p{Pe}/utf 926 \x{2309} 927 \x{230b} 928 929/^\p{Ps}/utf 930 \x{2308} 931 \x{230a} 932 933/^\p{Sc}+/utf 934 $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} 935 \x{9f2} 936\= Expect no match 937 X 938 \x{2c2} 939 940/^\p{Zs}/utf 941 \ \ 942 \x{a0} 943 \x{1680} 944 \x{2000} 945 \x{2001} 946\= Expect no match 947 \x{2028} 948 \x{200d} 949 950# These are here because Perl has problems with the negative versions of the 951# properties and has changed how it behaves for caseless matching. 952 953/\p{^Lu}/i,utf 954 1234 955\= Expect no match 956 ABC 957 958/\P{Lu}/i,utf 959 1234 960\= Expect no match 961 ABC 962 963/\p{Ll}/i,utf 964 a 965 Az 966\= Expect no match 967 ABC 968 969/\p{Lu}/i,utf 970 A 971 a\x{10a0}B 972\= Expect no match 973 a 974 \x{1d00} 975 976/\p{Lu}/i,utf 977 A 978 aZ 979\= Expect no match 980 abc 981 982/[\x{c0}\x{391}]/i,utf 983 \x{c0} 984 \x{e0} 985 986# The next two are special cases where the lengths of the different cases of 987# the same character differ. The first went wrong with heap frame storage; the 988# second was broken in all cases. 989 990/^\x{023a}+?(\x{0130}+)/i,utf 991 \x{023a}\x{2c65}\x{0130} 992 993/^\x{023a}+([^X])/i,utf 994 \x{023a}\x{2c65}X 995 996/\x{c0}+\x{116}+/i,utf 997 \x{c0}\x{e0}\x{116}\x{117} 998 999/[\x{c0}\x{116}]+/i,utf 1000 \x{c0}\x{e0}\x{116}\x{117} 1001 1002/(\x{de})\1/i,utf 1003 \x{de}\x{de} 1004 \x{de}\x{fe} 1005 \x{fe}\x{fe} 1006 \x{fe}\x{de} 1007 1008/^\x{c0}$/i,utf 1009 \x{c0} 1010 \x{e0} 1011 1012/^\x{e0}$/i,utf 1013 \x{c0} 1014 \x{e0} 1015 1016# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE 1017# will match it only with UCP support, because without that it has no notion 1018# of case for anything other than the ASCII letters. 1019 1020/((?i)[\x{c0}])/utf 1021 \x{c0} 1022 \x{e0} 1023 1024/(?i:[\x{c0}])/utf 1025 \x{c0} 1026 \x{e0} 1027 1028# These are PCRE's extra properties to help with Unicodizing \d etc. 1029 1030/^\p{Xan}/utf 1031 ABCD 1032 1234 1033 \x{6ca} 1034 \x{a6c} 1035 \x{10a7} 1036\= Expect no match 1037 _ABC 1038 1039/^\p{Xan}+/utf 1040 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1041\= Expect no match 1042 _ABC 1043 1044/^\p{Xan}+?/utf 1045 \x{6ca}\x{a6c}\x{10a7}_ 1046 1047/^\p{Xan}*/utf 1048 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1049 1050/^\p{Xan}{2,9}/utf 1051 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1052 1053/^\p{Xan}{2,9}?/utf 1054 \x{6ca}\x{a6c}\x{10a7}_ 1055 1056/^[\p{Xan}]/utf 1057 ABCD1234_ 1058 1234abcd_ 1059 \x{6ca} 1060 \x{a6c} 1061 \x{10a7} 1062\= Expect no match 1063 _ABC 1064 1065/^[\p{Xan}]+/utf 1066 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1067\= Expect no match 1068 _ABC 1069 1070/^>\p{Xsp}/utf 1071 >\x{1680}\x{2028}\x{0b} 1072 >\x{a0} 1073\= Expect no match 1074 \x{0b} 1075 1076/^>\p{Xsp}+/utf 1077 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1078 1079/^>\p{Xsp}+?/utf 1080 >\x{1680}\x{2028}\x{0b} 1081 1082/^>\p{Xsp}*/utf 1083 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1084 1085/^>\p{Xsp}{2,9}/utf 1086 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1087 1088/^>\p{Xsp}{2,9}?/utf 1089 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1090 1091/^>[\p{Xsp}]/utf 1092 >\x{2028}\x{0b} 1093 1094/^>[\p{Xsp}]+/utf 1095 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1096 1097/^>\p{Xps}/utf 1098 >\x{1680}\x{2028}\x{0b} 1099 >\x{a0} 1100\= Expect no match 1101 \x{0b} 1102 1103/^>\p{Xps}+/utf 1104 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1105 1106/^>\p{Xps}+?/utf 1107 >\x{1680}\x{2028}\x{0b} 1108 1109/^>\p{Xps}*/utf 1110 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1111 1112/^>\p{Xps}{2,9}/utf 1113 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1114 1115/^>\p{Xps}{2,9}?/utf 1116 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1117 1118/^>[\p{Xps}]/utf 1119 >\x{2028}\x{0b} 1120 1121/^>[\p{Xps}]+/utf 1122 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1123 1124/^\p{Xwd}/utf 1125 ABCD 1126 1234 1127 \x{6ca} 1128 \x{a6c} 1129 \x{10a7} 1130 _ABC 1131\= Expect no match 1132 [] 1133 1134/^\p{Xwd}+/utf 1135 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1136 1137/^\p{Xwd}+?/utf 1138 \x{6ca}\x{a6c}\x{10a7}_ 1139 1140/^\p{Xwd}*/utf 1141 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1142 1143/^\p{Xwd}{2,9}/utf 1144 A_B12\x{6ca}\x{a6c}\x{10a7} 1145 1146/^\p{Xwd}{2,9}?/utf 1147 \x{6ca}\x{a6c}\x{10a7}_ 1148 1149/^[\p{Xwd}]/utf 1150 ABCD1234_ 1151 1234abcd_ 1152 \x{6ca} 1153 \x{a6c} 1154 \x{10a7} 1155 _ABC 1156\= Expect no match 1157 [] 1158 1159/^[\p{Xwd}]+/utf 1160 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1161 1162# A check not in UTF-8 mode 1163 1164/^[\p{Xwd}]+/ 1165 ABCD1234_ 1166 1167# Some negative checks 1168 1169/^[\P{Xwd}]+/utf 1170 !.+\x{019}\x{482}AB 1171 1172/^[\p{^Xwd}]+/utf 1173 !.+\x{019}\x{589}AB 1174 1175/[\D]/B,utf,ucp 1176 1\x{3c8}2 1177 1178/[\d]/B,utf,ucp 1179 >\x{6f4}< 1180 1181/[\S]/B,utf,ucp 1182 \x{1680}\x{6f4}\x{1680} 1183 1184/[\s]/B,utf,ucp 1185 >\x{1680}< 1186 1187/[\W]/B,utf,ucp 1188 A\x{1735}B 1189 1190/[\w]/B,utf,ucp 1191 >\x{1723}< 1192 1193/\D/B,utf,ucp 1194 1\x{3c8}2 1195 1196/\d/B,utf,ucp 1197 >\x{6f4}< 1198 1199/\S/B,utf,ucp 1200 \x{1680}\x{6f4}\x{1680} 1201 1202/\s/B,utf,ucp 1203 >\x{1680}> 1204 1205/\W/B,utf,ucp 1206 A\x{1735}B 1207 1208/\w/B,utf,ucp 1209 >\x{1723}< 1210 1211/[[:alpha:]]/B,ucp 1212 1213/[[:lower:]]/B,ucp 1214 1215/[[:upper:]]/B,ucp 1216 1217/[[:alnum:]]/B,ucp 1218 1219/[[:ascii:]]/B,ucp 1220 1221/[[:cntrl:]]/B,ucp 1222 1223/[[:digit:]]/B,ucp 1224 1225/[[:digit:]]/B,ucp,ascii_digit 1226 1227/[[:graph:]]/B,ucp 1228 1229/[[:print:]]/B,ucp 1230 1231/[[:punct:]]/B,ucp 1232 1233/[[:space:]]/B,ucp 1234 1235/[[:word:]]/B,ucp 1236 1237/[[:xdigit:]]/B,ucp 1238 1239/[[:xdigit:]]/B,ucp,ascii_digit 1240 1241# Unicode properties for \b and \B 1242 1243/\b...\B/utf,ucp 1244 abc_ 1245 \x{37e}abc\x{376} 1246 \x{37e}\x{376}\x{371}\x{393}\x{394} 1247 !\x{c0}++\x{c1}\x{c2} 1248 !\x{c0}+++++ 1249 1250# Without PCRE_UCP, non-ASCII always fail, even if < 256 1251 1252/\b...\B/utf 1253 abc_ 1254\= Expect no match 1255 \x{37e}abc\x{376} 1256 \x{37e}\x{376}\x{371}\x{393}\x{394} 1257 !\x{c0}++\x{c1}\x{c2} 1258 !\x{c0}+++++ 1259 1260# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties 1261 1262/\b...\B/ucp 1263 abc_ 1264 !\x{c0}++\x{c1}\x{c2} 1265 !\x{c0}+++++ 1266 1267# Some of these are silly, but they check various combinations 1268 1269/[[:^alpha:][:^cntrl:]]+/B,utf,ucp 1270 123 1271 abc 1272 1273/[[:^cntrl:][:^alpha:]]+/B,utf,ucp 1274 123 1275 abc 1276 1277/[[:alpha:]]+/B,utf,ucp 1278 abc 1279 1280/[[:^alpha:]\S]+/B,utf,ucp 1281 123 1282 abc 1283 1284/[^\d]+/B,utf,ucp 1285 abc123 1286 abc\x{123} 1287 \x{660}abc 1288 1289/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B 1290 1291/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B 1292 1293/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B 1294 1295/\p{Han}+X\p{Greek}+\x{370}/B,utf 1296 1297/\p{Xan}+!\p{Xan}+A/B 1298 1299/\p{Xsp}+!\p{Xsp}\t/B 1300 1301/\p{Xps}+!\p{Xps}\t/B 1302 1303/\p{Xwd}+!\p{Xwd}_/B 1304 1305/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp 1306 1307# These behaved oddly in Perl, so they are kept in this test 1308 1309/(\x{23a}\x{23a}\x{23a})?\1/i,utf 1310\= Expect no match 1311 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 1312 1313/(ȺȺȺ)?\1/i,utf 1314\= Expect no match 1315 ȺȺȺⱥⱥ 1316 1317/(\x{23a}\x{23a}\x{23a})?\1/i,utf 1318 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1319 1320/(ȺȺȺ)?\1/i,utf 1321 ȺȺȺⱥⱥⱥ 1322 1323/(\x{23a}\x{23a}\x{23a})\1/i,utf 1324\= Expect no match 1325 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 1326 1327/(ȺȺȺ)\1/i,utf 1328\= Expect no match 1329 ȺȺȺⱥⱥ 1330 1331/(\x{23a}\x{23a}\x{23a})\1/i,utf 1332 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1333 1334/(ȺȺȺ)\1/i,utf 1335 ȺȺȺⱥⱥⱥ 1336 1337/(\x{2c65}\x{2c65})\1/i,utf 1338 \x{2c65}\x{2c65}\x{23a}\x{23a} 1339 1340/(ⱥⱥ)\1/i,utf 1341 ⱥⱥȺȺ 1342 1343/(\x{23a}\x{23a}\x{23a})\1Y/i,utf 1344 X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 1345 1346/(\x{2c65}\x{2c65})\1Y/i,utf 1347 X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ 1348 1349# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE 1350 1351/^[\p{Batak}]/utf 1352 \x{1bc0} 1353 \x{1bff} 1354\= Expect no match 1355 \x{1bf4} 1356 1357/^[\p{Brahmi}]/utf 1358 \x{11000} 1359 \x{1106f} 1360\= Expect no match 1361 \x{1104e} 1362 1363/^[\p{Mandaic}]/utf 1364 \x{840} 1365 \x{85e} 1366\= Expect no match 1367 \x{85c} 1368 \x{85d} 1369 1370/(\X*)(.)/s,utf 1371 A\x{300} 1372 1373/^S(\X*)e(\X*)$/utf 1374 Stéréo 1375 1376/^\X/utf 1377 ́réo 1378 1379/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames 1380 aX41z 1381\= Expect no match 1382 aAz 1383 1384/\X/ 1385 a\=ps 1386 a\=ph 1387 1388/\Xa/ 1389 aa\=ps 1390 aa\=ph 1391 1392/\X{2}/ 1393 aa\=ps 1394 aa\=ph 1395 1396/\X+a/ 1397 a\=ps 1398 aa\=ps 1399 aa\=ph 1400 1401/\X+?a/ 1402 a\=ps 1403 ab\=ps 1404 aa\=ps 1405 aa\=ph 1406 aba\=ps 1407 1408# These Unicode 6.1.0 scripts are not known to Perl. 1409 1410/\p{Chakma}\d/utf,ucp 1411 \x{11100}\x{1113c} 1412 1413/\p{Takri}\d/utf,ucp 1414 \x{11680}\x{116c0} 1415 1416/^\X/utf 1417 A\=ps 1418 A\=ph 1419 A\x{300}\x{301}\=ps 1420 A\x{300}\x{301}\=ph 1421 A\x{301}\=ps 1422 A\x{301}\=ph 1423 1424/^\X{2,3}/utf 1425 A\=ps 1426 A\=ph 1427 AA\=ps 1428 AA\=ph 1429 A\x{300}\x{301}\=ps 1430 A\x{300}\x{301}\=ph 1431 A\x{300}\x{301}A\x{300}\x{301}\=ps 1432 A\x{300}\x{301}A\x{300}\x{301}\=ph 1433 1434/^\X{2}/utf 1435 AA\=ps 1436 AA\=ph 1437 A\x{300}\x{301}A\x{300}\x{301}\=ps 1438 A\x{300}\x{301}A\x{300}\x{301}\=ph 1439 1440/^\X+/utf 1441 AA\=ps 1442 AA\=ph 1443 1444/^\X+?Z/utf 1445 AA\=ps 1446 AA\=ph 1447 1448/A\x{3a3}B/IBi,utf 1449 1450/[\x{3a3}]/Bi,utf 1451 1452/[^\x{3a3}]/Bi,utf 1453 1454/[\x{3a3}]+/Bi,utf 1455 1456/[^\x{3a3}]+/Bi,utf 1457 1458/a*\x{3a3}/Bi,utf 1459 1460/\x{3a3}+a/Bi,utf 1461 1462/\x{3a3}*\x{3c2}/Bi,utf 1463 1464/\x{3a3}{3}/i,utf,aftertext 1465 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1466 1467/\x{3a3}{2,4}/i,utf,aftertext 1468 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1469 1470/\x{3a3}{2,4}?/i,utf,aftertext 1471 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1472 1473/\x{3a3}+./i,utf,aftertext 1474 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1475 1476/\x{3a3}++./i,utf,aftertext 1477\= Expect no match 1478 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1479 1480/\x{3a3}*\x{3c2}/Bi,utf 1481 1482/[^\x{3a3}]*\x{3c2}/Bi,utf 1483 1484/[^a]*\x{3c2}/Bi,utf 1485 1486/ist/Bi,utf 1487\= Expect no match 1488 ikt 1489 1490/is+t/i,utf 1491 iSs\x{17f}t 1492\= Expect no match 1493 ikt 1494 1495/is+?t/i,utf 1496\= Expect no match 1497 ikt 1498 1499/is?t/i,utf 1500\= Expect no match 1501 ikt 1502 1503/is{2}t/i,utf 1504\= Expect no match 1505 iskt 1506 1507# This property is a PCRE special 1508 1509/^\p{Xuc}/utf 1510 $abc 1511 @abc 1512 `abc 1513 \x{1234}abc 1514\= Expect no match 1515 abc 1516 1517/^\p{Xuc}+/utf 1518 $@`\x{a0}\x{1234}\x{e000}** 1519\= Expect no match 1520 \x{9f} 1521 1522/^\p{Xuc}+?/utf 1523 $@`\x{a0}\x{1234}\x{e000}** 1524\= Expect no match 1525 \x{9f} 1526 1527/^\p{Xuc}+?\*/utf 1528 $@`\x{a0}\x{1234}\x{e000}** 1529\= Expect no match 1530 \x{9f} 1531 1532/^\p{Xuc}++/utf 1533 $@`\x{a0}\x{1234}\x{e000}** 1534\= Expect no match 1535 \x{9f} 1536 1537/^\p{Xuc}{3,5}/utf 1538 $@`\x{a0}\x{1234}\x{e000}** 1539\= Expect no match 1540 \x{9f} 1541 1542/^\p{Xuc}{3,5}?/utf 1543 $@`\x{a0}\x{1234}\x{e000}** 1544\= Expect no match 1545 \x{9f} 1546 1547/^[\p{Xuc}]/utf 1548 $@`\x{a0}\x{1234}\x{e000}** 1549\= Expect no match 1550 \x{9f} 1551 1552/^[\p{Xuc}]+/utf 1553 $@`\x{a0}\x{1234}\x{e000}** 1554\= Expect no match 1555 \x{9f} 1556 1557/^\P{Xuc}/utf 1558 abc 1559\= Expect no match 1560 $abc 1561 @abc 1562 `abc 1563 \x{1234}abc 1564 1565/^[\P{Xuc}]/utf 1566 abc 1567\= Expect no match 1568 $abc 1569 @abc 1570 `abc 1571 \x{1234}abc 1572 1573# Some auto-possessification tests 1574 1575/\pN+\z/B 1576 1577/\PN+\z/B 1578 1579/\pN+/B 1580 1581/\PN+/B 1582 1583/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp 1584 1585/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp 1586 1587/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp 1588 1589/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp 1590 1591/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp 1592 1593/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp 1594 1595/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp 1596 1597/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp 1598 1599/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp 1600 1601/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp 1602 1603/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp 1604 1605/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp 1606 1607/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp 1608 1609/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp 1610 1611/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp 1612 1613/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp 1614 1615/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp 1616 1617/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp 1618 1619/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp 1620 1621/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp 1622 1623# End auto-possessification tests 1624 1625/\w+/B,utf,ucp,auto_callout 1626 abcd 1627 1628/[\p{N}]?+/B,no_auto_possess 1629 1630/[\p{L}ab]{2,3}+/B,no_auto_possess 1631 1632/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx 1633 1634/.+\X/Bsx 1635 1636/\X+$/Bmx 1637 1638/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx 1639 1640/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp 1641 1642/[RST]+/Bi,utf,ucp 1643 1644/[R-T]+/Bi,utf,ucp 1645 1646/[Q-U]+/Bi,utf,ucp 1647 1648/^s?c/Iim,utf 1649 scat 1650 1651/\X?abc/utf,no_start_optimize 1652 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 1653 1654/\x{100}\x{200}\K\x{300}/utf,startchar 1655 \x{100}\x{200}\x{300} 1656 1657# Test UTF characters in a substitution 1658 1659/ábc/utf,replace=XሴZ 1660 123ábc123 1661 1662/(?<=abc)(|def)/g,utf,replace=<$0> 1663 123abcáyzabcdef789abcሴqr 1664 1665/[A-`]/iB,utf 1666 abcdefghijklmno 1667 1668/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk 1669 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 1670 1671/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk 1672 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 1673 1674"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" 1675 1676/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ 1677 1678"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" 1679 1680/[\pS#moq]/ 1681 = 1682 1683/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark 1684 cxxxz 1685 1686/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended 1687 abcd 1688 1689/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended 1690 a\x{e0}\x{101}\x{c0}\x{102} 1691 1692/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> 1693 ab12cde 1694 1695/(*UCP)(*UTF)[[:>:]]X/B 1696 1697/abc/utf,replace=xyz 1698 abc\=zero_terminate 1699 1700/a[[:punct:]b]/ucp,bincode 1701 1702/a[[:punct:]b]/utf,ucp,bincode 1703 1704/a[b[:punct:]]/utf,ucp,bincode 1705 1706/[[:^ascii:]]/utf,ucp,bincode 1707 1708/[[:^ascii:]\w]/utf,ucp,bincode 1709 1710/[\w[:^ascii:]]/utf,ucp,bincode 1711 1712/[^[:ascii:]\W]/utf,ucp,bincode 1713 \x{de} 1714 \x{200} 1715\= Expect no match 1716 \x{589} 1717 \x{37e} 1718 1719/[[:^ascii:]a]/utf,ucp,bincode 1720 1721/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout 1722 1723/L(?#(|++<!(2)?/B,utf,ucp,auto_callout 1724 1725/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ 1726 1727/[\D]/utf 1728 \x{1d7cf} 1729 1730/[\D\P{Nd}]/utf 1731 \x{1d7cf} 1732 1733/[^\D]/utf 1734 a9b 1735\= Expect no match 1736 \x{1d7cf} 1737 1738/[^\D\P{Nd}]/utf 1739 a9b 1740 \x{1d7cf} 1741\= Expect no match 1742 \x{10000} 1743 1744# Hex uses pattern length, not zero-terminated. This tests for overrunning 1745# the given length of a pattern. 1746 1747/'(*UTF)'/hex 1748 1749/'#('/hex,extended,utf 1750 1751/a(?<=A\XB)/utf 1752 1753/../utf,auto_callout 1754 \n\x{123}\x{123}\x{123}\x{123} 1755 1756# This tests processing wide characters in extended mode. 1757 1758/XȀ/x,utf 1759 1760# These three test a bug fix that was not clearing up after a locale setting 1761# when the test or a subsequent one matched a wide character. 1762 1763//locale=C 1764 1765/[\P{Yi}]/utf 1766\x{2f000} 1767 1768/[\P{Yi}]/utf,locale=C 1769\x{2f000} 1770 1771/^(?<!(?=))/B,utf 1772 1773# Horizontal and vertical space lists ignore caseless 1774 1775/[\HH]/Bi,utf 1776 1777/[^\HH]/Bi,utf 1778 1779//g,utf 1780 \=zero_terminate 1781 1782/^(?1)\p{Nd}{3}(a)/ 1783 a123a 1784 1785/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info 1786 1787# --------------------------------------------------------------------------- 1788 1789# A bunch of tests that hit lines of code that others do not (at least when 1790# these were created). 1791 1792/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess 1793\= Expect no match 1794 bbb 1795 cc 1796 1797/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess 1798\= Expect no match 1799 aaa\x{100} 1800 1801/^X\X/no_start_optimize,no_auto_possess 1802\= Expect no match 1803 X 1804 1805/^X\p{L&}+?/no_start_optimize,no_auto_possess 1806\= Expect no match 1807 X 1808 1809/^X\p{L}+?/no_start_optimize,no_auto_possess 1810\= Expect no match 1811 X 1812 1813/^X\p{Lu}+?/no_start_optimize,no_auto_possess 1814\= Expect no match 1815 X 1816 1817/^X\p{Arabic}+?/no_start_optimize,no_auto_possess 1818\= Expect no match 1819 X 1820 1821/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess 1822\= Expect no match 1823 X 1824 1825/^X\s+?/ucp,no_start_optimize,no_auto_possess 1826\= Expect no match 1827 X 1828 XX 1829 1830/^X\S+?/ucp,no_start_optimize,no_auto_possess 1831 XX 1832\= Expect no match 1833 X 1834 1835/^X\w+?/ucp,no_start_optimize,no_auto_possess 1836\= Expect no match 1837 X 1838 1839/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess 1840\= Expect no match 1841 X 1842 1843/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess 1844\= Expect no match 1845 X 1846 1847/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess 1848\= Expect no match 1849 X 1850 1851/^X.+?Z/s,utf,no_start_optimize,no_auto_possess 1852\= Expect no match 1853 X 1854 1855/^X\R+?/utf,no_start_optimize,no_auto_possess 1856\= Expect no match 1857 X 1858 1859/^X\H+?/utf,no_start_optimize,no_auto_possess 1860\= Expect no match 1861 X 1862 1863/^X\V+?/utf,no_start_optimize,no_auto_possess 1864\= Expect no match 1865 X 1866 1867/^X\s+?/utf,no_start_optimize,no_auto_possess 1868\= Expect no match 1869 X 1870 XX 1871 1872/^X\S+?/utf,no_start_optimize,no_auto_possess 1873\= Expect no match 1874 X 1875 1876/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess 1877 XYYYZ 1878\= Expect no match 1879 XY 1880 XYY 1881 XYYY 1882 XYYYYZ 1883 1884/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess 1885\= Expect no match 1886 XY 1887 XY! 1888 1889/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess 1890\= Expect no match 1891 XY 1892 XY! 1893 1894/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess 1895\= Expect no match 1896 XY 1897 XY! 1898 1899/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess 1900\= Expect no match 1901 XY 1902 XY! 1903 XY\x{2f00}! 1904 1905/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess 1906\= Expect no match 1907 XY 1908 XY! 1909 1910/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess 1911\= Expect no match 1912 X\n 1913 X\n! 1914 X\n\n! 1915 1916/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess 1917\= Expect no match 1918 XYY\n 1919 1920/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess 1921\= Expect no match 1922 XY 1923 XY! 1924 XYY! 1925 1926/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess 1927\= Expect no match 1928 X 1929 X\x{b5} 1930 X\x{b5}\x{b5}Y 1931 1932/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess 1933\= Expect no match 1934 X 1935 X$ 1936 X@@Y 1937 1938/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess 1939\= Expect partial match 1940 XYY\r\=ph 1941\= Expect no match 1942 X 1943 1944/^X.+?Z/s,utf,no_start_optimize,no_auto_possess 1945\= Expect no match 1946 X 1947 XYY 1948 1949/^X\R+?Z/utf,no_start_optimize,no_auto_possess 1950\= Expect no match 1951 X\nX 1952 X\n\rX 1953 X\n\r\nX 1954 X\n\n 1955 X\n\x{0c} 1956 1957/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess 1958\= Expect no match 1959 X\nX 1960 X\n\rX 1961 X\n\r\nX 1962 X\n\n 1963 X\n\x{0c} 1964 1965/^X\H+?Z/utf,no_start_optimize,no_auto_possess 1966\= Expect no match 1967 XY\t 1968 XYY 1969 1970/^X\h+?Z/utf,no_start_optimize,no_auto_possess 1971\= Expect no match 1972 X\t\t 1973 X\tY 1974 1975/^X\V+?Z/utf,no_start_optimize,no_auto_possess 1976\= Expect no match 1977 XY\n 1978 XYY 1979 1980/^X\v+?Z/utf,no_start_optimize,no_auto_possess 1981\= Expect no match 1982 X\n\n 1983 X\nY 1984 1985/^X\D+?Z/utf,no_start_optimize,no_auto_possess 1986\= Expect no match 1987 XY9 1988 XYY 1989 1990/^X\d+?Z/utf,no_start_optimize,no_auto_possess 1991\= Expect no match 1992 X99 1993 X9Y 1994 1995/^X\S+?Z/utf,no_start_optimize,no_auto_possess 1996\= Expect no match 1997 XY\n 1998 XYY 1999 2000/^X\s+?Z/utf,no_start_optimize,no_auto_possess 2001\= Expect no match 2002 X\n\n 2003 X\nY 2004 2005/^X\W+?Z/utf,no_start_optimize,no_auto_possess 2006\= Expect no match 2007 X.A 2008 X++ 2009 2010/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess 2011\= Expect no match 2012 XY 2013 XY! 2014 2015/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess 2016\= Expect no match 2017 XY 2018 2019/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess 2020\= Expect no match 2021 XY 2022 2023/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess 2024\= Expect no match 2025 XYY 2026 2027/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess 2028\= Expect no match 2029 X$ 2030 2031# ---------------------------------------------------------------------- 2032# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. 2033 2034/\x{d800}/B,utf,bad_escape_is_literal 2035 2036/\ud800/B,utf,alt_bsux,bad_escape_is_literal 2037 2038# ---------------------------------------------------------------------- 2039 2040/Aሴ+B/literal,utf,no_utf_check 2041 Aሴ+B 2042 2043# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it 2044# doesn't recognize all these scripts. In time these three tests can be moved 2045# to test 4. 2046 2047/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) 2048 (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) 2049 (\p{Zanabazar_Square}+)/x,utf 2050 \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} 2051 2052/^\x{1E900}\x{104B0}/i,utf 2053 \x{1E900}\x{104B0} 2054 \x{1E922}\x{104D8} 2055 2056/^(?:(\X)(?C))+$/utf 2057 \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where 2058 2059# Similarly for Unicode 11.0.0 2060 2061/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+) 2062 (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf 2063 \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30} 2064 2065# Regional indicators 2066 2067/^(\X)(\X)/utf,aftertext 2068 \x{1F1E6}\x{1F1E7}\x{1F1E7}B 2069 \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B 2070 2071# More differences from Perl 2072 2073/^\p{Common}/utf 2074 \x{60c} 2075 \x{61f} 2076 \x{964} 2077 \x{965} 2078 2079/^\p{Inherited}/utf 2080 \x{64b} 2081 \x{654} 2082 \x{655} 2083 \x{1D1AA} 2084 2085/\N{U+}/ 2086 2087/\N{U+}/utf 2088 2089/\N{U}/ 2090 2091# This tests the non-UTF Unicode NEL pattern whitespace character, only 2092# recognized by PCRE2 with /x when there is Unicode support. 2093 2094/A 2095 2096�B/x 2097 AB 2098 2099# This tests Unicode Pattern White Space characters in verb names when they 2100# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters 2101# with code points greater than 255 between A, B, and C in the pattern. 2102 2103/(*: AB C)abc/x,utf,mark,alt_verbnames 2104 abc 2105 2106# Script run tests: auto-possessification 2107 2108/^(*sr:.*)/B,utf 2109 paypаl.com A classic example of why script run checks are a good thing 2110 2111/^(*sr:.*(*ACCEPT))/utf 2112 paypаl.com But *ACCEPT breaks things 2113 2114/^(*sr:\x{2e80}*)/B,utf 2115 2116/^(*sr:\x{2e80}*)\x{2e80}/B,utf 2117 2118/(?<!)(*sr:)/B 2119 2120/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B 2121 abcXBXYCCC! 2122 2123# Some script run patterns are broken in Perl 5.28.0. These can be moved into 2124# test 4 when a mended version of Perl is released. 2125 2126/^(*sr:.{4})/utf 2127 \x{0980}12\x{0993} Bengali Common-digits Bengali 2128 \x{0780}12\x{07b1} Thaana Common-digits Thaana 2129 \x{0e01}12\x{0e5b} Thai Common-digits Thai 2130 \x{1780}12\x{19ff} Khmer Common-digits Khmer 2131 \x{0904}12\x{0939} Devanagari Common-digits Devanagari 2132 A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 2133 A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 2134 2135# These ones involve non-ASCII but nevertheless Common digits. As of October 2136# 2018 even blead Perl wasn't handling all of these - but is going to. 2137 2138/^(*sr:.{4})/utf 2139 A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 2140 \x{ff10}\x{ff19}.. Common-notascii-digits Common Common 2141 A\x{ff10}BC Latin Common-notascii-digit Latin Latin 2142 A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 2143 \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common 2144 A\x{1d7ce}BC Latin fancy-common-digit Latin Latin 2145 2146# Some Unicode 12.1.0 new script characters 2147 2148/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf 2149 \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1} 2150 2151# Some Unicode 13.0.0 new script characters 2152 2153/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf 2154 \x{10FB0}\x{11900}\x{18B00}\x{10E80} 2155 2156# ------- 2157 2158# Test reference and errors in non-ASCII characters in group names 2159 2160/(?'ABC'...)/I,utf 2161 abcde\=copy=ABC 2162 2163# Bad ones 2164 2165/(?'AB၌C'...)\g{AB၌C}/utf 2166 2167/(?'٠ABC'...)/utf 2168 2169/(?'²ABC'...)/utf 2170 2171/(?'X²ABC'...)/utf 2172 2173# ------- 2174 2175/\p{Any}*xyz/I 2176 2177/(|�)7/caseless,ucp 2178 2179/(\xc1)\1/i,ucp 2180 \xc1\xe1\=no_jit 2181 2182/\p{L&}+\p{bidi_control}/B 2183 2184/\p{bidi_control}+\p{L&}/B 2185 2186/\p{han}/B 2187 2188/\p{script:han}/B 2189 2190/\p{sc:han}/B 2191 2192/\p{script extensions:han}/B 2193 2194/\p{scx:han}/B 2195 2196# Test error - invalid script name 2197 2198/\p{sc:L}/ 2199 2200# Some Boolean property tests that differ from Perl 2201 2202/\p{emojimodifierbase}\p{ebase}/g,utf 2203 >AN<>\x{261d}\x{1faf6}<>yz< 2204 2205/\p{graphemelink}\p{grlink}/g,utf 2206 >AN<>\x{11d97}\x{94d}<>yz< 2207 2208/\p{soft dotted}\p{sd}/g,utf 2209 >AF23<>\x{1df1a}\x{69}<>yz< 2210 2211# ------------------------------------------------ 2212 2213/\p{\2b[:x�igi:t:_/ 2214 2215# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without 2216# the restriction. 2217 2218/AskZ/i,utf,caseless_restrict 2219 AskZ 2220 aSKz 2221\= Expect no match 2222 A\x{17f}kZ 2223 As\x{212a}Z 2224 2225/AskZ/i,utf 2226 AskZ 2227 aSKz 2228 A\x{17f}kZ 2229 As\x{212a}Z 2230 2231/A\x{17f}\x{212a}Z/ir,utf 2232 \= Expect no match 2233 AskZ 2234 2235/A\x{17f}\x{212a}Z/i,utf 2236 AskZ 2237 2238/[AskZ]+/i,utf,caseless_restrict 2239 AskZ 2240 aSKz 2241 A\x{17f}kZ 2242 As\x{212a}Z 2243 2244/[AskZ]+/i,utf 2245 AskZ 2246 aSKz 2247 A\x{17f}kZ 2248 As\x{212a}Z 2249 2250/[\x{17f}\x{212a}]+/ir,utf 2251\= Expect no match 2252 AskZ 2253 2254/[\x{17f}\x{212a}]+/i,utf 2255 AskZ 2256 2257/[^s]+/ir,utf 2258 A\x{17f}Z 2259 2260/[^s]+/i,utf 2261 A\x{17f}Z 2262 2263/[^k]+/ir,utf 2264 A\x{212a}Z 2265 2266/[^k]+/i,utf 2267 A\x{212a}Z 2268 2269/[^sk]+/ir,utf 2270 A\x{17f}\x{212a}Z 2271 2272/[^sk]+/i,utf 2273 A\x{17f}\x{212a}Z 2274 2275/[^\x{17f}]+/ir,utf 2276 AsSZ 2277 2278/[^\x{17f}]+/i,utf 2279 AsSZ 2280 2281/[Ss]+/irB,utf 2282 Sss\x{17f}ss 2283 2284/[Ss]+/iB,utf 2285 Sss\x{17f}ss 2286 2287/[S\x{17f}]/irB,utf 2288 2289/[S\x{17f}]/iB,utf 2290 2291/[\x{17f}s]/irB,utf 2292 2293/[\x{17f}s]/iB,utf 2294 2295/[\x{4b}\x{6b}]/irB,utf 2296 2297/[\x{4b}\x{6b}]/iB,utf 2298 2299/s(?r)s(?-r)s(?r:s)s/i,utf 2300 \x{17f}S\x{17f}S\x{17f} 2301\= Expect no match 2302 \x{17f}\x{17f}\x{17f}S\x{17f} 2303 \x{17f}S\x{17f}\x{17f}\x{17f} 2304 2305/k(?^i)k/ir,utf 2306 K\x{212a} 2307\= Expect no match 2308 \x{212a}\x{212a} 2309 2310# End caseless restrict tests 2311 2312# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 2313 2314# DIGITS 2315 2316/\d+/i,utf 2317 123\x{660}456 2318 2319/\d+/i,utf,ucp 2320 123\x{660}456 2321 2322/\d+/i,utf,ucp,ascii_bsd 2323 123\x{660}456 2324 2325/[\d]+/i,utf 2326 123\x{660}456 2327 2328/[\d]+/i,utf,ucp 2329 123\x{660}456 2330 2331/[\d]+/i,utf,ucp,ascii_bsd 2332 123\x{660}456 2333 2334/\d(?aD)\d(?-aD)\d/utf,ucp 2335 \x{660}9\x{660} 2336\= Expect no match 2337 \x{660}\x{660}\x{660} 2338 2339/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 2340 999 2341 9\x{660}9 2342 2343/\d(?a)\d(?-a)\d/utf,ucp 2344 \x{660}9\x{660} 2345\= Expect no match 2346 \x{660}\x{660}\x{660} 2347 2348/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd 2349 999 2350 9\x{660}9 2351 2352# SPACES 2353 2354/>\s+</i,utf 2355 > < 2356\= Expect no match 2357 >\x{a0} < 2358 2359/>\s+</i,utf,ucp 2360 > < 2361 >\x{a0} < 2362 2363/>\s+</i,utf,ucp,ascii_bss 2364 > < 2365\= Expect no match 2366 >\x{a0} < 2367 2368/>[\s]+</i,utf 2369 > < 2370\= Expect no match 2371 >\x{a0} < 2372 2373/>[\s]+</i,utf,ucp 2374 > < 2375 >\x{a0} < 2376 2377/>[\s]+</i,utf,ucp,ascii_bss 2378 > < 2379\= Expect no match 2380 >\x{a0} < 2381 2382/>\s(?aS)\s(?-aS)\s</utf,ucp 2383 >\x{a0} \x{a0}< 2384\= Expect no match 2385 >\x{a0}\x{a0}\x{a0}< 2386 2387/>\s(?a)\s(?-a)\s</utf,ucp 2388 >\x{a0} \x{a0}< 2389\= Expect no match 2390 >\x{a0}\x{a0}\x{a0}< 2391 2392# WORDS 2393 2394/\w+/i,utf 2395 123\x{660}abc 2396 2397/\w+/i,utf,ucp 2398 123\x{660}abc 2399 2400/\w+/i,utf,ucp,ascii_bsw 2401 123\x{660}abc 2402 2403/[\w]+/i,utf 2404 123\x{660}abc 2405 2406/[\w]+/i,utf,ucp 2407 123\x{660}abc 2408 2409/[\w]+/i,utf,ucp,ascii_bsw 2410 123\x{660}abc 2411 2412/\w(?aW)\w(?-aW)\w/utf,ucp 2413 \x{660}A\x{c0} 2414\= Expect no match 2415 \x{660}\x{c0}\x{c0} 2416 2417/\w(?a)\w(?-a)\w/utf,ucp 2418 \x{660}A\x{c0} 2419\= Expect no match 2420 \x{660}\x{c0}\x{c0} 2421 2422# WORD BOUNDARY 2423 2424/\bABC\b/utf 2425 \x{c0}ABC\x{d0} 2426 2427/\bABC\b/utf,ucp 2428\= Expect no match 2429 \x{c0}ABC\x{d0} 2430 2431/\bABC\b/utf,ucp,ascii_bsw 2432 \x{c0}ABC\x{d0} 2433 2434/\bABC\b/utf,ucp,ascii_all 2435 \x{c0}ABC\x{d0} 2436 2437# POSIX 2438 2439/^[[:digit:]]+$/utf,ucp 2440 123456 2441 123\x{660}456 2442 2443/^[[:digit:]]+$/utf,ucp,ascii_digit 2444 123456 2445\= Expect no match 2446 123\x{660}456 2447 2448/[[:digit:]]+/g,utf,ucp,ascii_digit 2449 123\x{660}456 2450 2451/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit 2452 11 2453 \x{ff11}1 2454\= Expect no match 2455 1\x{ff11} 2456 2457/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit 2458 11 2459 \x{ff11}1 2460\= Expect no match 2461 1\x{ff11} 2462 2463/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit 2464 11 2465\= Expect no match 2466 \x{ff11}1 2467 1\x{ff11} 2468 2469/[[:digit:]]+/utf,ucp,ascii_posix 2470 123\x{660}456 2471 2472/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix 2473 11 2474 \x{ff11}1 2475\= Expect no match 2476 1\x{ff11} 2477 2478/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix 2479 11 2480 \x{ff11}1 2481\= Expect no match 2482 1\x{ff11} 2483 2484/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp 2485 11 2486 \x{ff11}1 2487\= Expect no match 2488 1\x{ff11} 2489 2490/^[[:xdigit:]]+$/utf,ucp 2491 f0 2492 1A 2493 d\x{ff10} 2494 \x{ff26}8 2495\= Expect no match 2496 8g\=no_jit 2497 2498/^[[:xdigit:]]+$/utf,ucp,ascii_digit 2499 f0 2500 1A 2501\= Expect no match 2502 d\x{ff10} 2503 \x{ff26}8 2504 8g 2505 2506/>[[:space:]]+</utf,ucp 2507 >\x{a0} \x{a0}< 2508 >\x{a0}\x{a0}\x{a0}< 2509 2510/>[[:space:]]+</utf,ucp,ascii_posix 2511\= Expect no match 2512 >\x{a0} \x{a0}< 2513 2514/(?aP)[[:alnum:]]+/i,ucp,utf 2515 abcáxyz 2516 abc\x{660}xyz 2517 2518/(?aP)[[:alnum:]\d]+/i,ucp,utf 2519 abc\x{660}xyz 2520 2521/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ 2522 \x{660}A\x{660} 2523\= Expect no match 2524 \x{660}\x{660}\x{660} 2525 2526# VARIOUS 2527 2528/[\d\s\w]+/a,ucp,utf 2529 9 A\x{660}À 2530 9 AÀ\x{660} 2531 2532# End PCRE2_EXTRA_ASCII_xxx tests 2533 2534/(?<!(|l ))/utf 2535 (?<!(|l )) 2536 2537# End of testinput5 2538