1# This set of tests is for UTF-16 and UTF-32 support, including Unicode 2# properties. It is relevant only to the 16-bit and 32-bit libraries. The 3# output is different for each library, so there are separate output files. 4 5/���xxx/IB,utf,no_utf_check 6** Failed: invalid UTF-8 string cannot be converted to 32-bit string 7 8/abc/utf 9 �] 10** Failed: invalid UTF-8 string cannot be used as input in UTF mode 11 12# Check maximum character size 13 14/\x{ffff}/IB,utf 15------------------------------------------------------------------ 16 Bra 17 \x{ffff} 18 Ket 19 End 20------------------------------------------------------------------ 21Capture group count = 0 22Options: utf 23First code unit = \x{ffff} 24Subject length lower bound = 1 25 26/\x{10000}/IB,utf 27------------------------------------------------------------------ 28 Bra 29 \x{10000} 30 Ket 31 End 32------------------------------------------------------------------ 33Capture group count = 0 34Options: utf 35First code unit = \x{10000} 36Subject length lower bound = 1 37 38/\x{100}/IB,utf 39------------------------------------------------------------------ 40 Bra 41 \x{100} 42 Ket 43 End 44------------------------------------------------------------------ 45Capture group count = 0 46Options: utf 47First code unit = \x{100} 48Subject length lower bound = 1 49 50/\x{1000}/IB,utf 51------------------------------------------------------------------ 52 Bra 53 \x{1000} 54 Ket 55 End 56------------------------------------------------------------------ 57Capture group count = 0 58Options: utf 59First code unit = \x{1000} 60Subject length lower bound = 1 61 62/\x{10000}/IB,utf 63------------------------------------------------------------------ 64 Bra 65 \x{10000} 66 Ket 67 End 68------------------------------------------------------------------ 69Capture group count = 0 70Options: utf 71First code unit = \x{10000} 72Subject length lower bound = 1 73 74/\x{100000}/IB,utf 75------------------------------------------------------------------ 76 Bra 77 \x{100000} 78 Ket 79 End 80------------------------------------------------------------------ 81Capture group count = 0 82Options: utf 83First code unit = \x{100000} 84Subject length lower bound = 1 85 86/\x{10ffff}/IB,utf 87------------------------------------------------------------------ 88 Bra 89 \x{10ffff} 90 Ket 91 End 92------------------------------------------------------------------ 93Capture group count = 0 94Options: utf 95First code unit = \x{10ffff} 96Subject length lower bound = 1 97 98/[\x{ff}]/IB,utf 99------------------------------------------------------------------ 100 Bra 101 \x{ff} 102 Ket 103 End 104------------------------------------------------------------------ 105Capture group count = 0 106Options: utf 107First code unit = \xff 108Subject length lower bound = 1 109 110/[\x{100}]/IB,utf 111------------------------------------------------------------------ 112 Bra 113 \x{100} 114 Ket 115 End 116------------------------------------------------------------------ 117Capture group count = 0 118Options: utf 119First code unit = \x{100} 120Subject length lower bound = 1 121 122/\x80/IB,utf 123------------------------------------------------------------------ 124 Bra 125 \x{80} 126 Ket 127 End 128------------------------------------------------------------------ 129Capture group count = 0 130Options: utf 131First code unit = \x80 132Subject length lower bound = 1 133 134/\xff/IB,utf 135------------------------------------------------------------------ 136 Bra 137 \x{ff} 138 Ket 139 End 140------------------------------------------------------------------ 141Capture group count = 0 142Options: utf 143First code unit = \xff 144Subject length lower bound = 1 145 146/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 147------------------------------------------------------------------ 148 Bra 149 \x{d55c}\x{ad6d}\x{c5b4} 150 Ket 151 End 152------------------------------------------------------------------ 153Capture group count = 0 154Options: utf 155First code unit = \x{d55c} 156Last code unit = \x{c5b4} 157Subject length lower bound = 3 158 \x{D55c}\x{ad6d}\x{C5B4} 159 0: \x{d55c}\x{ad6d}\x{c5b4} 160 161/\x{65e5}\x{672c}\x{8a9e}/IB,utf 162------------------------------------------------------------------ 163 Bra 164 \x{65e5}\x{672c}\x{8a9e} 165 Ket 166 End 167------------------------------------------------------------------ 168Capture group count = 0 169Options: utf 170First code unit = \x{65e5} 171Last code unit = \x{8a9e} 172Subject length lower bound = 3 173 \x{65e5}\x{672c}\x{8a9e} 174 0: \x{65e5}\x{672c}\x{8a9e} 175 176/\x{80}/IB,utf 177------------------------------------------------------------------ 178 Bra 179 \x{80} 180 Ket 181 End 182------------------------------------------------------------------ 183Capture group count = 0 184Options: utf 185First code unit = \x80 186Subject length lower bound = 1 187 188/\x{084}/IB,utf 189------------------------------------------------------------------ 190 Bra 191 \x{84} 192 Ket 193 End 194------------------------------------------------------------------ 195Capture group count = 0 196Options: utf 197First code unit = \x84 198Subject length lower bound = 1 199 200/\x{104}/IB,utf 201------------------------------------------------------------------ 202 Bra 203 \x{104} 204 Ket 205 End 206------------------------------------------------------------------ 207Capture group count = 0 208Options: utf 209First code unit = \x{104} 210Subject length lower bound = 1 211 212/\x{861}/IB,utf 213------------------------------------------------------------------ 214 Bra 215 \x{861} 216 Ket 217 End 218------------------------------------------------------------------ 219Capture group count = 0 220Options: utf 221First code unit = \x{861} 222Subject length lower bound = 1 223 224/\x{212ab}/IB,utf 225------------------------------------------------------------------ 226 Bra 227 \x{212ab} 228 Ket 229 End 230------------------------------------------------------------------ 231Capture group count = 0 232Options: utf 233First code unit = \x{212ab} 234Subject length lower bound = 1 235 236/[^ab\xC0-\xF0]/IB,utf 237------------------------------------------------------------------ 238 Bra 239 [\x00-`c-\xbf\xf1-\xff] (neg) 240 Ket 241 End 242------------------------------------------------------------------ 243Capture group count = 0 244Options: utf 245Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 246 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 247 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 248 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 249 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 250 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 251 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 252 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 253 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 254 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 255 \xfc \xfd \xfe \xff 256Subject length lower bound = 1 257 \x{f1} 258 0: \x{f1} 259 \x{bf} 260 0: \x{bf} 261 \x{100} 262 0: \x{100} 263 \x{1000} 264 0: \x{1000} 265\= Expect no match 266 \x{c0} 267No match 268 \x{f0} 269No match 270 271/Ā{3,4}/IB,utf 272------------------------------------------------------------------ 273 Bra 274 \x{100}{3} 275 \x{100}?+ 276 Ket 277 End 278------------------------------------------------------------------ 279Capture group count = 0 280Options: utf 281First code unit = \x{100} 282Last code unit = \x{100} 283Subject length lower bound = 3 284 \x{100}\x{100}\x{100}\x{100\x{100} 285 0: \x{100}\x{100}\x{100} 286 287/(\x{100}+|x)/IB,utf 288------------------------------------------------------------------ 289 Bra 290 CBra 1 291 \x{100}++ 292 Alt 293 x 294 Ket 295 Ket 296 End 297------------------------------------------------------------------ 298Capture group count = 1 299Options: utf 300Starting code units: x \xff 301Subject length lower bound = 1 302 303/(\x{100}*a|x)/IB,utf 304------------------------------------------------------------------ 305 Bra 306 CBra 1 307 \x{100}*+ 308 a 309 Alt 310 x 311 Ket 312 Ket 313 End 314------------------------------------------------------------------ 315Capture group count = 1 316Options: utf 317Starting code units: a x \xff 318Subject length lower bound = 1 319 320/(\x{100}{0,2}a|x)/IB,utf 321------------------------------------------------------------------ 322 Bra 323 CBra 1 324 \x{100}{0,2}+ 325 a 326 Alt 327 x 328 Ket 329 Ket 330 End 331------------------------------------------------------------------ 332Capture group count = 1 333Options: utf 334Starting code units: a x \xff 335Subject length lower bound = 1 336 337/(\x{100}{1,2}a|x)/IB,utf 338------------------------------------------------------------------ 339 Bra 340 CBra 1 341 \x{100} 342 \x{100}{0,1}+ 343 a 344 Alt 345 x 346 Ket 347 Ket 348 End 349------------------------------------------------------------------ 350Capture group count = 1 351Options: utf 352Starting code units: x \xff 353Subject length lower bound = 1 354 355/\x{100}/IB,utf 356------------------------------------------------------------------ 357 Bra 358 \x{100} 359 Ket 360 End 361------------------------------------------------------------------ 362Capture group count = 0 363Options: utf 364First code unit = \x{100} 365Subject length lower bound = 1 366 367/a\x{100}\x{101}*/IB,utf 368------------------------------------------------------------------ 369 Bra 370 a\x{100} 371 \x{101}*+ 372 Ket 373 End 374------------------------------------------------------------------ 375Capture group count = 0 376Options: utf 377First code unit = 'a' 378Last code unit = \x{100} 379Subject length lower bound = 2 380 381/a\x{100}\x{101}+/IB,utf 382------------------------------------------------------------------ 383 Bra 384 a\x{100} 385 \x{101}++ 386 Ket 387 End 388------------------------------------------------------------------ 389Capture group count = 0 390Options: utf 391First code unit = 'a' 392Last code unit = \x{101} 393Subject length lower bound = 3 394 395/[^\x{c4}]/IB 396------------------------------------------------------------------ 397 Bra 398 [^\x{c4}] 399 Ket 400 End 401------------------------------------------------------------------ 402Capture group count = 0 403Subject length lower bound = 1 404 405/[\x{100}]/IB,utf 406------------------------------------------------------------------ 407 Bra 408 \x{100} 409 Ket 410 End 411------------------------------------------------------------------ 412Capture group count = 0 413Options: utf 414First code unit = \x{100} 415Subject length lower bound = 1 416 \x{100} 417 0: \x{100} 418 Z\x{100} 419 0: \x{100} 420 \x{100}Z 421 0: \x{100} 422 423/[\xff]/IB,utf 424------------------------------------------------------------------ 425 Bra 426 \x{ff} 427 Ket 428 End 429------------------------------------------------------------------ 430Capture group count = 0 431Options: utf 432First code unit = \xff 433Subject length lower bound = 1 434 >\x{ff}< 435 0: \x{ff} 436 437/[^\xff]/IB,utf 438------------------------------------------------------------------ 439 Bra 440 [^\x{ff}] 441 Ket 442 End 443------------------------------------------------------------------ 444Capture group count = 0 445Options: utf 446Subject length lower bound = 1 447 448/\x{100}abc(xyz(?1))/IB,utf 449------------------------------------------------------------------ 450 Bra 451 \x{100}abc 452 CBra 1 453 xyz 454 Recurse 455 Ket 456 Ket 457 End 458------------------------------------------------------------------ 459Capture group count = 1 460Options: utf 461First code unit = \x{100} 462Last code unit = 'z' 463Subject length lower bound = 7 464 465/\777/I,utf 466Capture group count = 0 467Options: utf 468First code unit = \x{1ff} 469Subject length lower bound = 1 470 \x{1ff} 471 0: \x{1ff} 472 \777 473 0: \x{1ff} 474 475/\x{100}+\x{200}/IB,utf 476------------------------------------------------------------------ 477 Bra 478 \x{100}++ 479 \x{200} 480 Ket 481 End 482------------------------------------------------------------------ 483Capture group count = 0 484Options: utf 485First code unit = \x{100} 486Last code unit = \x{200} 487Subject length lower bound = 2 488 489/\x{100}+X/IB,utf 490------------------------------------------------------------------ 491 Bra 492 \x{100}++ 493 X 494 Ket 495 End 496------------------------------------------------------------------ 497Capture group count = 0 498Options: utf 499First code unit = \x{100} 500Last code unit = 'X' 501Subject length lower bound = 2 502 503/^[\QĀ\E-\QŐ\E/B,utf 504Failed: error 106 at offset 13: missing terminating ] for character class 505 506/X/utf 507 XX\x{d800}\=no_utf_check 508 0: X 509 XX\x{da00}\=no_utf_check 510 0: X 511 XX\x{dc00}\=no_utf_check 512 0: X 513 XX\x{de00}\=no_utf_check 514 0: X 515 XX\x{dfff}\=no_utf_check 516 0: X 517\= Expect UTF error 518 XX\x{d800} 519Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 520 XX\x{da00} 521Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 522 XX\x{dc00} 523Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 524 XX\x{de00} 525Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 526 XX\x{dfff} 527Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 528 XX\x{110000} 529Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2 530 XX\x{d800}\x{1234} 531Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 532\= Expect no match 533 XX\x{d800}\=offset=3 534No match 535 536/(?<=.)X/utf 537 XX\x{d800}\=offset=3 538Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 539 540/(*UTF16)\x{11234}/ 541Failed: error 160 at offset 7: (*VERB) not recognized or malformed 542 abcd\x{11234}pqr 543 544/(*UTF)\x{11234}/I 545Capture group count = 0 546Compile options: <none> 547Overall options: utf 548First code unit = \x{11234} 549Subject length lower bound = 1 550 abcd\x{11234}pqr 551 0: \x{11234} 552 553/(*UTF-32)\x{11234}/ 554Failed: error 160 at offset 5: (*VERB) not recognized or malformed 555 abcd\x{11234}pqr 556 557/(*UTF-32)\x{112}/ 558Failed: error 160 at offset 5: (*VERB) not recognized or malformed 559 abcd\x{11234}pqr 560 561/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I 562Failed: error 160 at offset 14: (*VERB) not recognized or malformed 563 564/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I 565Capture group count = 0 566Compile options: <none> 567Overall options: utf 568\R matches any Unicode newline 569Forced newline is CRLF 570First code unit = 'a' 571Last code unit = 'b' 572Subject length lower bound = 3 573 574/\h/I,utf 575Capture group count = 0 576Options: utf 577Starting code units: \x09 \x20 \xa0 \xff 578Subject length lower bound = 1 579 ABC\x{09} 580 0: \x{09} 581 ABC\x{20} 582 0: 583 ABC\x{a0} 584 0: \x{a0} 585 ABC\x{1680} 586 0: \x{1680} 587 ABC\x{180e} 588 0: \x{180e} 589 ABC\x{2000} 590 0: \x{2000} 591 ABC\x{202f} 592 0: \x{202f} 593 ABC\x{205f} 594 0: \x{205f} 595 ABC\x{3000} 596 0: \x{3000} 597 598/\v/I,utf 599Capture group count = 0 600Options: utf 601Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 602Subject length lower bound = 1 603 ABC\x{0a} 604 0: \x{0a} 605 ABC\x{0b} 606 0: \x{0b} 607 ABC\x{0c} 608 0: \x{0c} 609 ABC\x{0d} 610 0: \x{0d} 611 ABC\x{85} 612 0: \x{85} 613 ABC\x{2028} 614 0: \x{2028} 615 616/\h*A/I,utf 617Capture group count = 0 618Options: utf 619Starting code units: \x09 \x20 A \xa0 \xff 620Last code unit = 'A' 621Subject length lower bound = 1 622 CDBABC 623 0: A 624 \x{2000}ABC 625 0: \x{2000}A 626 627/\R*A/I,bsr=unicode,utf 628Capture group count = 0 629Options: utf 630\R matches any Unicode newline 631Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff 632Last code unit = 'A' 633Subject length lower bound = 1 634 CDBABC 635 0: A 636 \x{2028}A 637 0: \x{2028}A 638 639/\v+A/I,utf 640Capture group count = 0 641Options: utf 642Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 643Last code unit = 'A' 644Subject length lower bound = 2 645 646/\s?xxx\s/I,utf 647Capture group count = 0 648Options: utf 649Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 650Last code unit = 'x' 651Subject length lower bound = 4 652 653/\sxxx\s/I,utf,tables=2 654Capture group count = 0 655Options: utf 656Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 657Last code unit = 'x' 658Subject length lower bound = 5 659 AB\x{85}xxx\x{a0}XYZ 660 0: \x{85}xxx\x{a0} 661 AB\x{a0}xxx\x{85}XYZ 662 0: \x{a0}xxx\x{85} 663 664/\S \S/I,utf,tables=2 665Capture group count = 0 666Options: utf 667Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 668 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 669 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 670 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 671 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 672 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 673 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 674 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 675 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 676 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 677 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 678 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 679 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 680 \xff 681Last code unit = ' ' 682Subject length lower bound = 3 683 \x{a2} \x{84} 684 0: \x{a2} \x{84} 685 A Z 686 0: A Z 687 688/a+/utf 689 a\x{123}aa\=offset=1 690 0: aa 691 a\x{123}aa\=offset=2 692 0: aa 693 a\x{123}aa\=offset=3 694 0: a 695\= Expect no match 696 a\x{123}aa\=offset=4 697No match 698\= Expect bad offset error 699 a\x{123}aa\=offset=5 700Failed: error -33: bad offset value 701 a\x{123}aa\=offset=6 702Failed: error -33: bad offset value 703 704/\x{1234}+/Ii,utf 705Capture group count = 0 706Options: caseless utf 707First code unit = \x{1234} 708Subject length lower bound = 1 709 710/\x{1234}+?/Ii,utf 711Capture group count = 0 712Options: caseless utf 713First code unit = \x{1234} 714Subject length lower bound = 1 715 716/\x{1234}++/Ii,utf 717Capture group count = 0 718Options: caseless utf 719First code unit = \x{1234} 720Subject length lower bound = 1 721 722/\x{1234}{2}/Ii,utf 723Capture group count = 0 724Options: caseless utf 725First code unit = \x{1234} 726Last code unit = \x{1234} 727Subject length lower bound = 2 728 729/[^\x{c4}]/IB,utf 730------------------------------------------------------------------ 731 Bra 732 [^\x{c4}] 733 Ket 734 End 735------------------------------------------------------------------ 736Capture group count = 0 737Options: utf 738Subject length lower bound = 1 739 740/X+\x{200}/IB,utf 741------------------------------------------------------------------ 742 Bra 743 X++ 744 \x{200} 745 Ket 746 End 747------------------------------------------------------------------ 748Capture group count = 0 749Options: utf 750First code unit = 'X' 751Last code unit = \x{200} 752Subject length lower bound = 2 753 754/\R/I,utf 755Capture group count = 0 756Options: utf 757Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 758Subject length lower bound = 1 759 760# Check bad offset 761 762/a/utf 763\= Expect bad UTF-16 offset, or no match in 32-bit 764 \x{10000}\=offset=1 765No match 766 \x{10000}ab\=offset=1 767 0: a 768\= Expect 16-bit match, 32-bit no match 769 \x{10000}ab\=offset=2 770No match 771\= Expect no match 772 \x{10000}ab\=offset=3 773No match 774\= Expect no match in 16-bit, bad offset in 32-bit 775 \x{10000}ab\=offset=4 776Failed: error -33: bad offset value 777\= Expect bad offset 778 \x{10000}ab\=offset=5 779Failed: error -33: bad offset value 780 781/�/utf 782Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined 783 784/\w+\x{C4}/B,utf 785------------------------------------------------------------------ 786 Bra 787 \w++ 788 \x{c4} 789 Ket 790 End 791------------------------------------------------------------------ 792 a\x{C4}\x{C4} 793 0: a\x{c4} 794 795/\w+\x{C4}/B,utf,tables=2 796------------------------------------------------------------------ 797 Bra 798 \w+ 799 \x{c4} 800 Ket 801 End 802------------------------------------------------------------------ 803 a\x{C4}\x{C4} 804 0: a\x{c4}\x{c4} 805 806/\W+\x{C4}/B,utf 807------------------------------------------------------------------ 808 Bra 809 \W+ 810 \x{c4} 811 Ket 812 End 813------------------------------------------------------------------ 814 !\x{C4} 815 0: !\x{c4} 816 817/\W+\x{C4}/B,utf,tables=2 818------------------------------------------------------------------ 819 Bra 820 \W++ 821 \x{c4} 822 Ket 823 End 824------------------------------------------------------------------ 825 !\x{C4} 826 0: !\x{c4} 827 828/\W+\x{A1}/B,utf 829------------------------------------------------------------------ 830 Bra 831 \W+ 832 \x{a1} 833 Ket 834 End 835------------------------------------------------------------------ 836 !\x{A1} 837 0: !\x{a1} 838 839/\W+\x{A1}/B,utf,tables=2 840------------------------------------------------------------------ 841 Bra 842 \W+ 843 \x{a1} 844 Ket 845 End 846------------------------------------------------------------------ 847 !\x{A1} 848 0: !\x{a1} 849 850/X\s+\x{A0}/B,utf 851------------------------------------------------------------------ 852 Bra 853 X 854 \s++ 855 \x{a0} 856 Ket 857 End 858------------------------------------------------------------------ 859 X\x20\x{A0}\x{A0} 860 0: X \x{a0} 861 862/X\s+\x{A0}/B,utf,tables=2 863------------------------------------------------------------------ 864 Bra 865 X 866 \s+ 867 \x{a0} 868 Ket 869 End 870------------------------------------------------------------------ 871 X\x20\x{A0}\x{A0} 872 0: X \x{a0}\x{a0} 873 874/\S+\x{A0}/B,utf 875------------------------------------------------------------------ 876 Bra 877 \S+ 878 \x{a0} 879 Ket 880 End 881------------------------------------------------------------------ 882 X\x{A0}\x{A0} 883 0: X\x{a0}\x{a0} 884 885/\S+\x{A0}/B,utf,tables=2 886------------------------------------------------------------------ 887 Bra 888 \S++ 889 \x{a0} 890 Ket 891 End 892------------------------------------------------------------------ 893 X\x{A0}\x{A0} 894 0: X\x{a0} 895 896/\x{a0}+\s!/B,utf 897------------------------------------------------------------------ 898 Bra 899 \x{a0}++ 900 \s 901 ! 902 Ket 903 End 904------------------------------------------------------------------ 905 \x{a0}\x20! 906 0: \x{a0} ! 907 908/\x{a0}+\s!/B,utf,tables=2 909------------------------------------------------------------------ 910 Bra 911 \x{a0}+ 912 \s 913 ! 914 Ket 915 End 916------------------------------------------------------------------ 917 \x{a0}\x20! 918 0: \x{a0} ! 919 920/(*UTF)abc/never_utf 921Failed: error 174 at offset 6: using UTF is disabled by the application 922 923/abc/utf,never_utf 924Failed: error 174 at offset 0: using UTF is disabled by the application 925 926/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 927------------------------------------------------------------------ 928 Bra 929 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} 930 Ket 931 End 932------------------------------------------------------------------ 933Capture group count = 0 934Options: caseless utf 935First code unit = 'A' (caseless) 936Last code unit = \x{1fb0} (caseless) 937Subject length lower bound = 5 938 939/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 940------------------------------------------------------------------ 941 Bra 942 A\x{391}\x{10427}\x{ff3a}\x{1fb0} 943 Ket 944 End 945------------------------------------------------------------------ 946Capture group count = 0 947Options: utf 948First code unit = 'A' 949Last code unit = \x{1fb0} 950Subject length lower bound = 5 951 952/AB\x{1fb0}/IB,utf 953------------------------------------------------------------------ 954 Bra 955 AB\x{1fb0} 956 Ket 957 End 958------------------------------------------------------------------ 959Capture group count = 0 960Options: utf 961First code unit = 'A' 962Last code unit = \x{1fb0} 963Subject length lower bound = 3 964 965/AB\x{1fb0}/IBi,utf 966------------------------------------------------------------------ 967 Bra 968 /i AB\x{1fb0} 969 Ket 970 End 971------------------------------------------------------------------ 972Capture group count = 0 973Options: caseless utf 974First code unit = 'A' (caseless) 975Last code unit = \x{1fb0} (caseless) 976Subject length lower bound = 3 977 978/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 979Capture group count = 0 980Options: caseless utf 981First code unit = \x{401} (caseless) 982Last code unit = \x{42f} (caseless) 983Subject length lower bound = 17 984 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 985 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 986 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 987 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 988 989/[ⱥ]/Bi,utf 990------------------------------------------------------------------ 991 Bra 992 /i \x{2c65} 993 Ket 994 End 995------------------------------------------------------------------ 996 997/[^ⱥ]/Bi,utf 998------------------------------------------------------------------ 999 Bra 1000 /i [^\x{2c65}] 1001 Ket 1002 End 1003------------------------------------------------------------------ 1004 1005/[[:blank:]]/B,ucp 1006------------------------------------------------------------------ 1007 Bra 1008 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] 1009 Ket 1010 End 1011------------------------------------------------------------------ 1012 1013/\x{212a}+/Ii,utf 1014Capture group count = 0 1015Options: caseless utf 1016Starting code units: K k \xff 1017Subject length lower bound = 1 1018 KKkk\x{212a} 1019 0: KKkk\x{212a} 1020 1021/s+/Ii,utf 1022Capture group count = 0 1023Options: caseless utf 1024Starting code units: S s \xff 1025Subject length lower bound = 1 1026 SSss\x{17f} 1027 0: SSss\x{17f} 1028 1029# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 1030 1031/\x{110000}/utf 1032Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large 1033 1034/\o{4200000}/utf 1035Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large 1036 1037/\x{100}*A/IB,utf 1038------------------------------------------------------------------ 1039 Bra 1040 \x{100}*+ 1041 A 1042 Ket 1043 End 1044------------------------------------------------------------------ 1045Capture group count = 0 1046Options: utf 1047Starting code units: A \xff 1048Last code unit = 'A' 1049Subject length lower bound = 1 1050 A 1051 0: A 1052 1053/\x{100}*\d(?R)/IB,utf 1054------------------------------------------------------------------ 1055 Bra 1056 \x{100}*+ 1057 \d 1058 Recurse 1059 Ket 1060 End 1061------------------------------------------------------------------ 1062Capture group count = 0 1063Options: utf 1064Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1065Subject length lower bound = 1 1066 1067/[Z\x{100}]/IB,utf 1068------------------------------------------------------------------ 1069 Bra 1070 [Z\x{100}] 1071 Ket 1072 End 1073------------------------------------------------------------------ 1074Capture group count = 0 1075Options: utf 1076Starting code units: Z \xff 1077Subject length lower bound = 1 1078 Z\x{100} 1079 0: Z 1080 \x{100} 1081 0: \x{100} 1082 \x{100}Z 1083 0: \x{100} 1084 1085/[z-\x{100}]/IB,utf 1086------------------------------------------------------------------ 1087 Bra 1088 [z-\xff\x{100}] 1089 Ket 1090 End 1091------------------------------------------------------------------ 1092Capture group count = 0 1093Options: utf 1094Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 1095 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 1096 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 1097 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 1098 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 1099 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 1100 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 1101 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 1102 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1103Subject length lower bound = 1 1104 1105/[z\Qa-d]Ā\E]/IB,utf 1106------------------------------------------------------------------ 1107 Bra 1108 [\-\]adz\x{100}] 1109 Ket 1110 End 1111------------------------------------------------------------------ 1112Capture group count = 0 1113Options: utf 1114Starting code units: - ] a d z \xff 1115Subject length lower bound = 1 1116 \x{100} 1117 0: \x{100} 1118 Ā 1119 0: \x{100} 1120 1121/[ab\x{100}]abc(xyz(?1))/IB,utf 1122------------------------------------------------------------------ 1123 Bra 1124 [ab\x{100}] 1125 abc 1126 CBra 1 1127 xyz 1128 Recurse 1129 Ket 1130 Ket 1131 End 1132------------------------------------------------------------------ 1133Capture group count = 1 1134Options: utf 1135Starting code units: a b \xff 1136Last code unit = 'z' 1137Subject length lower bound = 7 1138 1139/\x{100}*\s/IB,utf 1140------------------------------------------------------------------ 1141 Bra 1142 \x{100}*+ 1143 \s 1144 Ket 1145 End 1146------------------------------------------------------------------ 1147Capture group count = 0 1148Options: utf 1149Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff 1150Subject length lower bound = 1 1151 1152/\x{100}*\d/IB,utf 1153------------------------------------------------------------------ 1154 Bra 1155 \x{100}*+ 1156 \d 1157 Ket 1158 End 1159------------------------------------------------------------------ 1160Capture group count = 0 1161Options: utf 1162Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1163Subject length lower bound = 1 1164 1165/\x{100}*\w/IB,utf 1166------------------------------------------------------------------ 1167 Bra 1168 \x{100}*+ 1169 \w 1170 Ket 1171 End 1172------------------------------------------------------------------ 1173Capture group count = 0 1174Options: utf 1175Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 1176 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 1177 \xff 1178Subject length lower bound = 1 1179 1180/\x{100}*\D/IB,utf 1181------------------------------------------------------------------ 1182 Bra 1183 \x{100}* 1184 \D 1185 Ket 1186 End 1187------------------------------------------------------------------ 1188Capture group count = 0 1189Options: utf 1190Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1191 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1192 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1193 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 1194 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 1195 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 1196 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 1197 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf 1198 \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe 1199 \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 1200 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc 1201 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb 1202 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa 1203 \xfb \xfc \xfd \xfe \xff 1204Subject length lower bound = 1 1205 1206/\x{100}*\S/IB,utf 1207------------------------------------------------------------------ 1208 Bra 1209 \x{100}* 1210 \S 1211 Ket 1212 End 1213------------------------------------------------------------------ 1214Capture group count = 0 1215Options: utf 1216Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 1217 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 1218 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 1219 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 1220 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 1221 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 1222 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 1223 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 1224 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 1225 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf 1226 \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde 1227 \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed 1228 \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc 1229 \xfd \xfe \xff 1230Subject length lower bound = 1 1231 1232/\x{100}*\W/IB,utf 1233------------------------------------------------------------------ 1234 Bra 1235 \x{100}* 1236 \W 1237 Ket 1238 End 1239------------------------------------------------------------------ 1240Capture group count = 0 1241Options: utf 1242Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1243 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1244 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1245 ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 1246 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 1247 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 1248 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 1249 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 1250 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 1251 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 1252 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 1253 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1254Subject length lower bound = 1 1255 1256/[\x{105}-\x{109}]/IBi,utf 1257------------------------------------------------------------------ 1258 Bra 1259 [\x{104}-\x{109}] 1260 Ket 1261 End 1262------------------------------------------------------------------ 1263Capture group count = 0 1264Options: caseless utf 1265Starting code units: \xff 1266Subject length lower bound = 1 1267 \x{104} 1268 0: \x{104} 1269 \x{105} 1270 0: \x{105} 1271 \x{109} 1272 0: \x{109} 1273\= Expect no match 1274 \x{100} 1275No match 1276 \x{10a} 1277No match 1278 1279/[z-\x{100}]/IBi,utf 1280------------------------------------------------------------------ 1281 Bra 1282 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1283 Ket 1284 End 1285------------------------------------------------------------------ 1286Capture group count = 0 1287Options: caseless utf 1288Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1289 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1290 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1291 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1292 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1293 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1294 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1295 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1296 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1297 \xff 1298Subject length lower bound = 1 1299 Z 1300 0: Z 1301 z 1302 0: z 1303 \x{39c} 1304 0: \x{39c} 1305 \x{178} 1306 0: \x{178} 1307 | 1308 0: | 1309 \x{80} 1310 0: \x{80} 1311 \x{ff} 1312 0: \x{ff} 1313 \x{100} 1314 0: \x{100} 1315 \x{101} 1316 0: \x{101} 1317\= Expect no match 1318 \x{102} 1319No match 1320 Y 1321No match 1322 y 1323No match 1324 1325/[z-\x{100}]/IBi,utf 1326------------------------------------------------------------------ 1327 Bra 1328 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1329 Ket 1330 End 1331------------------------------------------------------------------ 1332Capture group count = 0 1333Options: caseless utf 1334Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1335 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1336 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1337 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1338 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1339 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1340 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1341 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1342 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1343 \xff 1344Subject length lower bound = 1 1345 1346/\x{3a3}B/IBi,utf 1347------------------------------------------------------------------ 1348 Bra 1349 clist 03a3 03c2 03c3 1350 /i B 1351 Ket 1352 End 1353------------------------------------------------------------------ 1354Capture group count = 0 1355Options: caseless utf 1356Starting code units: \xff 1357Last code unit = 'B' (caseless) 1358Subject length lower bound = 2 1359 1360/./utf 1361 \x{110000} 1362Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0 1363 1364/(*UTF)ab������z/B 1365------------------------------------------------------------------ 1366 Bra 1367 ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z 1368 Ket 1369 End 1370------------------------------------------------------------------ 1371 1372/ab������z/utf 1373** Failed: character value greater than 0x10ffff cannot be converted to UTF 1374 1375/[\W\p{Any}]/B 1376------------------------------------------------------------------ 1377 Bra 1378 [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}] 1379 Ket 1380 End 1381------------------------------------------------------------------ 1382 abc 1383 0: a 1384 123 1385 0: 1 1386 1387/[\W\pL]/B 1388------------------------------------------------------------------ 1389 Bra 1390 [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}] 1391 Ket 1392 End 1393------------------------------------------------------------------ 1394 abc 1395 0: a 1396 \x{100} 1397 0: \x{100} 1398 \x{308} 1399 0: \x{308} 1400\= Expect no match 1401 123 1402No match 1403 1404/[\s[:^ascii:]]/B,ucp 1405------------------------------------------------------------------ 1406 Bra 1407 [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}] 1408 Ket 1409 End 1410------------------------------------------------------------------ 1411 1412/\pP/ucp 1413 \x{7fffffff} 1414No match 1415 1416# A special extra option allows excaped surrogate code points in 32-bit mode, 1417# but subjects containing them must not be UTF-checked. These patterns give 1418# errors in 16-bit mode. 1419 1420/\x{d800}/I,utf,allow_surrogate_escapes 1421Capture group count = 0 1422Options: utf 1423Extra options: allow_surrogate_escapes 1424First code unit = \x{d800} 1425Subject length lower bound = 1 1426 \x{d800}\=no_utf_check 1427 0: \x{d800} 1428 1429/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 1430 \x{dfff}\x{df01}\=no_utf_check 1431 0: \x{dfff}\x{df01} 1432 1433# This has different starting code units in 8-bit mode. 1434 1435/^[^ab]/IB,utf 1436------------------------------------------------------------------ 1437 Bra 1438 ^ 1439 [\x00-`c-\xff] (neg) 1440 Ket 1441 End 1442------------------------------------------------------------------ 1443Capture group count = 0 1444Compile options: utf 1445Overall options: anchored utf 1446Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1447 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1448 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 1449 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 1450 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 1451 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 1452 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 1453 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 1454 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 1455 \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca 1456 \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 1457 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 1458 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 1459 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1460Subject length lower bound = 1 1461 c 1462 0: c 1463 \x{ff} 1464 0: \x{ff} 1465 \x{100} 1466 0: \x{100} 1467\= Expect no match 1468 aaa 1469No match 1470 1471# Offsets are different in 8-bit mode. 1472 1473/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 1474 123abcáyzabcdef789abcሴqr 1475 1(2) Old 6 6 "" New 6 8 "<>" 1476 2(2) Old 12 12 "" New 14 16 "<>" 1477 3(2) Old 12 15 "def" New 16 21 "<def>" 1478 4(2) Old 21 21 "" New 27 29 "<>" 1479 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr 1480 1481# A few script run tests in non-UTF mode (but they need Unicode support) 1482 1483/^(*script_run:.{4})/ 1484 \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han 1485 0: \x{3041}\x{30a1}\x{3007}\x{3007} 1486 \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han 1487 0: \x{30a1}\x{3041}\x{3007}\x{3007} 1488 \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul 1489 0: \x{1100}\x{2e80}\x{2e80}\x{1101} 1490 1491/^(*sr:.*)/utf,allow_surrogate_escapes 1492 \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana 1493 0: \x{2e80}\x{3105}\x{2e80} 1494 \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check 1495 0: \x{d800} 1496 1497/(?(n/utf 1498Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1499 1500/(?(á/utf 1501Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1502 1503# Invalid UTF-16/32 tests. 1504 1505/.../g,match_invalid_utf 1506 abcd\x{df00}wxzy\x{df00}pqrs 1507 0: abc 1508 0: wxz 1509 0: pqr 1510 abcd\x{80}wxzy\x{df00}pqrs 1511 0: abc 1512 0: d\x{80}w 1513 0: xzy 1514 0: pqr 1515 1516/abc/match_invalid_utf 1517 ab\x{df00}ab\=ph 1518Partial match: ab 1519\= Expect no match 1520 ab\x{df00}cdef\=ph 1521No match 1522 1523/.a/match_invalid_utf 1524 ab\=ph 1525Partial match: b 1526 ab\=ps 1527Partial match: b 1528\= Expect no match 1529 b\x{df00}\=ph 1530No match 1531 b\x{df00}\=ps 1532No match 1533 1534/.a$/match_invalid_utf 1535 ab\=ph 1536Partial match: b 1537 ab\=ps 1538Partial match: b 1539\= Expect no match 1540 b\x{df00}\=ph 1541No match 1542 b\x{df00}\=ps 1543No match 1544 1545/ab$/match_invalid_utf 1546 ab\x{df00}cdeab 1547 0: ab 1548\= Expect no match 1549 ab\x{df00}cde 1550No match 1551 1552/.../g,match_invalid_utf 1553 abcd\x{80}wxzy\x{df00}pqrs 1554 0: abc 1555 0: d\x{80}w 1556 0: xzy 1557 0: pqr 1558 1559/(?<=x)../g,match_invalid_utf 1560 abcd\x{80}wxzy\x{df00}pqrs 1561 0: zy 1562 abcd\x{80}wxzy\x{df00}xpqrs 1563 0: zy 1564 0: pq 1565 1566/X$/match_invalid_utf 1567\= Expect no match 1568 X\x{df00} 1569No match 1570 1571/(?<=..)X/match_invalid_utf,aftertext 1572 AB\x{df00}AQXYZ 1573 0: X 1574 0+ YZ 1575 AB\x{df00}AQXYZ\=offset=5 1576 0: X 1577 0+ YZ 1578 AB\x{df00}\x{df00}AXYZXC\=offset=5 1579 0: X 1580 0+ C 1581\= Expect no match 1582 AB\x{df00}XYZ 1583No match 1584 AB\x{df00}XYZ\=offset=3 1585No match 1586 AB\x{df00}AXYZ 1587No match 1588 AB\x{df00}AXYZ\=offset=4 1589No match 1590 AB\x{df00}\x{df00}AXYZ\=offset=5 1591No match 1592 1593/.../match_invalid_utf 1594\= Expect no match 1595 A\x{d800}B 1596No match 1597 A\x{110000}B 1598No match 1599 1600/aa/utf,ucp,match_invalid_utf,global 1601 aa\x{d800}aa 1602 0: aa 1603 0: aa 1604 1605/aa/utf,ucp,match_invalid_utf,global 1606 \x{d800}aa 1607 0: aa 1608 1609/A\z/utf,match_invalid_utf 1610 A\x{df00}\n 1611No match 1612 1613# ---------------------------------------------------- 1614 1615/(*UTF)(?=\x{123})/I 1616Capture group count = 0 1617May match empty string 1618Compile options: <none> 1619Overall options: utf 1620First code unit = \x{123} 1621Subject length lower bound = 1 1622 1623/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 1624Capture group count = 0 1625Options: utf 1626First code unit = \xc1 (caseless) 1627Last code unit = \x{145} (caseless) 1628Subject length lower bound = 3 1629 1630/[\xff\x{ffff}]/I,utf 1631Capture group count = 0 1632Options: utf 1633Starting code units: \xff 1634Subject length lower bound = 1 1635 1636/[\xff\x{ff}]/I,utf 1637Capture group count = 0 1638Options: utf 1639Starting code units: \xff 1640Subject length lower bound = 1 1641 1642/[\xff\x{ff}]/I 1643Capture group count = 0 1644Starting code units: \xff 1645Subject length lower bound = 1 1646 1647/[Ss]/I 1648Capture group count = 0 1649First code unit = 'S' (caseless) 1650Subject length lower bound = 1 1651 1652/[Ss]/I,utf 1653Capture group count = 0 1654Options: utf 1655Starting code units: S s 1656Subject length lower bound = 1 1657 1658/(?:\x{ff}|\x{3000})/I,utf 1659Capture group count = 0 1660Options: utf 1661Starting code units: \xff 1662Subject length lower bound = 1 1663 1664# ---------------------------------------------------- 1665# UCP and casing tests 1666 1667/\x{120}/i,I 1668Capture group count = 0 1669Options: caseless 1670First code unit = \x{120} 1671Subject length lower bound = 1 1672 1673/\x{c1}/i,I,ucp 1674Capture group count = 0 1675Options: caseless ucp 1676First code unit = \xc1 (caseless) 1677Subject length lower bound = 1 1678 1679/[\x{120}\x{121}]/iB,ucp 1680------------------------------------------------------------------ 1681 Bra 1682 /i \x{120} 1683 Ket 1684 End 1685------------------------------------------------------------------ 1686 1687/[ab\x{120}]+/iB,ucp 1688------------------------------------------------------------------ 1689 Bra 1690 [ABab\x{120}-\x{121}]++ 1691 Ket 1692 End 1693------------------------------------------------------------------ 1694 aABb\x{121}\x{120} 1695 0: aABb\x{121}\x{120} 1696 1697/\x{c1}/i,no_start_optimize 1698\= Expect no match 1699 \x{e1} 1700No match 1701 1702/\x{120}\x{c1}/i,ucp,no_start_optimize 1703 \x{121}\x{e1} 1704 0: \x{121}\xe1 1705 1706/\x{120}\x{c1}/i,ucp 1707 \x{121}\x{e1} 1708 0: \x{121}\xe1 1709 1710/[^\x{120}]/i,no_start_optimize 1711 \x{121} 1712 0: \x{121} 1713 1714/[^\x{120}]/i,ucp,no_start_optimize 1715\= Expect no match 1716 \x{121} 1717No match 1718 1719/[^\x{120}]/i 1720 \x{121} 1721 0: \x{121} 1722 1723/[^\x{120}]/i,ucp 1724\= Expect no match 1725 \x{121} 1726No match 1727 1728/\x{120}{2}/i,ucp 1729 \x{121}\x{121} 1730 0: \x{121}\x{121} 1731 1732/[^\x{120}]{2}/i,ucp 1733\= Expect no match 1734 \x{121}\x{121} 1735No match 1736 1737/\x{c1}+\x{e1}/iB,ucp 1738------------------------------------------------------------------ 1739 Bra 1740 /i \x{c1}+ 1741 /i \x{e1} 1742 Ket 1743 End 1744------------------------------------------------------------------ 1745 \x{c1}\x{c1}\x{c1} 1746 0: \xc1\xc1\xc1 1747 1748/\x{c1}+\x{e1}/iIB,ucp 1749------------------------------------------------------------------ 1750 Bra 1751 /i \x{c1}+ 1752 /i \x{e1} 1753 Ket 1754 End 1755------------------------------------------------------------------ 1756Capture group count = 0 1757Options: caseless ucp 1758First code unit = \xc1 (caseless) 1759Last code unit = \xe1 (caseless) 1760Subject length lower bound = 2 1761 \x{c1}\x{c1}\x{c1} 1762 0: \xc1\xc1\xc1 1763 \x{e1}\x{e1}\x{e1} 1764 0: \xe1\xe1\xe1 1765 1766/a|\x{c1}/iI,ucp 1767Capture group count = 0 1768Options: caseless ucp 1769Starting code units: A a \xc1 \xe1 1770Subject length lower bound = 1 1771 \x{e1}xxx 1772 0: \xe1 1773 1774/\x{c1}|\x{e1}/iI,ucp 1775Capture group count = 0 1776Options: caseless ucp 1777First code unit = \xc1 (caseless) 1778Subject length lower bound = 1 1779 1780/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 1781 X\x{e1}Y 1782 1: >\xc1< 1783 1784/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended 1785 X\x{121}Y 1786 1: >\x{120}< 1787 1788/s/i,ucp 1789 \x{17f} 1790 0: \x{17f} 1791 1792/s/i,utf 1793 \x{17f} 1794 0: \x{17f} 1795 1796/[^s]/i,ucp 1797\= Expect no match 1798 \x{17f} 1799No match 1800 1801/[^s]/i,utf 1802\= Expect no match 1803 \x{17f} 1804No match 1805 1806# ---------------------------------------------------- 1807 1808# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This 1809# fails in 16-bit mode, but is OK for 32-bit. 1810 1811/\x{802a0000}*/ 1812 \x{802a0000}\x{802a0000} 1813 0: \x{802a0000}\x{802a0000} 1814 1815# UTF matching without UTF, check invalid UTF characters 1816/\X++/ 1817 a\x{110000}\x{ffffffff} 1818 0: a\x{110000}\x{ffffffff} 1819 1820# This used to loop in 32-bit mode; it will fail in 16-bit mode. 1821/[\x{ffffffff}]/caseless,ucp 1822 \x{ffffffff}xyz 1823 0: \x{ffffffff} 1824 1825# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They 1826# will give errors in 16-bit mode. 1827 1828/k*\x{ffffffff}/caseless,ucp 1829 \x{ffffffff} 1830 0: \x{ffffffff} 1831 1832/k+\x{ffffffff}/caseless,ucp,no_start_optimize 1833 K\x{ffffffff} 1834 0: K\x{ffffffff} 1835\= Expect no match 1836 \x{ffffffff}\x{ffffffff} 1837No match 1838 1839/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize 1840\= Expect no match 1841 \x{ffffffff}\x{ffffffff}\x{ffffffff} 1842No match 1843 1844/k\x{ffffffff}/caseless,ucp,no_start_optimize 1845 K\x{ffffffff} 1846 0: K\x{ffffffff} 1847\= Expect no match 1848 \x{ffffffff}\x{ffffffff}\x{ffffffff} 1849No match 1850 1851/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess 1852\= Expect no match 1853 Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z 1854No match 1855 1856# --------------------------------------------------------- 1857 1858# End of testinput12 1859