1# This set of tests is for UTF-8 support and Unicode property support, with 2# relevance only for the 8-bit library. 3 4#newline_default lf any anycrlf 5 6# The next 5 patterns have UTF-8 errors 7 8/[�]/utf 9Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80 10 11/�/utf 12Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end 13 14/���xxx/utf 15Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 16 17/Â��������/utf 18Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set 19 20/Â��������/match_invalid_utf 21Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set 22 23# Now test subjects 24 25/badutf/utf 26\= Expect UTF-8 errors 27 X\xdf 28Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 29 XX\xef 30Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 31 XXX\xef\x80 32Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 33 X\xf7 34Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1 35 XX\xf7\x80 36Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 37 XXX\xf7\x80\x80 38Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 39 \xfb 40Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 41 \xfb\x80 42Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 43 \xfb\x80\x80 44Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 45 \xfb\x80\x80\x80 46Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 47 \xfd 48Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0 49 \xfd\x80 50Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 51 \xfd\x80\x80 52Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 53 \xfd\x80\x80\x80 54Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 55 \xfd\x80\x80\x80\x80 56Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 57 \xdf\x7f 58Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 59 \xef\x7f\x80 60Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 61 \xef\x80\x7f 62Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 63 \xf7\x7f\x80\x80 64Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 65 \xf7\x80\x7f\x80 66Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 67 \xf7\x80\x80\x7f 68Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 69 \xfb\x7f\x80\x80\x80 70Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 71 \xfb\x80\x7f\x80\x80 72Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 73 \xfb\x80\x80\x7f\x80 74Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 75 \xfb\x80\x80\x80\x7f 76Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0 77 \xfd\x7f\x80\x80\x80\x80 78Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 79 \xfd\x80\x7f\x80\x80\x80 80Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 81 \xfd\x80\x80\x7f\x80\x80 82Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 83 \xfd\x80\x80\x80\x7f\x80 84Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0 85 \xfd\x80\x80\x80\x80\x7f 86Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0 87 \xed\xa0\x80 88Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 89 \xc0\x8f 90Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0 91 \xe0\x80\x8f 92Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0 93 \xf0\x80\x80\x8f 94Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0 95 \xf8\x80\x80\x80\x8f 96Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 97 \xfc\x80\x80\x80\x80\x8f 98Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 99 \x80 100Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 101 \xfe 102Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 103 \xff 104Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 105 106/badutf/utf 107\= Expect UTF-8 errors 108 XX\xfb\x80\x80\x80\x80 109Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2 110 XX\xfd\x80\x80\x80\x80\x80 111Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2 112 XX\xf7\xbf\xbf\xbf 113Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 114 115/shortutf/utf 116\= Expect UTF-8 errors 117 XX\xdf\=ph 118Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 119 XX\xef\=ph 120Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 121 XX\xef\x80\=ph 122Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 123 \xf7\=ph 124Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 125 \xf7\x80\=ph 126Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 127 \xf7\x80\x80\=ph 128Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 129 \xfb\=ph 130Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 131 \xfb\x80\=ph 132Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 133 \xfb\x80\x80\=ph 134Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 135 \xfb\x80\x80\x80\=ph 136Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 137 \xfd\=ph 138Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0 139 \xfd\x80\=ph 140Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 141 \xfd\x80\x80\=ph 142Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 143 \xfd\x80\x80\x80\=ph 144Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 145 \xfd\x80\x80\x80\x80\=ph 146Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 147 148/anything/utf 149\= Expect UTF-8 errors 150 X\xc0\x80 151Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1 152 XX\xc1\x8f 153Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2 154 XXX\xe0\x9f\x80 155Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3 156 \xf0\x8f\x80\x80 157Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0 158 \xf8\x87\x80\x80\x80 159Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 160 \xfc\x83\x80\x80\x80\x80 161Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 162 \xfe\x80\x80\x80\x80\x80 163Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 164 \xff\x80\x80\x80\x80\x80 165Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 166 \xf8\x88\x80\x80\x80 167Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 168 \xf9\x87\x80\x80\x80 169Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 170 \xfc\x84\x80\x80\x80\x80 171Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 172 \xfd\x83\x80\x80\x80\x80 173Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 174\= Expect no match 175 \xc3\x8f 176No match 177 \xe0\xaf\x80 178No match 179 \xe1\x80\x80 180No match 181 \xf0\x9f\x80\x80 182No match 183 \xf1\x8f\x80\x80 184No match 185 \xf8\x88\x80\x80\x80\=no_utf_check 186No match 187 \xf9\x87\x80\x80\x80\=no_utf_check 188No match 189 \xfc\x84\x80\x80\x80\x80\=no_utf_check 190No match 191 \xfd\x83\x80\x80\x80\x80\=no_utf_check 192No match 193 194# Similar tests with offsets 195 196/badutf/utf 197\= Expect UTF-8 errors 198 X\xdfabcd 199Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 200 X\xdfabcd\=offset=1 201Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 202\= Expect no match 203 X\xdfabcd\=offset=2 204No match 205 206/(?<=x)badutf/utf 207\= Expect UTF-8 errors 208 X\xdfabcd 209Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 210 X\xdfabcd\=offset=1 211Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 212 X\xdfabcd\=offset=2 213Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 214 X\xdfabcd\xdf\=offset=3 215Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6 216\= Expect no match 217 X\xdfabcd\=offset=3 218No match 219 220/(?<=xx)badutf/utf 221\= Expect UTF-8 errors 222 X\xdfabcd 223Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 224 X\xdfabcd\=offset=1 225Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 226 X\xdfabcd\=offset=2 227Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 228 X\xdfabcd\=offset=3 229Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 230 231/(?<=xxxx)badutf/utf 232\= Expect UTF-8 errors 233 X\xdfabcd 234Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 235 X\xdfabcd\=offset=1 236Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 237 X\xdfabcd\=offset=2 238Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 239 X\xdfabcd\=offset=3 240Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 241 X\xdfabc\xdf\=offset=6 242Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5 243 X\xdfabc\xdf\=offset=7 244Failed: error -33: bad offset value 245\= Expect no match 246 X\xdfabcd\=offset=6 247No match 248 249/\x{100}/IB,utf 250------------------------------------------------------------------ 251 Bra 252 \x{100} 253 Ket 254 End 255------------------------------------------------------------------ 256Capture group count = 0 257Options: utf 258First code unit = \xc4 259Last code unit = \x80 260Subject length lower bound = 1 261 262/\x{1000}/IB,utf 263------------------------------------------------------------------ 264 Bra 265 \x{1000} 266 Ket 267 End 268------------------------------------------------------------------ 269Capture group count = 0 270Options: utf 271First code unit = \xe1 272Last code unit = \x80 273Subject length lower bound = 1 274 275/\x{10000}/IB,utf 276------------------------------------------------------------------ 277 Bra 278 \x{10000} 279 Ket 280 End 281------------------------------------------------------------------ 282Capture group count = 0 283Options: utf 284First code unit = \xf0 285Last code unit = \x80 286Subject length lower bound = 1 287 288/\x{100000}/IB,utf 289------------------------------------------------------------------ 290 Bra 291 \x{100000} 292 Ket 293 End 294------------------------------------------------------------------ 295Capture group count = 0 296Options: utf 297First code unit = \xf4 298Last code unit = \x80 299Subject length lower bound = 1 300 301/\x{10ffff}/IB,utf 302------------------------------------------------------------------ 303 Bra 304 \x{10ffff} 305 Ket 306 End 307------------------------------------------------------------------ 308Capture group count = 0 309Options: utf 310First code unit = \xf4 311Last code unit = \xbf 312Subject length lower bound = 1 313 314/[\x{ff}]/IB,utf 315------------------------------------------------------------------ 316 Bra 317 \x{ff} 318 Ket 319 End 320------------------------------------------------------------------ 321Capture group count = 0 322Options: utf 323First code unit = \xc3 324Last code unit = \xbf 325Subject length lower bound = 1 326 327/[\x{100}]/IB,utf 328------------------------------------------------------------------ 329 Bra 330 \x{100} 331 Ket 332 End 333------------------------------------------------------------------ 334Capture group count = 0 335Options: utf 336First code unit = \xc4 337Last code unit = \x80 338Subject length lower bound = 1 339 340/\x80/IB,utf 341------------------------------------------------------------------ 342 Bra 343 \x{80} 344 Ket 345 End 346------------------------------------------------------------------ 347Capture group count = 0 348Options: utf 349First code unit = \xc2 350Last code unit = \x80 351Subject length lower bound = 1 352 353/\xff/IB,utf 354------------------------------------------------------------------ 355 Bra 356 \x{ff} 357 Ket 358 End 359------------------------------------------------------------------ 360Capture group count = 0 361Options: utf 362First code unit = \xc3 363Last code unit = \xbf 364Subject length lower bound = 1 365 366/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 367------------------------------------------------------------------ 368 Bra 369 \x{d55c}\x{ad6d}\x{c5b4} 370 Ket 371 End 372------------------------------------------------------------------ 373Capture group count = 0 374Options: utf 375First code unit = \xed 376Last code unit = \xb4 377Subject length lower bound = 3 378 \x{D55c}\x{ad6d}\x{C5B4} 379 0: \x{d55c}\x{ad6d}\x{c5b4} 380 381/\x{65e5}\x{672c}\x{8a9e}/IB,utf 382------------------------------------------------------------------ 383 Bra 384 \x{65e5}\x{672c}\x{8a9e} 385 Ket 386 End 387------------------------------------------------------------------ 388Capture group count = 0 389Options: utf 390First code unit = \xe6 391Last code unit = \x9e 392Subject length lower bound = 3 393 \x{65e5}\x{672c}\x{8a9e} 394 0: \x{65e5}\x{672c}\x{8a9e} 395 396/\x{80}/IB,utf 397------------------------------------------------------------------ 398 Bra 399 \x{80} 400 Ket 401 End 402------------------------------------------------------------------ 403Capture group count = 0 404Options: utf 405First code unit = \xc2 406Last code unit = \x80 407Subject length lower bound = 1 408 409/\x{084}/IB,utf 410------------------------------------------------------------------ 411 Bra 412 \x{84} 413 Ket 414 End 415------------------------------------------------------------------ 416Capture group count = 0 417Options: utf 418First code unit = \xc2 419Last code unit = \x84 420Subject length lower bound = 1 421 422/\x{104}/IB,utf 423------------------------------------------------------------------ 424 Bra 425 \x{104} 426 Ket 427 End 428------------------------------------------------------------------ 429Capture group count = 0 430Options: utf 431First code unit = \xc4 432Last code unit = \x84 433Subject length lower bound = 1 434 435/\x{861}/IB,utf 436------------------------------------------------------------------ 437 Bra 438 \x{861} 439 Ket 440 End 441------------------------------------------------------------------ 442Capture group count = 0 443Options: utf 444First code unit = \xe0 445Last code unit = \xa1 446Subject length lower bound = 1 447 448/\x{212ab}/IB,utf 449------------------------------------------------------------------ 450 Bra 451 \x{212ab} 452 Ket 453 End 454------------------------------------------------------------------ 455Capture group count = 0 456Options: utf 457First code unit = \xf0 458Last code unit = \xab 459Subject length lower bound = 1 460 461/[^ab\xC0-\xF0]/IB,utf 462------------------------------------------------------------------ 463 Bra 464 [\x00-`c-\xbf\xf1-\xff] (neg) 465 Ket 466 End 467------------------------------------------------------------------ 468Capture group count = 0 469Options: utf 470Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 471 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 472 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 473 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 474 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 475 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 476 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 477 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 478 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 479 \xfe \xff 480Subject length lower bound = 1 481 \x{f1} 482 0: \x{f1} 483 \x{bf} 484 0: \x{bf} 485 \x{100} 486 0: \x{100} 487 \x{1000} 488 0: \x{1000} 489\= Expect no match 490 \x{c0} 491No match 492 \x{f0} 493No match 494 495/Ā{3,4}/IB,utf 496------------------------------------------------------------------ 497 Bra 498 \x{100}{3} 499 \x{100}?+ 500 Ket 501 End 502------------------------------------------------------------------ 503Capture group count = 0 504Options: utf 505First code unit = \xc4 506Last code unit = \x80 507Subject length lower bound = 3 508 \x{100}\x{100}\x{100}\x{100\x{100} 509 0: \x{100}\x{100}\x{100} 510 511/(\x{100}+|x)/IB,utf 512------------------------------------------------------------------ 513 Bra 514 CBra 1 515 \x{100}++ 516 Alt 517 x 518 Ket 519 Ket 520 End 521------------------------------------------------------------------ 522Capture group count = 1 523Options: utf 524Starting code units: x \xc4 525Subject length lower bound = 1 526 527/(\x{100}*a|x)/IB,utf 528------------------------------------------------------------------ 529 Bra 530 CBra 1 531 \x{100}*+ 532 a 533 Alt 534 x 535 Ket 536 Ket 537 End 538------------------------------------------------------------------ 539Capture group count = 1 540Options: utf 541Starting code units: a x \xc4 542Subject length lower bound = 1 543 544/(\x{100}{0,2}a|x)/IB,utf 545------------------------------------------------------------------ 546 Bra 547 CBra 1 548 \x{100}{0,2}+ 549 a 550 Alt 551 x 552 Ket 553 Ket 554 End 555------------------------------------------------------------------ 556Capture group count = 1 557Options: utf 558Starting code units: a x \xc4 559Subject length lower bound = 1 560 561/(\x{100}{1,2}a|x)/IB,utf 562------------------------------------------------------------------ 563 Bra 564 CBra 1 565 \x{100} 566 \x{100}{0,1}+ 567 a 568 Alt 569 x 570 Ket 571 Ket 572 End 573------------------------------------------------------------------ 574Capture group count = 1 575Options: utf 576Starting code units: x \xc4 577Subject length lower bound = 1 578 579/\x{100}/IB,utf 580------------------------------------------------------------------ 581 Bra 582 \x{100} 583 Ket 584 End 585------------------------------------------------------------------ 586Capture group count = 0 587Options: utf 588First code unit = \xc4 589Last code unit = \x80 590Subject length lower bound = 1 591 592/a\x{100}\x{101}*/IB,utf 593------------------------------------------------------------------ 594 Bra 595 a\x{100} 596 \x{101}*+ 597 Ket 598 End 599------------------------------------------------------------------ 600Capture group count = 0 601Options: utf 602First code unit = 'a' 603Last code unit = \x80 604Subject length lower bound = 2 605 606/a\x{100}\x{101}+/IB,utf 607------------------------------------------------------------------ 608 Bra 609 a\x{100} 610 \x{101}++ 611 Ket 612 End 613------------------------------------------------------------------ 614Capture group count = 0 615Options: utf 616First code unit = 'a' 617Last code unit = \x81 618Subject length lower bound = 3 619 620/[^\x{c4}]/IB 621------------------------------------------------------------------ 622 Bra 623 [^\x{c4}] 624 Ket 625 End 626------------------------------------------------------------------ 627Capture group count = 0 628Subject length lower bound = 1 629 630/[\x{100}]/IB,utf 631------------------------------------------------------------------ 632 Bra 633 \x{100} 634 Ket 635 End 636------------------------------------------------------------------ 637Capture group count = 0 638Options: utf 639First code unit = \xc4 640Last code unit = \x80 641Subject length lower bound = 1 642 \x{100} 643 0: \x{100} 644 Z\x{100} 645 0: \x{100} 646 \x{100}Z 647 0: \x{100} 648 649/[\xff]/IB,utf 650------------------------------------------------------------------ 651 Bra 652 \x{ff} 653 Ket 654 End 655------------------------------------------------------------------ 656Capture group count = 0 657Options: utf 658First code unit = \xc3 659Last code unit = \xbf 660Subject length lower bound = 1 661 >\x{ff}< 662 0: \x{ff} 663 664/[^\xff]/IB,utf 665------------------------------------------------------------------ 666 Bra 667 [^\x{ff}] 668 Ket 669 End 670------------------------------------------------------------------ 671Capture group count = 0 672Options: utf 673Subject length lower bound = 1 674 675/\x{100}abc(xyz(?1))/IB,utf 676------------------------------------------------------------------ 677 Bra 678 \x{100}abc 679 CBra 1 680 xyz 681 Recurse 682 Ket 683 Ket 684 End 685------------------------------------------------------------------ 686Capture group count = 1 687Options: utf 688First code unit = \xc4 689Last code unit = 'z' 690Subject length lower bound = 7 691 692/\777/I,utf 693Capture group count = 0 694Options: utf 695First code unit = \xc7 696Last code unit = \xbf 697Subject length lower bound = 1 698 \x{1ff} 699 0: \x{1ff} 700 \777 701 0: \x{1ff} 702 703/\x{100}+\x{200}/IB,utf 704------------------------------------------------------------------ 705 Bra 706 \x{100}++ 707 \x{200} 708 Ket 709 End 710------------------------------------------------------------------ 711Capture group count = 0 712Options: utf 713First code unit = \xc4 714Last code unit = \x80 715Subject length lower bound = 2 716 717/\x{100}+X/IB,utf 718------------------------------------------------------------------ 719 Bra 720 \x{100}++ 721 X 722 Ket 723 End 724------------------------------------------------------------------ 725Capture group count = 0 726Options: utf 727First code unit = \xc4 728Last code unit = 'X' 729Subject length lower bound = 2 730 731/^[\QĀ\E-\QŐ\E/B,utf 732Failed: error 106 at offset 15: missing terminating ] for character class 733 734# This tests the stricter UTF-8 check according to RFC 3629. 735 736/X/utf 737\= Expect UTF-8 errors 738 \x{d800} 739Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 740 \x{da00} 741Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 742 \x{dfff} 743Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 744 \x{110000} 745Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0 746 \x{2000000} 747Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 748 \x{7fffffff} 749Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 750\= Expect no match 751 \x{d800}\=no_utf_check 752No match 753 \x{da00}\=no_utf_check 754No match 755 \x{dfff}\=no_utf_check 756No match 757 \x{110000}\=no_utf_check 758No match 759 \x{2000000}\=no_utf_check 760No match 761 \x{7fffffff}\=no_utf_check 762No match 763 764/(*UTF8)\x{1234}/ 765 abcd\x{1234}pqr 766 0: \x{1234} 767 768/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I 769Capture group count = 0 770Compile options: <none> 771Overall options: utf 772\R matches any Unicode newline 773Forced newline is CRLF 774First code unit = 'a' 775Last code unit = 'b' 776Subject length lower bound = 3 777 778/\h/I,utf 779Capture group count = 0 780Options: utf 781Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 782Subject length lower bound = 1 783 ABC\x{09} 784 0: \x{09} 785 ABC\x{20} 786 0: 787 ABC\x{a0} 788 0: \x{a0} 789 ABC\x{1680} 790 0: \x{1680} 791 ABC\x{180e} 792 0: \x{180e} 793 ABC\x{2000} 794 0: \x{2000} 795 ABC\x{202f} 796 0: \x{202f} 797 ABC\x{205f} 798 0: \x{205f} 799 ABC\x{3000} 800 0: \x{3000} 801 802/\v/I,utf 803Capture group count = 0 804Options: utf 805Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 806Subject length lower bound = 1 807 ABC\x{0a} 808 0: \x{0a} 809 ABC\x{0b} 810 0: \x{0b} 811 ABC\x{0c} 812 0: \x{0c} 813 ABC\x{0d} 814 0: \x{0d} 815 ABC\x{85} 816 0: \x{85} 817 ABC\x{2028} 818 0: \x{2028} 819 820/\h*A/I,utf 821Capture group count = 0 822Options: utf 823Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 824Last code unit = 'A' 825Subject length lower bound = 1 826 CDBABC 827 0: A 828 829/\v+A/I,utf 830Capture group count = 0 831Options: utf 832Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 833Last code unit = 'A' 834Subject length lower bound = 2 835 836/\s?xxx\s/I,utf 837Capture group count = 0 838Options: utf 839Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 840Last code unit = 'x' 841Subject length lower bound = 4 842 843/\sxxx\s/I,utf,tables=2 844Capture group count = 0 845Options: utf 846Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 847Last code unit = 'x' 848Subject length lower bound = 5 849 AB\x{85}xxx\x{a0}XYZ 850 0: \x{85}xxx\x{a0} 851 AB\x{a0}xxx\x{85}XYZ 852 0: \x{a0}xxx\x{85} 853 854/\S \S/I,utf,tables=2 855Capture group count = 0 856Options: utf 857Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 858 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 859 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 860 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 861 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 862 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 863 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 864 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 865 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 866Last code unit = ' ' 867Subject length lower bound = 3 868 \x{a2} \x{84} 869 0: \x{a2} \x{84} 870 A Z 871 0: A Z 872 873/a+/utf 874 a\x{123}aa\=offset=1 875 0: aa 876 a\x{123}aa\=offset=3 877 0: aa 878 a\x{123}aa\=offset=4 879 0: a 880\= Expect bad offset value 881 a\x{123}aa\=offset=6 882Failed: error -33: bad offset value 883\= Expect bad UTF-8 offset 884 a\x{123}aa\=offset=2 885Error -36 (bad UTF-8 offset) 886\= Expect no match 887 a\x{123}aa\=offset=5 888No match 889 890/\x{1234}+/Ii,utf 891Capture group count = 0 892Options: caseless utf 893Starting code units: \xe1 894Subject length lower bound = 1 895 896/\x{1234}+?/Ii,utf 897Capture group count = 0 898Options: caseless utf 899Starting code units: \xe1 900Subject length lower bound = 1 901 902/\x{1234}++/Ii,utf 903Capture group count = 0 904Options: caseless utf 905Starting code units: \xe1 906Subject length lower bound = 1 907 908/\x{1234}{2}/Ii,utf 909Capture group count = 0 910Options: caseless utf 911Starting code units: \xe1 912Subject length lower bound = 2 913 914/[^\x{c4}]/IB,utf 915------------------------------------------------------------------ 916 Bra 917 [^\x{c4}] 918 Ket 919 End 920------------------------------------------------------------------ 921Capture group count = 0 922Options: utf 923Subject length lower bound = 1 924 925/X+\x{200}/IB,utf 926------------------------------------------------------------------ 927 Bra 928 X++ 929 \x{200} 930 Ket 931 End 932------------------------------------------------------------------ 933Capture group count = 0 934Options: utf 935First code unit = 'X' 936Last code unit = \x80 937Subject length lower bound = 2 938 939/\R/I,utf 940Capture group count = 0 941Options: utf 942Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 943Subject length lower bound = 1 944 945/\777/IB,utf 946------------------------------------------------------------------ 947 Bra 948 \x{1ff} 949 Ket 950 End 951------------------------------------------------------------------ 952Capture group count = 0 953Options: utf 954First code unit = \xc7 955Last code unit = \xbf 956Subject length lower bound = 1 957 958/\w+\x{C4}/B,utf 959------------------------------------------------------------------ 960 Bra 961 \w++ 962 \x{c4} 963 Ket 964 End 965------------------------------------------------------------------ 966 a\x{C4}\x{C4} 967 0: a\x{c4} 968 969/\w+\x{C4}/B,utf,tables=2 970------------------------------------------------------------------ 971 Bra 972 \w+ 973 \x{c4} 974 Ket 975 End 976------------------------------------------------------------------ 977 a\x{C4}\x{C4} 978 0: a\x{c4}\x{c4} 979 980/\W+\x{C4}/B,utf 981------------------------------------------------------------------ 982 Bra 983 \W+ 984 \x{c4} 985 Ket 986 End 987------------------------------------------------------------------ 988 !\x{C4} 989 0: !\x{c4} 990 991/\W+\x{C4}/B,utf,tables=2 992------------------------------------------------------------------ 993 Bra 994 \W++ 995 \x{c4} 996 Ket 997 End 998------------------------------------------------------------------ 999 !\x{C4} 1000 0: !\x{c4} 1001 1002/\W+\x{A1}/B,utf 1003------------------------------------------------------------------ 1004 Bra 1005 \W+ 1006 \x{a1} 1007 Ket 1008 End 1009------------------------------------------------------------------ 1010 !\x{A1} 1011 0: !\x{a1} 1012 1013/\W+\x{A1}/B,utf,tables=2 1014------------------------------------------------------------------ 1015 Bra 1016 \W+ 1017 \x{a1} 1018 Ket 1019 End 1020------------------------------------------------------------------ 1021 !\x{A1} 1022 0: !\x{a1} 1023 1024/X\s+\x{A0}/B,utf 1025------------------------------------------------------------------ 1026 Bra 1027 X 1028 \s++ 1029 \x{a0} 1030 Ket 1031 End 1032------------------------------------------------------------------ 1033 X\x20\x{A0}\x{A0} 1034 0: X \x{a0} 1035 1036/X\s+\x{A0}/B,utf,tables=2 1037------------------------------------------------------------------ 1038 Bra 1039 X 1040 \s+ 1041 \x{a0} 1042 Ket 1043 End 1044------------------------------------------------------------------ 1045 X\x20\x{A0}\x{A0} 1046 0: X \x{a0}\x{a0} 1047 1048/\S+\x{A0}/B,utf 1049------------------------------------------------------------------ 1050 Bra 1051 \S+ 1052 \x{a0} 1053 Ket 1054 End 1055------------------------------------------------------------------ 1056 X\x{A0}\x{A0} 1057 0: X\x{a0}\x{a0} 1058 1059/\S+\x{A0}/B,utf,tables=2 1060------------------------------------------------------------------ 1061 Bra 1062 \S++ 1063 \x{a0} 1064 Ket 1065 End 1066------------------------------------------------------------------ 1067 X\x{A0}\x{A0} 1068 0: X\x{a0} 1069 1070/\x{a0}+\s!/B,utf 1071------------------------------------------------------------------ 1072 Bra 1073 \x{a0}++ 1074 \s 1075 ! 1076 Ket 1077 End 1078------------------------------------------------------------------ 1079 \x{a0}\x20! 1080 0: \x{a0} ! 1081 1082/\x{a0}+\s!/B,utf,tables=2 1083------------------------------------------------------------------ 1084 Bra 1085 \x{a0}+ 1086 \s 1087 ! 1088 Ket 1089 End 1090------------------------------------------------------------------ 1091 \x{a0}\x20! 1092 0: \x{a0} ! 1093 1094/A/utf 1095 \x{ff000041} 1096** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 1097 \x{7f000041} 1098Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 1099 1100/(*UTF8)abc/never_utf 1101Failed: error 174 at offset 7: using UTF is disabled by the application 1102 1103/abc/utf,never_utf 1104Failed: error 174 at offset 0: using UTF is disabled by the application 1105 1106/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 1107------------------------------------------------------------------ 1108 Bra 1109 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} 1110 Ket 1111 End 1112------------------------------------------------------------------ 1113Capture group count = 0 1114Options: caseless utf 1115First code unit = 'A' (caseless) 1116Subject length lower bound = 5 1117 1118/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 1119------------------------------------------------------------------ 1120 Bra 1121 A\x{391}\x{10427}\x{ff3a}\x{1fb0} 1122 Ket 1123 End 1124------------------------------------------------------------------ 1125Capture group count = 0 1126Options: utf 1127First code unit = 'A' 1128Last code unit = \xb0 1129Subject length lower bound = 5 1130 1131/AB\x{1fb0}/IB,utf 1132------------------------------------------------------------------ 1133 Bra 1134 AB\x{1fb0} 1135 Ket 1136 End 1137------------------------------------------------------------------ 1138Capture group count = 0 1139Options: utf 1140First code unit = 'A' 1141Last code unit = \xb0 1142Subject length lower bound = 3 1143 1144/AB\x{1fb0}/IBi,utf 1145------------------------------------------------------------------ 1146 Bra 1147 /i AB\x{1fb0} 1148 Ket 1149 End 1150------------------------------------------------------------------ 1151Capture group count = 0 1152Options: caseless utf 1153First code unit = 'A' (caseless) 1154Last code unit = 'B' (caseless) 1155Subject length lower bound = 3 1156 1157/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 1158Capture group count = 0 1159Options: caseless utf 1160Starting code units: \xd0 \xd1 1161Subject length lower bound = 17 1162 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 1163 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 1164 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 1165 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 1166 1167/[ⱥ]/Bi,utf 1168------------------------------------------------------------------ 1169 Bra 1170 /i \x{2c65} 1171 Ket 1172 End 1173------------------------------------------------------------------ 1174 1175/[^ⱥ]/Bi,utf 1176------------------------------------------------------------------ 1177 Bra 1178 /i [^\x{2c65}] 1179 Ket 1180 End 1181------------------------------------------------------------------ 1182 1183/\h/I 1184Capture group count = 0 1185Starting code units: \x09 \x20 \xa0 1186Subject length lower bound = 1 1187 1188/\v/I 1189Capture group count = 0 1190Starting code units: \x0a \x0b \x0c \x0d \x85 1191Subject length lower bound = 1 1192 1193/\R/I 1194Capture group count = 0 1195Starting code units: \x0a \x0b \x0c \x0d \x85 1196Subject length lower bound = 1 1197 1198/[[:blank:]]/B,ucp 1199------------------------------------------------------------------ 1200 Bra 1201 [\x09 \xa0] 1202 Ket 1203 End 1204------------------------------------------------------------------ 1205 1206/\x{212a}+/Ii,utf 1207Capture group count = 0 1208Options: caseless utf 1209Starting code units: K k \xe2 1210Subject length lower bound = 1 1211 KKkk\x{212a} 1212 0: KKkk\x{212a} 1213 1214/s+/Ii,utf 1215Capture group count = 0 1216Options: caseless utf 1217Starting code units: S s \xc5 1218Subject length lower bound = 1 1219 SSss\x{17f} 1220 0: SSss\x{17f} 1221 1222/\x{100}*A/IB,utf 1223------------------------------------------------------------------ 1224 Bra 1225 \x{100}*+ 1226 A 1227 Ket 1228 End 1229------------------------------------------------------------------ 1230Capture group count = 0 1231Options: utf 1232Starting code units: A \xc4 1233Last code unit = 'A' 1234Subject length lower bound = 1 1235 A 1236 0: A 1237 1238/\x{100}*\d(?R)/IB,utf 1239------------------------------------------------------------------ 1240 Bra 1241 \x{100}*+ 1242 \d 1243 Recurse 1244 Ket 1245 End 1246------------------------------------------------------------------ 1247Capture group count = 0 1248Options: utf 1249Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 1250Subject length lower bound = 1 1251 1252/[Z\x{100}]/IB,utf 1253------------------------------------------------------------------ 1254 Bra 1255 [Z\x{100}] 1256 Ket 1257 End 1258------------------------------------------------------------------ 1259Capture group count = 0 1260Options: utf 1261Starting code units: Z \xc4 1262Subject length lower bound = 1 1263 Z\x{100} 1264 0: Z 1265 \x{100} 1266 0: \x{100} 1267 \x{100}Z 1268 0: \x{100} 1269 1270/[z-\x{100}]/IB,utf 1271------------------------------------------------------------------ 1272 Bra 1273 [z-\xff\x{100}] 1274 Ket 1275 End 1276------------------------------------------------------------------ 1277Capture group count = 0 1278Options: utf 1279Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 1280Subject length lower bound = 1 1281 1282/[z\Qa-d]Ā\E]/IB,utf 1283------------------------------------------------------------------ 1284 Bra 1285 [\-\]adz\x{100}] 1286 Ket 1287 End 1288------------------------------------------------------------------ 1289Capture group count = 0 1290Options: utf 1291Starting code units: - ] a d z \xc4 1292Subject length lower bound = 1 1293 \x{100} 1294 0: \x{100} 1295 Ā 1296 0: \x{100} 1297 1298/[ab\x{100}]abc(xyz(?1))/IB,utf 1299------------------------------------------------------------------ 1300 Bra 1301 [ab\x{100}] 1302 abc 1303 CBra 1 1304 xyz 1305 Recurse 1306 Ket 1307 Ket 1308 End 1309------------------------------------------------------------------ 1310Capture group count = 1 1311Options: utf 1312Starting code units: a b \xc4 1313Last code unit = 'z' 1314Subject length lower bound = 7 1315 1316/\x{100}*\s/IB,utf 1317------------------------------------------------------------------ 1318 Bra 1319 \x{100}*+ 1320 \s 1321 Ket 1322 End 1323------------------------------------------------------------------ 1324Capture group count = 0 1325Options: utf 1326Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 1327Subject length lower bound = 1 1328 1329/\x{100}*\d/IB,utf 1330------------------------------------------------------------------ 1331 Bra 1332 \x{100}*+ 1333 \d 1334 Ket 1335 End 1336------------------------------------------------------------------ 1337Capture group count = 0 1338Options: utf 1339Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 1340Subject length lower bound = 1 1341 1342/\x{100}*\w/IB,utf 1343------------------------------------------------------------------ 1344 Bra 1345 \x{100}*+ 1346 \w 1347 Ket 1348 End 1349------------------------------------------------------------------ 1350Capture group count = 0 1351Options: utf 1352Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 1353 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 1354 \xc4 1355Subject length lower bound = 1 1356 1357/\x{100}*\D/IB,utf 1358------------------------------------------------------------------ 1359 Bra 1360 \x{100}* 1361 \D 1362 Ket 1363 End 1364------------------------------------------------------------------ 1365Capture group count = 0 1366Options: utf 1367Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1368 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1369 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1370 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 1371 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 1372 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1373 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1374 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1375 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1376 \xff 1377Subject length lower bound = 1 1378 1379/\x{100}*\S/IB,utf 1380------------------------------------------------------------------ 1381 Bra 1382 \x{100}* 1383 \S 1384 Ket 1385 End 1386------------------------------------------------------------------ 1387Capture group count = 0 1388Options: utf 1389Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 1390 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 1391 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 1392 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 1393 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 1394 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 1395 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 1396 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 1397 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1398Subject length lower bound = 1 1399 1400/\x{100}*\W/IB,utf 1401------------------------------------------------------------------ 1402 Bra 1403 \x{100}* 1404 \W 1405 Ket 1406 End 1407------------------------------------------------------------------ 1408Capture group count = 0 1409Options: utf 1410Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1411 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1412 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1413 ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 1414 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 1415 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 1416 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 1417 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1418Subject length lower bound = 1 1419 1420/[\x{105}-\x{109}]/IBi,utf 1421------------------------------------------------------------------ 1422 Bra 1423 [\x{104}-\x{109}] 1424 Ket 1425 End 1426------------------------------------------------------------------ 1427Capture group count = 0 1428Options: caseless utf 1429Starting code units: \xc4 1430Subject length lower bound = 1 1431 \x{104} 1432 0: \x{104} 1433 \x{105} 1434 0: \x{105} 1435 \x{109} 1436 0: \x{109} 1437\= Expect no match 1438 \x{100} 1439No match 1440 \x{10a} 1441No match 1442 1443/[z-\x{100}]/IBi,utf 1444------------------------------------------------------------------ 1445 Bra 1446 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1447 Ket 1448 End 1449------------------------------------------------------------------ 1450Capture group count = 0 1451Options: caseless utf 1452Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 1453Subject length lower bound = 1 1454 Z 1455 0: Z 1456 z 1457 0: z 1458 \x{39c} 1459 0: \x{39c} 1460 \x{178} 1461 0: \x{178} 1462 | 1463 0: | 1464 \x{80} 1465 0: \x{80} 1466 \x{ff} 1467 0: \x{ff} 1468 \x{100} 1469 0: \x{100} 1470 \x{101} 1471 0: \x{101} 1472\= Expect no match 1473 \x{102} 1474No match 1475 Y 1476No match 1477 y 1478No match 1479 1480/[z-\x{100}]/IBi,utf 1481------------------------------------------------------------------ 1482 Bra 1483 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1484 Ket 1485 End 1486------------------------------------------------------------------ 1487Capture group count = 0 1488Options: caseless utf 1489Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 1490Subject length lower bound = 1 1491 1492/\x{3a3}B/IBi,utf 1493------------------------------------------------------------------ 1494 Bra 1495 clist 03a3 03c2 03c3 1496 /i B 1497 Ket 1498 End 1499------------------------------------------------------------------ 1500Capture group count = 0 1501Options: caseless utf 1502Starting code units: \xce \xcf 1503Last code unit = 'B' (caseless) 1504Subject length lower bound = 2 1505 1506/abc/utf,replace=� 1507 abc 1508Failed: error -3: UTF-8 error: 1 byte missing at end 1509 1510/(?<=(a)(?-1))x/I,utf 1511Capture group count = 1 1512Max lookbehind = 2 1513Options: utf 1514First code unit = 'x' 1515Subject length lower bound = 1 1516 a\x80zx\=offset=3 1517Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 1518 1519/[\W\p{Any}]/B 1520------------------------------------------------------------------ 1521 Bra 1522 [\x00-/:-@[-^`{-\xff\p{Any}] 1523 Ket 1524 End 1525------------------------------------------------------------------ 1526 abc 1527 0: a 1528 123 1529 0: 1 1530 1531/[\W\pL]/B 1532------------------------------------------------------------------ 1533 Bra 1534 [\x00-/:-@[-^`{-\xff\p{L}] 1535 Ket 1536 End 1537------------------------------------------------------------------ 1538 abc 1539 0: a 1540\= Expect no match 1541 123 1542No match 1543 1544/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf 1545Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) 1546 1547/[\s[:^ascii:]]/B,ucp 1548------------------------------------------------------------------ 1549 Bra 1550 [\x80-\xff\p{Xsp}] 1551 Ket 1552 End 1553------------------------------------------------------------------ 1554 1555# A special extra option allows excaped surrogate code points in 8-bit mode, 1556# but subjects containing them must not be UTF-checked. 1557 1558/\x{d800}/I,utf,allow_surrogate_escapes 1559Capture group count = 0 1560Options: utf 1561Extra options: allow_surrogate_escapes 1562First code unit = \xed 1563Last code unit = \x80 1564Subject length lower bound = 1 1565 \x{d800}\=no_utf_check 1566 0: \x{d800} 1567 1568/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 1569 \x{dfff}\x{df01}\=no_utf_check 1570 0: \x{dfff}\x{df01} 1571 1572# This has different starting code units in 8-bit mode. 1573 1574/^[^ab]/IB,utf 1575------------------------------------------------------------------ 1576 Bra 1577 ^ 1578 [\x00-`c-\xff] (neg) 1579 Ket 1580 End 1581------------------------------------------------------------------ 1582Capture group count = 0 1583Compile options: utf 1584Overall options: anchored utf 1585Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1586 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1587 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 1588 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 1589 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 1590 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 1591 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 1592 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 1593 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 1594 \xfe \xff 1595Subject length lower bound = 1 1596 c 1597 0: c 1598 \x{ff} 1599 0: \x{ff} 1600 \x{100} 1601 0: \x{100} 1602\= Expect no match 1603 aaa 1604No match 1605 1606# Offsets are different in 8-bit mode. 1607 1608/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 1609 123abcáyzabcdef789abcሴqr 1610 1(2) Old 6 6 "" New 6 8 "<>" 1611 2(2) Old 13 13 "" New 15 17 "<>" 1612 3(2) Old 13 16 "def" New 17 22 "<def>" 1613 4(2) Old 22 22 "" New 28 30 "<>" 1614 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr 1615 1616# Check name length with non-ASCII characters 1617 1618/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 1619 1620/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 1621Failed: error 148 at offset 132: subpattern name is too long (maximum 128 code units) 1622 1623/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf 1624 1625/(?(n/utf 1626Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1627 1628/(?(á/utf 1629Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) 1630 1631# Invalid UTF-8 tests 1632 1633/.../g,match_invalid_utf 1634 abcd\x80wxzy\x80pqrs 1635 0: abc 1636 0: wxz 1637 0: pqr 1638 abcd\x{80}wxzy\x80pqrs 1639 0: abc 1640 0: d\x{80}w 1641 0: xzy 1642 0: pqr 1643 1644/abc/match_invalid_utf 1645 ab\x80ab\=ph 1646Partial match: ab 1647\= Expect no match 1648 ab\x80cdef\=ph 1649No match 1650 1651/.a/match_invalid_utf 1652 ab\=ph 1653Partial match: b 1654 ab\=ps 1655Partial match: b 1656 b\xf0\x91\x88b\=ph 1657Partial match: b 1658 b\xf0\x91\x88b\=ps 1659Partial match: b 1660 b\xf0\x91\x88\xb4a 1661 0: \x{11234}a 1662\= Expect no match 1663 b\x80\=ph 1664No match 1665 b\x80\=ps 1666No match 1667 b\xf0\x91\x88\=ph 1668No match 1669 b\xf0\x91\x88\=ps 1670No match 1671 1672/.a$/match_invalid_utf 1673 ab\=ph 1674Partial match: b 1675 ab\=ps 1676Partial match: b 1677\= Expect no match 1678 b\xf0\x91\x98\=ph 1679No match 1680 b\xf0\x91\x98\=ps 1681No match 1682 1683/ab$/match_invalid_utf 1684 ab\x80cdeab 1685 0: ab 1686\= Expect no match 1687 ab\x80cde 1688No match 1689 1690/.../g,match_invalid_utf 1691 abcd\x{80}wxzy\x80pqrs 1692 0: abc 1693 0: d\x{80}w 1694 0: xzy 1695 0: pqr 1696 1697/(?<=x)../g,match_invalid_utf 1698 abcd\x{80}wxzy\x80pqrs 1699 0: zy 1700 abcd\x{80}wxzy\x80xpqrs 1701 0: zy 1702 0: pq 1703 1704/X$/match_invalid_utf 1705\= Expect no match 1706 X\xc4 1707No match 1708 1709/(?<=..)X/match_invalid_utf,aftertext 1710 AB\x80AQXYZ 1711 0: X 1712 0+ YZ 1713 AB\x80AQXYZ\=offset=5 1714 0: X 1715 0+ YZ 1716 AB\x80\x80AXYZXC\=offset=5 1717 0: X 1718 0+ C 1719\= Expect no match 1720 AB\x80XYZ 1721No match 1722 AB\x80XYZ\=offset=3 1723No match 1724 AB\xfeXYZ 1725No match 1726 AB\xffXYZ\=offset=3 1727No match 1728 AB\x80AXYZ 1729No match 1730 AB\x80AXYZ\=offset=4 1731No match 1732 AB\x80\x80AXYZ\=offset=5 1733No match 1734 1735/.../match_invalid_utf 1736 AB\xc4CCC 1737 0: CCC 1738\= Expect no match 1739 A\x{d800}B 1740No match 1741 A\x{110000}B 1742No match 1743 A\xc4B 1744No match 1745 1746/\bX/match_invalid_utf 1747 A\x80X 1748 0: X 1749 1750/\BX/match_invalid_utf 1751\= Expect no match 1752 A\x80X 1753No match 1754 1755/(?<=...)X/match_invalid_utf 1756 AAA\x80BBBXYZ 1757 0: X 1758\= Expect no match 1759 AAA\x80BXYZ 1760No match 1761 AAA\x80BBXYZ 1762No match 1763 1764# ------------------------------------- 1765 1766/(*UTF)(?=\x{123})/I 1767Capture group count = 0 1768May match empty string 1769Compile options: <none> 1770Overall options: utf 1771First code unit = \xc4 1772Last code unit = \xa3 1773Subject length lower bound = 1 1774 1775/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 1776Capture group count = 0 1777Options: utf 1778Starting code units: \xc3 1779Last code unit = 'X' 1780Subject length lower bound = 3 1781 1782/[,]/BI,utf 1783------------------------------------------------------------------ 1784 Bra 1785 [,\x{fff9f}] 1786 Ket 1787 End 1788------------------------------------------------------------------ 1789Capture group count = 0 1790Options: utf 1791Starting code units: , \xf3 1792Subject length lower bound = 1 1793 1794/[\x{fff4}-\x{ffff8}]/I,utf 1795Capture group count = 0 1796Options: utf 1797Starting code units: \xef \xf0 \xf1 \xf2 \xf3 1798Subject length lower bound = 1 1799 1800/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf 1801Capture group count = 0 1802Options: utf 1803Starting code units: \xef \xf0 \xf1 \xf2 \xf4 1804Subject length lower bound = 1 1805 1806/[\xff\x{ffff}]/I,utf 1807Capture group count = 0 1808Options: utf 1809Starting code units: \xc3 \xef 1810Subject length lower bound = 1 1811 1812/[\xff\x{ff}]/I,utf 1813Capture group count = 0 1814Options: utf 1815Starting code units: \xc3 1816Subject length lower bound = 1 1817 abc\x{ff}def 1818 0: \x{ff} 1819 1820/[\xff\x{ff}]/I 1821Capture group count = 0 1822First code unit = \xff 1823Subject length lower bound = 1 1824 abc\x{ff}def 1825 0: \xff 1826 1827/[Ss]/I 1828Capture group count = 0 1829First code unit = 'S' (caseless) 1830Subject length lower bound = 1 1831 1832/[Ss]/I,utf 1833Capture group count = 0 1834Options: utf 1835Starting code units: S s 1836Subject length lower bound = 1 1837 1838/(?:\x{ff}|\x{3000})/I,utf 1839Capture group count = 0 1840Options: utf 1841Starting code units: \xc3 \xe3 1842Subject length lower bound = 1 1843 1844/x/utf 1845 abxyz 1846 0: x 1847 \x80\=startchar 1848Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 1849 abc\x80\=startchar 1850Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 1851 abc\x80\=startchar,offset=3 1852Error -36 (bad UTF-8 offset) 1853 1854/\x{c1}+\x{e1}/iIB,ucp 1855------------------------------------------------------------------ 1856 Bra 1857 /i \x{c1}+ 1858 /i \x{e1} 1859 Ket 1860 End 1861------------------------------------------------------------------ 1862Capture group count = 0 1863Options: caseless ucp 1864First code unit = \xc1 (caseless) 1865Last code unit = \xe1 (caseless) 1866Subject length lower bound = 2 1867 \x{c1}\x{c1}\x{c1} 1868 0: \xc1\xc1\xc1 1869 \x{e1}\x{e1}\x{e1} 1870 0: \xe1\xe1\xe1 1871 1872/a|\x{c1}/iI,ucp 1873Capture group count = 0 1874Options: caseless ucp 1875Starting code units: A a \xc1 \xe1 1876Subject length lower bound = 1 1877 \x{e1}xxx 1878 0: \xe1 1879 1880/a|\x{c1}/iI,utf 1881Capture group count = 0 1882Options: caseless utf 1883Starting code units: A a \xc3 1884Subject length lower bound = 1 1885 \x{e1}xxx 1886 0: \x{e1} 1887 1888/\x{c1}|\x{e1}/iI,ucp 1889Capture group count = 0 1890Options: caseless ucp 1891First code unit = \xc1 (caseless) 1892Subject length lower bound = 1 1893 1894/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 1895 X\x{e1}Y 1896 1: >\xc1< 1897 1898/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended 1899 X\x{c1}Y 1900 1: >\xe1< 1901 1902# Without UTF or UCP characters > 127 have only one case in the default locale. 1903 1904/X(\x{e1})Y/replace=>\U$1<,substitute_extended 1905 X\x{e1}Y 1906 1: >\xe1< 1907 1908/A/utf,match_invalid_utf,caseless 1909 \xe5A 1910 0: A 1911 1912/\bch\b/utf,match_invalid_utf 1913 qchq\=ph 1914Partial match: 1915 qchq\=ps 1916Partial match: 1917 1918/line1\nbreak/firstline,utf,match_invalid_utf 1919 line1\nbreak 1920 0: line1\x{0a}break 1921 line0\nline1\nbreak 1922No match 1923 1924/A\z/utf,match_invalid_utf 1925 A\x80\x42\n 1926No match 1927 1928# End of testinput10 1929