xref: /aosp_15_r20/external/pcre/testdata/testoutput12-32 (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1# This set of tests is for UTF-16 and UTF-32 support, including Unicode
2# properties. It is relevant only to the 16-bit and 32-bit libraries. The
3# output is different for each library, so there are separate output files.
4
5/���xxx/IB,utf,no_utf_check
6** Failed: invalid UTF-8 string cannot be converted to 32-bit string
7
8/abc/utf
9    �]
10** Failed: invalid UTF-8 string cannot be used as input in UTF mode
11
12# Check maximum character size
13
14/\x{ffff}/IB,utf
15------------------------------------------------------------------
16        Bra
17        \x{ffff}
18        Ket
19        End
20------------------------------------------------------------------
21Capture group count = 0
22Options: utf
23First code unit = \x{ffff}
24Subject length lower bound = 1
25
26/\x{10000}/IB,utf
27------------------------------------------------------------------
28        Bra
29        \x{10000}
30        Ket
31        End
32------------------------------------------------------------------
33Capture group count = 0
34Options: utf
35First code unit = \x{10000}
36Subject length lower bound = 1
37
38/\x{100}/IB,utf
39------------------------------------------------------------------
40        Bra
41        \x{100}
42        Ket
43        End
44------------------------------------------------------------------
45Capture group count = 0
46Options: utf
47First code unit = \x{100}
48Subject length lower bound = 1
49
50/\x{1000}/IB,utf
51------------------------------------------------------------------
52        Bra
53        \x{1000}
54        Ket
55        End
56------------------------------------------------------------------
57Capture group count = 0
58Options: utf
59First code unit = \x{1000}
60Subject length lower bound = 1
61
62/\x{10000}/IB,utf
63------------------------------------------------------------------
64        Bra
65        \x{10000}
66        Ket
67        End
68------------------------------------------------------------------
69Capture group count = 0
70Options: utf
71First code unit = \x{10000}
72Subject length lower bound = 1
73
74/\x{100000}/IB,utf
75------------------------------------------------------------------
76        Bra
77        \x{100000}
78        Ket
79        End
80------------------------------------------------------------------
81Capture group count = 0
82Options: utf
83First code unit = \x{100000}
84Subject length lower bound = 1
85
86/\x{10ffff}/IB,utf
87------------------------------------------------------------------
88        Bra
89        \x{10ffff}
90        Ket
91        End
92------------------------------------------------------------------
93Capture group count = 0
94Options: utf
95First code unit = \x{10ffff}
96Subject length lower bound = 1
97
98/[\x{ff}]/IB,utf
99------------------------------------------------------------------
100        Bra
101        \x{ff}
102        Ket
103        End
104------------------------------------------------------------------
105Capture group count = 0
106Options: utf
107First code unit = \xff
108Subject length lower bound = 1
109
110/[\x{100}]/IB,utf
111------------------------------------------------------------------
112        Bra
113        \x{100}
114        Ket
115        End
116------------------------------------------------------------------
117Capture group count = 0
118Options: utf
119First code unit = \x{100}
120Subject length lower bound = 1
121
122/\x80/IB,utf
123------------------------------------------------------------------
124        Bra
125        \x{80}
126        Ket
127        End
128------------------------------------------------------------------
129Capture group count = 0
130Options: utf
131First code unit = \x80
132Subject length lower bound = 1
133
134/\xff/IB,utf
135------------------------------------------------------------------
136        Bra
137        \x{ff}
138        Ket
139        End
140------------------------------------------------------------------
141Capture group count = 0
142Options: utf
143First code unit = \xff
144Subject length lower bound = 1
145
146/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
147------------------------------------------------------------------
148        Bra
149        \x{d55c}\x{ad6d}\x{c5b4}
150        Ket
151        End
152------------------------------------------------------------------
153Capture group count = 0
154Options: utf
155First code unit = \x{d55c}
156Last code unit = \x{c5b4}
157Subject length lower bound = 3
158    \x{D55c}\x{ad6d}\x{C5B4}
159 0: \x{d55c}\x{ad6d}\x{c5b4}
160
161/\x{65e5}\x{672c}\x{8a9e}/IB,utf
162------------------------------------------------------------------
163        Bra
164        \x{65e5}\x{672c}\x{8a9e}
165        Ket
166        End
167------------------------------------------------------------------
168Capture group count = 0
169Options: utf
170First code unit = \x{65e5}
171Last code unit = \x{8a9e}
172Subject length lower bound = 3
173    \x{65e5}\x{672c}\x{8a9e}
174 0: \x{65e5}\x{672c}\x{8a9e}
175
176/\x{80}/IB,utf
177------------------------------------------------------------------
178        Bra
179        \x{80}
180        Ket
181        End
182------------------------------------------------------------------
183Capture group count = 0
184Options: utf
185First code unit = \x80
186Subject length lower bound = 1
187
188/\x{084}/IB,utf
189------------------------------------------------------------------
190        Bra
191        \x{84}
192        Ket
193        End
194------------------------------------------------------------------
195Capture group count = 0
196Options: utf
197First code unit = \x84
198Subject length lower bound = 1
199
200/\x{104}/IB,utf
201------------------------------------------------------------------
202        Bra
203        \x{104}
204        Ket
205        End
206------------------------------------------------------------------
207Capture group count = 0
208Options: utf
209First code unit = \x{104}
210Subject length lower bound = 1
211
212/\x{861}/IB,utf
213------------------------------------------------------------------
214        Bra
215        \x{861}
216        Ket
217        End
218------------------------------------------------------------------
219Capture group count = 0
220Options: utf
221First code unit = \x{861}
222Subject length lower bound = 1
223
224/\x{212ab}/IB,utf
225------------------------------------------------------------------
226        Bra
227        \x{212ab}
228        Ket
229        End
230------------------------------------------------------------------
231Capture group count = 0
232Options: utf
233First code unit = \x{212ab}
234Subject length lower bound = 1
235
236/[^ab\xC0-\xF0]/IB,utf
237------------------------------------------------------------------
238        Bra
239        [\x00-`c-\xbf\xf1-\xff] (neg)
240        Ket
241        End
242------------------------------------------------------------------
243Capture group count = 0
244Options: utf
245Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
246  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
247  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
248  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
249  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
250  \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
251  \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
252  \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
253  \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
254  \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
255  \xfc \xfd \xfe \xff
256Subject length lower bound = 1
257    \x{f1}
258 0: \x{f1}
259    \x{bf}
260 0: \x{bf}
261    \x{100}
262 0: \x{100}
263    \x{1000}
264 0: \x{1000}
265\= Expect no match
266    \x{c0}
267No match
268    \x{f0}
269No match
270
271/Ā{3,4}/IB,utf
272------------------------------------------------------------------
273        Bra
274        \x{100}{3}
275        \x{100}?+
276        Ket
277        End
278------------------------------------------------------------------
279Capture group count = 0
280Options: utf
281First code unit = \x{100}
282Last code unit = \x{100}
283Subject length lower bound = 3
284  \x{100}\x{100}\x{100}\x{100\x{100}
285 0: \x{100}\x{100}\x{100}
286
287/(\x{100}+|x)/IB,utf
288------------------------------------------------------------------
289        Bra
290        CBra 1
291        \x{100}++
292        Alt
293        x
294        Ket
295        Ket
296        End
297------------------------------------------------------------------
298Capture group count = 1
299Options: utf
300Starting code units: x \xff
301Subject length lower bound = 1
302
303/(\x{100}*a|x)/IB,utf
304------------------------------------------------------------------
305        Bra
306        CBra 1
307        \x{100}*+
308        a
309        Alt
310        x
311        Ket
312        Ket
313        End
314------------------------------------------------------------------
315Capture group count = 1
316Options: utf
317Starting code units: a x \xff
318Subject length lower bound = 1
319
320/(\x{100}{0,2}a|x)/IB,utf
321------------------------------------------------------------------
322        Bra
323        CBra 1
324        \x{100}{0,2}+
325        a
326        Alt
327        x
328        Ket
329        Ket
330        End
331------------------------------------------------------------------
332Capture group count = 1
333Options: utf
334Starting code units: a x \xff
335Subject length lower bound = 1
336
337/(\x{100}{1,2}a|x)/IB,utf
338------------------------------------------------------------------
339        Bra
340        CBra 1
341        \x{100}
342        \x{100}{0,1}+
343        a
344        Alt
345        x
346        Ket
347        Ket
348        End
349------------------------------------------------------------------
350Capture group count = 1
351Options: utf
352Starting code units: x \xff
353Subject length lower bound = 1
354
355/\x{100}/IB,utf
356------------------------------------------------------------------
357        Bra
358        \x{100}
359        Ket
360        End
361------------------------------------------------------------------
362Capture group count = 0
363Options: utf
364First code unit = \x{100}
365Subject length lower bound = 1
366
367/a\x{100}\x{101}*/IB,utf
368------------------------------------------------------------------
369        Bra
370        a\x{100}
371        \x{101}*+
372        Ket
373        End
374------------------------------------------------------------------
375Capture group count = 0
376Options: utf
377First code unit = 'a'
378Last code unit = \x{100}
379Subject length lower bound = 2
380
381/a\x{100}\x{101}+/IB,utf
382------------------------------------------------------------------
383        Bra
384        a\x{100}
385        \x{101}++
386        Ket
387        End
388------------------------------------------------------------------
389Capture group count = 0
390Options: utf
391First code unit = 'a'
392Last code unit = \x{101}
393Subject length lower bound = 3
394
395/[^\x{c4}]/IB
396------------------------------------------------------------------
397        Bra
398        [^\x{c4}]
399        Ket
400        End
401------------------------------------------------------------------
402Capture group count = 0
403Subject length lower bound = 1
404
405/[\x{100}]/IB,utf
406------------------------------------------------------------------
407        Bra
408        \x{100}
409        Ket
410        End
411------------------------------------------------------------------
412Capture group count = 0
413Options: utf
414First code unit = \x{100}
415Subject length lower bound = 1
416    \x{100}
417 0: \x{100}
418    Z\x{100}
419 0: \x{100}
420    \x{100}Z
421 0: \x{100}
422
423/[\xff]/IB,utf
424------------------------------------------------------------------
425        Bra
426        \x{ff}
427        Ket
428        End
429------------------------------------------------------------------
430Capture group count = 0
431Options: utf
432First code unit = \xff
433Subject length lower bound = 1
434    >\x{ff}<
435 0: \x{ff}
436
437/[^\xff]/IB,utf
438------------------------------------------------------------------
439        Bra
440        [^\x{ff}]
441        Ket
442        End
443------------------------------------------------------------------
444Capture group count = 0
445Options: utf
446Subject length lower bound = 1
447
448/\x{100}abc(xyz(?1))/IB,utf
449------------------------------------------------------------------
450        Bra
451        \x{100}abc
452        CBra 1
453        xyz
454        Recurse
455        Ket
456        Ket
457        End
458------------------------------------------------------------------
459Capture group count = 1
460Options: utf
461First code unit = \x{100}
462Last code unit = 'z'
463Subject length lower bound = 7
464
465/\777/I,utf
466Capture group count = 0
467Options: utf
468First code unit = \x{1ff}
469Subject length lower bound = 1
470  \x{1ff}
471 0: \x{1ff}
472  \777
473 0: \x{1ff}
474
475/\x{100}+\x{200}/IB,utf
476------------------------------------------------------------------
477        Bra
478        \x{100}++
479        \x{200}
480        Ket
481        End
482------------------------------------------------------------------
483Capture group count = 0
484Options: utf
485First code unit = \x{100}
486Last code unit = \x{200}
487Subject length lower bound = 2
488
489/\x{100}+X/IB,utf
490------------------------------------------------------------------
491        Bra
492        \x{100}++
493        X
494        Ket
495        End
496------------------------------------------------------------------
497Capture group count = 0
498Options: utf
499First code unit = \x{100}
500Last code unit = 'X'
501Subject length lower bound = 2
502
503/^[\QĀ\E-\QŐ\E/B,utf
504Failed: error 106 at offset 13: missing terminating ] for character class
505
506/X/utf
507    XX\x{d800}\=no_utf_check
508 0: X
509    XX\x{da00}\=no_utf_check
510 0: X
511    XX\x{dc00}\=no_utf_check
512 0: X
513    XX\x{de00}\=no_utf_check
514 0: X
515    XX\x{dfff}\=no_utf_check
516 0: X
517\= Expect UTF error
518    XX\x{d800}
519Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
520    XX\x{da00}
521Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
522    XX\x{dc00}
523Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
524    XX\x{de00}
525Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
526    XX\x{dfff}
527Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
528    XX\x{110000}
529Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
530    XX\x{d800}\x{1234}
531Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
532\= Expect no match
533    XX\x{d800}\=offset=3
534No match
535
536/(?<=.)X/utf
537    XX\x{d800}\=offset=3
538Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
539
540/(*UTF16)\x{11234}/
541Failed: error 160 at offset 7: (*VERB) not recognized or malformed
542  abcd\x{11234}pqr
543
544/(*UTF)\x{11234}/I
545Capture group count = 0
546Compile options: <none>
547Overall options: utf
548First code unit = \x{11234}
549Subject length lower bound = 1
550  abcd\x{11234}pqr
551 0: \x{11234}
552
553/(*UTF-32)\x{11234}/
554Failed: error 160 at offset 5: (*VERB) not recognized or malformed
555  abcd\x{11234}pqr
556
557/(*UTF-32)\x{112}/
558Failed: error 160 at offset 5: (*VERB) not recognized or malformed
559  abcd\x{11234}pqr
560
561/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
562Failed: error 160 at offset 14: (*VERB) not recognized or malformed
563
564/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
565Capture group count = 0
566Compile options: <none>
567Overall options: utf
568\R matches any Unicode newline
569Forced newline is CRLF
570First code unit = 'a'
571Last code unit = 'b'
572Subject length lower bound = 3
573
574/\h/I,utf
575Capture group count = 0
576Options: utf
577Starting code units: \x09 \x20 \xa0 \xff
578Subject length lower bound = 1
579    ABC\x{09}
580 0: \x{09}
581    ABC\x{20}
582 0:
583    ABC\x{a0}
584 0: \x{a0}
585    ABC\x{1680}
586 0: \x{1680}
587    ABC\x{180e}
588 0: \x{180e}
589    ABC\x{2000}
590 0: \x{2000}
591    ABC\x{202f}
592 0: \x{202f}
593    ABC\x{205f}
594 0: \x{205f}
595    ABC\x{3000}
596 0: \x{3000}
597
598/\v/I,utf
599Capture group count = 0
600Options: utf
601Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
602Subject length lower bound = 1
603    ABC\x{0a}
604 0: \x{0a}
605    ABC\x{0b}
606 0: \x{0b}
607    ABC\x{0c}
608 0: \x{0c}
609    ABC\x{0d}
610 0: \x{0d}
611    ABC\x{85}
612 0: \x{85}
613    ABC\x{2028}
614 0: \x{2028}
615
616/\h*A/I,utf
617Capture group count = 0
618Options: utf
619Starting code units: \x09 \x20 A \xa0 \xff
620Last code unit = 'A'
621Subject length lower bound = 1
622    CDBABC
623 0: A
624    \x{2000}ABC
625 0: \x{2000}A
626
627/\R*A/I,bsr=unicode,utf
628Capture group count = 0
629Options: utf
630\R matches any Unicode newline
631Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
632Last code unit = 'A'
633Subject length lower bound = 1
634    CDBABC
635 0: A
636    \x{2028}A
637 0: \x{2028}A
638
639/\v+A/I,utf
640Capture group count = 0
641Options: utf
642Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
643Last code unit = 'A'
644Subject length lower bound = 2
645
646/\s?xxx\s/I,utf
647Capture group count = 0
648Options: utf
649Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
650Last code unit = 'x'
651Subject length lower bound = 4
652
653/\sxxx\s/I,utf,tables=2
654Capture group count = 0
655Options: utf
656Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
657Last code unit = 'x'
658Subject length lower bound = 5
659    AB\x{85}xxx\x{a0}XYZ
660 0: \x{85}xxx\x{a0}
661    AB\x{a0}xxx\x{85}XYZ
662 0: \x{a0}xxx\x{85}
663
664/\S \S/I,utf,tables=2
665Capture group count = 0
666Options: utf
667Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
668  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
669  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
670  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
671  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
672  \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
673  \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
674  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
675  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
676  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
677  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
678  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
679  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
680  \xff
681Last code unit = ' '
682Subject length lower bound = 3
683    \x{a2} \x{84}
684 0: \x{a2} \x{84}
685    A Z
686 0: A Z
687
688/a+/utf
689    a\x{123}aa\=offset=1
690 0: aa
691    a\x{123}aa\=offset=2
692 0: aa
693    a\x{123}aa\=offset=3
694 0: a
695\= Expect no match
696    a\x{123}aa\=offset=4
697No match
698\= Expect bad offset error
699    a\x{123}aa\=offset=5
700Failed: error -33: bad offset value
701    a\x{123}aa\=offset=6
702Failed: error -33: bad offset value
703
704/\x{1234}+/Ii,utf
705Capture group count = 0
706Options: caseless utf
707First code unit = \x{1234}
708Subject length lower bound = 1
709
710/\x{1234}+?/Ii,utf
711Capture group count = 0
712Options: caseless utf
713First code unit = \x{1234}
714Subject length lower bound = 1
715
716/\x{1234}++/Ii,utf
717Capture group count = 0
718Options: caseless utf
719First code unit = \x{1234}
720Subject length lower bound = 1
721
722/\x{1234}{2}/Ii,utf
723Capture group count = 0
724Options: caseless utf
725First code unit = \x{1234}
726Last code unit = \x{1234}
727Subject length lower bound = 2
728
729/[^\x{c4}]/IB,utf
730------------------------------------------------------------------
731        Bra
732        [^\x{c4}]
733        Ket
734        End
735------------------------------------------------------------------
736Capture group count = 0
737Options: utf
738Subject length lower bound = 1
739
740/X+\x{200}/IB,utf
741------------------------------------------------------------------
742        Bra
743        X++
744        \x{200}
745        Ket
746        End
747------------------------------------------------------------------
748Capture group count = 0
749Options: utf
750First code unit = 'X'
751Last code unit = \x{200}
752Subject length lower bound = 2
753
754/\R/I,utf
755Capture group count = 0
756Options: utf
757Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
758Subject length lower bound = 1
759
760# Check bad offset
761
762/a/utf
763\= Expect bad UTF-16 offset, or no match in 32-bit
764    \x{10000}\=offset=1
765No match
766    \x{10000}ab\=offset=1
767 0: a
768\= Expect 16-bit match, 32-bit no match
769    \x{10000}ab\=offset=2
770No match
771\= Expect no match
772    \x{10000}ab\=offset=3
773No match
774\= Expect no match in 16-bit, bad offset in 32-bit
775    \x{10000}ab\=offset=4
776Failed: error -33: bad offset value
777\= Expect bad offset
778    \x{10000}ab\=offset=5
779Failed: error -33: bad offset value
780
781/�/utf
782Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
783
784/\w+\x{C4}/B,utf
785------------------------------------------------------------------
786        Bra
787        \w++
788        \x{c4}
789        Ket
790        End
791------------------------------------------------------------------
792    a\x{C4}\x{C4}
793 0: a\x{c4}
794
795/\w+\x{C4}/B,utf,tables=2
796------------------------------------------------------------------
797        Bra
798        \w+
799        \x{c4}
800        Ket
801        End
802------------------------------------------------------------------
803    a\x{C4}\x{C4}
804 0: a\x{c4}\x{c4}
805
806/\W+\x{C4}/B,utf
807------------------------------------------------------------------
808        Bra
809        \W+
810        \x{c4}
811        Ket
812        End
813------------------------------------------------------------------
814    !\x{C4}
815 0: !\x{c4}
816
817/\W+\x{C4}/B,utf,tables=2
818------------------------------------------------------------------
819        Bra
820        \W++
821        \x{c4}
822        Ket
823        End
824------------------------------------------------------------------
825    !\x{C4}
826 0: !\x{c4}
827
828/\W+\x{A1}/B,utf
829------------------------------------------------------------------
830        Bra
831        \W+
832        \x{a1}
833        Ket
834        End
835------------------------------------------------------------------
836    !\x{A1}
837 0: !\x{a1}
838
839/\W+\x{A1}/B,utf,tables=2
840------------------------------------------------------------------
841        Bra
842        \W+
843        \x{a1}
844        Ket
845        End
846------------------------------------------------------------------
847    !\x{A1}
848 0: !\x{a1}
849
850/X\s+\x{A0}/B,utf
851------------------------------------------------------------------
852        Bra
853        X
854        \s++
855        \x{a0}
856        Ket
857        End
858------------------------------------------------------------------
859    X\x20\x{A0}\x{A0}
860 0: X \x{a0}
861
862/X\s+\x{A0}/B,utf,tables=2
863------------------------------------------------------------------
864        Bra
865        X
866        \s+
867        \x{a0}
868        Ket
869        End
870------------------------------------------------------------------
871    X\x20\x{A0}\x{A0}
872 0: X \x{a0}\x{a0}
873
874/\S+\x{A0}/B,utf
875------------------------------------------------------------------
876        Bra
877        \S+
878        \x{a0}
879        Ket
880        End
881------------------------------------------------------------------
882    X\x{A0}\x{A0}
883 0: X\x{a0}\x{a0}
884
885/\S+\x{A0}/B,utf,tables=2
886------------------------------------------------------------------
887        Bra
888        \S++
889        \x{a0}
890        Ket
891        End
892------------------------------------------------------------------
893    X\x{A0}\x{A0}
894 0: X\x{a0}
895
896/\x{a0}+\s!/B,utf
897------------------------------------------------------------------
898        Bra
899        \x{a0}++
900        \s
901        !
902        Ket
903        End
904------------------------------------------------------------------
905    \x{a0}\x20!
906 0: \x{a0} !
907
908/\x{a0}+\s!/B,utf,tables=2
909------------------------------------------------------------------
910        Bra
911        \x{a0}+
912        \s
913        !
914        Ket
915        End
916------------------------------------------------------------------
917    \x{a0}\x20!
918 0: \x{a0} !
919
920/(*UTF)abc/never_utf
921Failed: error 174 at offset 6: using UTF is disabled by the application
922
923/abc/utf,never_utf
924Failed: error 174 at offset 0: using UTF is disabled by the application
925
926/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
927------------------------------------------------------------------
928        Bra
929     /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
930        Ket
931        End
932------------------------------------------------------------------
933Capture group count = 0
934Options: caseless utf
935First code unit = 'A' (caseless)
936Last code unit = \x{1fb0} (caseless)
937Subject length lower bound = 5
938
939/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
940------------------------------------------------------------------
941        Bra
942        A\x{391}\x{10427}\x{ff3a}\x{1fb0}
943        Ket
944        End
945------------------------------------------------------------------
946Capture group count = 0
947Options: utf
948First code unit = 'A'
949Last code unit = \x{1fb0}
950Subject length lower bound = 5
951
952/AB\x{1fb0}/IB,utf
953------------------------------------------------------------------
954        Bra
955        AB\x{1fb0}
956        Ket
957        End
958------------------------------------------------------------------
959Capture group count = 0
960Options: utf
961First code unit = 'A'
962Last code unit = \x{1fb0}
963Subject length lower bound = 3
964
965/AB\x{1fb0}/IBi,utf
966------------------------------------------------------------------
967        Bra
968     /i AB\x{1fb0}
969        Ket
970        End
971------------------------------------------------------------------
972Capture group count = 0
973Options: caseless utf
974First code unit = 'A' (caseless)
975Last code unit = \x{1fb0} (caseless)
976Subject length lower bound = 3
977
978/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
979Capture group count = 0
980Options: caseless utf
981First code unit = \x{401} (caseless)
982Last code unit = \x{42f} (caseless)
983Subject length lower bound = 17
984    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
985 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
986    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
987 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
988
989/[ⱥ]/Bi,utf
990------------------------------------------------------------------
991        Bra
992     /i \x{2c65}
993        Ket
994        End
995------------------------------------------------------------------
996
997/[^ⱥ]/Bi,utf
998------------------------------------------------------------------
999        Bra
1000     /i [^\x{2c65}]
1001        Ket
1002        End
1003------------------------------------------------------------------
1004
1005/[[:blank:]]/B,ucp
1006------------------------------------------------------------------
1007        Bra
1008        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
1009        Ket
1010        End
1011------------------------------------------------------------------
1012
1013/\x{212a}+/Ii,utf
1014Capture group count = 0
1015Options: caseless utf
1016Starting code units: K k \xff
1017Subject length lower bound = 1
1018    KKkk\x{212a}
1019 0: KKkk\x{212a}
1020
1021/s+/Ii,utf
1022Capture group count = 0
1023Options: caseless utf
1024Starting code units: S s \xff
1025Subject length lower bound = 1
1026    SSss\x{17f}
1027 0: SSss\x{17f}
1028
1029# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
1030
1031/\x{110000}/utf
1032Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
1033
1034/\o{4200000}/utf
1035Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
1036
1037/\x{100}*A/IB,utf
1038------------------------------------------------------------------
1039        Bra
1040        \x{100}*+
1041        A
1042        Ket
1043        End
1044------------------------------------------------------------------
1045Capture group count = 0
1046Options: utf
1047Starting code units: A \xff
1048Last code unit = 'A'
1049Subject length lower bound = 1
1050    A
1051 0: A
1052
1053/\x{100}*\d(?R)/IB,utf
1054------------------------------------------------------------------
1055        Bra
1056        \x{100}*+
1057        \d
1058        Recurse
1059        Ket
1060        End
1061------------------------------------------------------------------
1062Capture group count = 0
1063Options: utf
1064Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1065Subject length lower bound = 1
1066
1067/[Z\x{100}]/IB,utf
1068------------------------------------------------------------------
1069        Bra
1070        [Z\x{100}]
1071        Ket
1072        End
1073------------------------------------------------------------------
1074Capture group count = 0
1075Options: utf
1076Starting code units: Z \xff
1077Subject length lower bound = 1
1078    Z\x{100}
1079 0: Z
1080    \x{100}
1081 0: \x{100}
1082    \x{100}Z
1083 0: \x{100}
1084
1085/[z-\x{100}]/IB,utf
1086------------------------------------------------------------------
1087        Bra
1088        [z-\xff\x{100}]
1089        Ket
1090        End
1091------------------------------------------------------------------
1092Capture group count = 0
1093Options: utf
1094Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
1095  \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
1096  \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
1097  \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
1098  \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
1099  \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
1100  \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
1101  \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
1102  \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1103Subject length lower bound = 1
1104
1105/[z\Qa-d]Ā\E]/IB,utf
1106------------------------------------------------------------------
1107        Bra
1108        [\-\]adz\x{100}]
1109        Ket
1110        End
1111------------------------------------------------------------------
1112Capture group count = 0
1113Options: utf
1114Starting code units: - ] a d z \xff
1115Subject length lower bound = 1
1116    \x{100}
1117 0: \x{100}
1118    Ā
1119 0: \x{100}
1120
1121/[ab\x{100}]abc(xyz(?1))/IB,utf
1122------------------------------------------------------------------
1123        Bra
1124        [ab\x{100}]
1125        abc
1126        CBra 1
1127        xyz
1128        Recurse
1129        Ket
1130        Ket
1131        End
1132------------------------------------------------------------------
1133Capture group count = 1
1134Options: utf
1135Starting code units: a b \xff
1136Last code unit = 'z'
1137Subject length lower bound = 7
1138
1139/\x{100}*\s/IB,utf
1140------------------------------------------------------------------
1141        Bra
1142        \x{100}*+
1143        \s
1144        Ket
1145        End
1146------------------------------------------------------------------
1147Capture group count = 0
1148Options: utf
1149Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
1150Subject length lower bound = 1
1151
1152/\x{100}*\d/IB,utf
1153------------------------------------------------------------------
1154        Bra
1155        \x{100}*+
1156        \d
1157        Ket
1158        End
1159------------------------------------------------------------------
1160Capture group count = 0
1161Options: utf
1162Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1163Subject length lower bound = 1
1164
1165/\x{100}*\w/IB,utf
1166------------------------------------------------------------------
1167        Bra
1168        \x{100}*+
1169        \w
1170        Ket
1171        End
1172------------------------------------------------------------------
1173Capture group count = 0
1174Options: utf
1175Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1176  Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1177  \xff
1178Subject length lower bound = 1
1179
1180/\x{100}*\D/IB,utf
1181------------------------------------------------------------------
1182        Bra
1183        \x{100}*
1184        \D
1185        Ket
1186        End
1187------------------------------------------------------------------
1188Capture group count = 0
1189Options: utf
1190Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1191  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1192  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1193  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1194  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
1195  \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
1196  \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
1197  \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
1198  \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
1199  \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1200  \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1201  \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1202  \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1203  \xfb \xfc \xfd \xfe \xff
1204Subject length lower bound = 1
1205
1206/\x{100}*\S/IB,utf
1207------------------------------------------------------------------
1208        Bra
1209        \x{100}*
1210        \S
1211        Ket
1212        End
1213------------------------------------------------------------------
1214Capture group count = 0
1215Options: utf
1216Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1217  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1218  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1219  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1220  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
1221  \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
1222  \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
1223  \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
1224  \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
1225  \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
1226  \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
1227  \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
1228  \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
1229  \xfd \xfe \xff
1230Subject length lower bound = 1
1231
1232/\x{100}*\W/IB,utf
1233------------------------------------------------------------------
1234        Bra
1235        \x{100}*
1236        \W
1237        Ket
1238        End
1239------------------------------------------------------------------
1240Capture group count = 0
1241Options: utf
1242Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1243  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1244  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1245  ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
1246  \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
1247  \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
1248  \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
1249  \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
1250  \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
1251  \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
1252  \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
1253  \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1254Subject length lower bound = 1
1255
1256/[\x{105}-\x{109}]/IBi,utf
1257------------------------------------------------------------------
1258        Bra
1259        [\x{104}-\x{109}]
1260        Ket
1261        End
1262------------------------------------------------------------------
1263Capture group count = 0
1264Options: caseless utf
1265Starting code units: \xff
1266Subject length lower bound = 1
1267    \x{104}
1268 0: \x{104}
1269    \x{105}
1270 0: \x{105}
1271    \x{109}
1272 0: \x{109}
1273\= Expect no match
1274    \x{100}
1275No match
1276    \x{10a}
1277No match
1278
1279/[z-\x{100}]/IBi,utf
1280------------------------------------------------------------------
1281        Bra
1282        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1283        Ket
1284        End
1285------------------------------------------------------------------
1286Capture group count = 0
1287Options: caseless utf
1288Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1289  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1290  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1291  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1292  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1293  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1294  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1295  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1296  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1297  \xff
1298Subject length lower bound = 1
1299    Z
1300 0: Z
1301    z
1302 0: z
1303    \x{39c}
1304 0: \x{39c}
1305    \x{178}
1306 0: \x{178}
1307    |
1308 0: |
1309    \x{80}
1310 0: \x{80}
1311    \x{ff}
1312 0: \x{ff}
1313    \x{100}
1314 0: \x{100}
1315    \x{101}
1316 0: \x{101}
1317\= Expect no match
1318    \x{102}
1319No match
1320    Y
1321No match
1322    y
1323No match
1324
1325/[z-\x{100}]/IBi,utf
1326------------------------------------------------------------------
1327        Bra
1328        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1329        Ket
1330        End
1331------------------------------------------------------------------
1332Capture group count = 0
1333Options: caseless utf
1334Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1335  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1336  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1337  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1338  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1339  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1340  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1341  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1342  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1343  \xff
1344Subject length lower bound = 1
1345
1346/\x{3a3}B/IBi,utf
1347------------------------------------------------------------------
1348        Bra
1349        clist 03a3 03c2 03c3
1350     /i B
1351        Ket
1352        End
1353------------------------------------------------------------------
1354Capture group count = 0
1355Options: caseless utf
1356Starting code units: \xff
1357Last code unit = 'B' (caseless)
1358Subject length lower bound = 2
1359
1360/./utf
1361    \x{110000}
1362Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
1363
1364/(*UTF)ab������z/B
1365------------------------------------------------------------------
1366        Bra
1367        ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z
1368        Ket
1369        End
1370------------------------------------------------------------------
1371
1372/ab������z/utf
1373** Failed: character value greater than 0x10ffff cannot be converted to UTF
1374
1375/[\W\p{Any}]/B
1376------------------------------------------------------------------
1377        Bra
1378        [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}]
1379        Ket
1380        End
1381------------------------------------------------------------------
1382    abc
1383 0: a
1384    123
1385 0: 1
1386
1387/[\W\pL]/B
1388------------------------------------------------------------------
1389        Bra
1390        [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}]
1391        Ket
1392        End
1393------------------------------------------------------------------
1394    abc
1395 0: a
1396    \x{100}
1397 0: \x{100}
1398    \x{308}
1399 0: \x{308}
1400\= Expect no match
1401    123
1402No match
1403
1404/[\s[:^ascii:]]/B,ucp
1405------------------------------------------------------------------
1406        Bra
1407        [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}]
1408        Ket
1409        End
1410------------------------------------------------------------------
1411
1412/\pP/ucp
1413    \x{7fffffff}
1414No match
1415
1416# A special extra option allows escaped surrogate code points in 32-bit mode,
1417# but subjects containing them must not be UTF-checked. These patterns give
1418# errors in 16-bit mode.
1419
1420/\x{d800}/I,utf,allow_surrogate_escapes
1421Capture group count = 0
1422Options: utf
1423Extra options: allow_surrogate_escapes
1424First code unit = \x{d800}
1425Subject length lower bound = 1
1426    \x{d800}\=no_utf_check
1427 0: \x{d800}
1428
1429/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1430    \x{dfff}\x{df01}\=no_utf_check
1431 0: \x{dfff}\x{df01}
1432
1433# This has different starting code units in 8-bit mode.
1434
1435/^[^ab]/IB,utf
1436------------------------------------------------------------------
1437        Bra
1438        ^
1439        [\x00-`c-\xff] (neg)
1440        Ket
1441        End
1442------------------------------------------------------------------
1443Capture group count = 0
1444Compile options: utf
1445Overall options: anchored utf
1446Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1447  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1448  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1449  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1450  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1451  \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
1452  \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
1453  \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
1454  \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
1455  \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
1456  \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
1457  \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
1458  \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
1459  \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1460Subject length lower bound = 1
1461    c
1462 0: c
1463    \x{ff}
1464 0: \x{ff}
1465    \x{100}
1466 0: \x{100}
1467\= Expect no match
1468    aaa
1469No match
1470
1471# Offsets are different in 8-bit mode.
1472
1473/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
1474    123abcáyzabcdef789abcሴqr
1475 1(2) Old 6 6 "" New 6 8 "<>"
1476 2(2) Old 12 12 "" New 14 16 "<>"
1477 3(2) Old 12 15 "def" New 16 21 "<def>"
1478 4(2) Old 21 21 "" New 27 29 "<>"
1479 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
1480
1481# A few script run tests in non-UTF mode (but they need Unicode support)
1482
1483/^(*script_run:.{4})/
1484    \x{3041}\x{30a1}\x{3007}\x{3007}   Hiragana Katakana Han Han
1485 0: \x{3041}\x{30a1}\x{3007}\x{3007}
1486    \x{30a1}\x{3041}\x{3007}\x{3007}   Katakana Hiragana Han Han
1487 0: \x{30a1}\x{3041}\x{3007}\x{3007}
1488    \x{1100}\x{2e80}\x{2e80}\x{1101}   Hangul Han Han Hangul
1489 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
1490
1491/^(*sr:.*)/utf,allow_surrogate_escapes
1492    \x{2e80}\x{3105}\x{2e80}\x{30a1}   Han Bopomofo Han Katakana
1493 0: \x{2e80}\x{3105}\x{2e80}
1494    \x{d800}\x{dfff}                   Surrogates (Unknown) \=no_utf_check
1495 0: \x{d800}
1496
1497/(?(n/utf
1498Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1499
1500/(?(á/utf
1501Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1502
1503# Invalid UTF-16/32 tests.
1504
1505/.../g,match_invalid_utf
1506    abcd\x{df00}wxzy\x{df00}pqrs
1507 0: abc
1508 0: wxz
1509 0: pqr
1510    abcd\x{80}wxzy\x{df00}pqrs
1511 0: abc
1512 0: d\x{80}w
1513 0: xzy
1514 0: pqr
1515
1516/abc/match_invalid_utf
1517    ab\x{df00}ab\=ph
1518Partial match: ab
1519\= Expect no match
1520    ab\x{df00}cdef\=ph
1521No match
1522
1523/.a/match_invalid_utf
1524    ab\=ph
1525Partial match: b
1526    ab\=ps
1527Partial match: b
1528\= Expect no match
1529    b\x{df00}\=ph
1530No match
1531    b\x{df00}\=ps
1532No match
1533
1534/.a$/match_invalid_utf
1535    ab\=ph
1536Partial match: b
1537    ab\=ps
1538Partial match: b
1539\= Expect no match
1540    b\x{df00}\=ph
1541No match
1542    b\x{df00}\=ps
1543No match
1544
1545/ab$/match_invalid_utf
1546    ab\x{df00}cdeab
1547 0: ab
1548\= Expect no match
1549    ab\x{df00}cde
1550No match
1551
1552/.../g,match_invalid_utf
1553    abcd\x{80}wxzy\x{df00}pqrs
1554 0: abc
1555 0: d\x{80}w
1556 0: xzy
1557 0: pqr
1558
1559/(?<=x)../g,match_invalid_utf
1560    abcd\x{80}wxzy\x{df00}pqrs
1561 0: zy
1562    abcd\x{80}wxzy\x{df00}xpqrs
1563 0: zy
1564 0: pq
1565
1566/X$/match_invalid_utf
1567\= Expect no match
1568    X\x{df00}
1569No match
1570
1571/(?<=..)X/match_invalid_utf,aftertext
1572    AB\x{df00}AQXYZ
1573 0: X
1574 0+ YZ
1575    AB\x{df00}AQXYZ\=offset=5
1576 0: X
1577 0+ YZ
1578    AB\x{df00}\x{df00}AXYZXC\=offset=5
1579 0: X
1580 0+ C
1581\= Expect no match
1582    AB\x{df00}XYZ
1583No match
1584    AB\x{df00}XYZ\=offset=3
1585No match
1586    AB\x{df00}AXYZ
1587No match
1588    AB\x{df00}AXYZ\=offset=4
1589No match
1590    AB\x{df00}\x{df00}AXYZ\=offset=5
1591No match
1592
1593/.../match_invalid_utf
1594\= Expect no match
1595    A\x{d800}B
1596No match
1597    A\x{110000}B
1598No match
1599
1600/aa/utf,ucp,match_invalid_utf,global
1601    aa\x{d800}aa
1602 0: aa
1603 0: aa
1604
1605/aa/utf,ucp,match_invalid_utf,global
1606    \x{d800}aa
1607 0: aa
1608
1609/A\z/utf,match_invalid_utf
1610    A\x{df00}\n
1611No match
1612
1613# ----------------------------------------------------
1614
1615/(*UTF)(?=\x{123})/I
1616Capture group count = 0
1617May match empty string
1618Compile options: <none>
1619Overall options: utf
1620First code unit = \x{123}
1621Subject length lower bound = 1
1622
1623/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
1624Capture group count = 0
1625Options: utf
1626First code unit = \xc1 (caseless)
1627Last code unit = \x{145} (caseless)
1628Subject length lower bound = 3
1629
1630/[\xff\x{ffff}]/I,utf
1631Capture group count = 0
1632Options: utf
1633Starting code units: \xff
1634Subject length lower bound = 1
1635
1636/[\xff\x{ff}]/I,utf
1637Capture group count = 0
1638Options: utf
1639Starting code units: \xff
1640Subject length lower bound = 1
1641
1642/[\xff\x{ff}]/I
1643Capture group count = 0
1644Starting code units: \xff
1645Subject length lower bound = 1
1646
1647/[Ss]/I
1648Capture group count = 0
1649First code unit = 'S' (caseless)
1650Subject length lower bound = 1
1651
1652/[Ss]/I,utf
1653Capture group count = 0
1654Options: utf
1655Starting code units: S s
1656Subject length lower bound = 1
1657
1658/(?:\x{ff}|\x{3000})/I,utf
1659Capture group count = 0
1660Options: utf
1661Starting code units: \xff
1662Subject length lower bound = 1
1663
1664# ----------------------------------------------------
1665# UCP and casing tests
1666
1667/\x{120}/i,I
1668Capture group count = 0
1669Options: caseless
1670First code unit = \x{120}
1671Subject length lower bound = 1
1672
1673/\x{c1}/i,I,ucp
1674Capture group count = 0
1675Options: caseless ucp
1676First code unit = \xc1 (caseless)
1677Subject length lower bound = 1
1678
1679/[\x{120}\x{121}]/iB,ucp
1680------------------------------------------------------------------
1681        Bra
1682     /i \x{120}
1683        Ket
1684        End
1685------------------------------------------------------------------
1686
1687/[ab\x{120}]+/iB,ucp
1688------------------------------------------------------------------
1689        Bra
1690        [ABab\x{120}-\x{121}]++
1691        Ket
1692        End
1693------------------------------------------------------------------
1694    aABb\x{121}\x{120}
1695 0: aABb\x{121}\x{120}
1696
1697/\x{c1}/i,no_start_optimize
1698\= Expect no match
1699    \x{e1}
1700No match
1701
1702/\x{120}\x{c1}/i,ucp,no_start_optimize
1703    \x{121}\x{e1}
1704 0: \x{121}\xe1
1705
1706/\x{120}\x{c1}/i,ucp
1707    \x{121}\x{e1}
1708 0: \x{121}\xe1
1709
1710/[^\x{120}]/i,no_start_optimize
1711    \x{121}
1712 0: \x{121}
1713
1714/[^\x{120}]/i,ucp,no_start_optimize
1715\= Expect no match
1716    \x{121}
1717No match
1718
1719/[^\x{120}]/i
1720    \x{121}
1721 0: \x{121}
1722
1723/[^\x{120}]/i,ucp
1724\= Expect no match
1725    \x{121}
1726No match
1727
1728/\x{120}{2}/i,ucp
1729    \x{121}\x{121}
1730 0: \x{121}\x{121}
1731
1732/[^\x{120}]{2}/i,ucp
1733\= Expect no match
1734    \x{121}\x{121}
1735No match
1736
1737/\x{c1}+\x{e1}/iB,ucp
1738------------------------------------------------------------------
1739        Bra
1740     /i \x{c1}+
1741     /i \x{e1}
1742        Ket
1743        End
1744------------------------------------------------------------------
1745    \x{c1}\x{c1}\x{c1}
1746 0: \xc1\xc1\xc1
1747
1748/\x{c1}+\x{e1}/iIB,ucp
1749------------------------------------------------------------------
1750        Bra
1751     /i \x{c1}+
1752     /i \x{e1}
1753        Ket
1754        End
1755------------------------------------------------------------------
1756Capture group count = 0
1757Options: caseless ucp
1758First code unit = \xc1 (caseless)
1759Last code unit = \xe1 (caseless)
1760Subject length lower bound = 2
1761    \x{c1}\x{c1}\x{c1}
1762 0: \xc1\xc1\xc1
1763    \x{e1}\x{e1}\x{e1}
1764 0: \xe1\xe1\xe1
1765
1766/a|\x{c1}/iI,ucp
1767Capture group count = 0
1768Options: caseless ucp
1769Starting code units: A a \xc1 \xe1
1770Subject length lower bound = 1
1771    \x{e1}xxx
1772 0: \xe1
1773
1774/\x{c1}|\x{e1}/iI,ucp
1775Capture group count = 0
1776Options: caseless ucp
1777First code unit = \xc1 (caseless)
1778Subject length lower bound = 1
1779
1780/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
1781    X\x{e1}Y
1782 1: >\xc1<
1783
1784/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
1785    X\x{121}Y
1786 1: >\x{120}<
1787
1788/s/i,ucp
1789    \x{17f}
1790 0: \x{17f}
1791
1792/s/i,utf
1793    \x{17f}
1794 0: \x{17f}
1795
1796/[^s]/i,ucp
1797\= Expect no match
1798    \x{17f}
1799No match
1800
1801/[^s]/i,utf
1802\= Expect no match
1803    \x{17f}
1804No match
1805
1806# ----------------------------------------------------
1807
1808# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
1809# fails in 16-bit mode, but is OK for 32-bit.
1810
1811/\x{802a0000}*/
1812    \x{802a0000}\x{802a0000}
1813 0: \x{802a0000}\x{802a0000}
1814
1815# UTF matching without UTF, check invalid UTF characters
1816/\X++/
1817    a\x{110000}\x{ffffffff}
1818 0: a\x{110000}\x{ffffffff}
1819
1820# This used to loop in 32-bit mode; it will fail in 16-bit mode.
1821/[\x{ffffffff}]/caseless,ucp
1822    \x{ffffffff}xyz
1823 0: \x{ffffffff}
1824
1825# These are 32-bit tests for handling 0xffffffff when in UCP caseless mode. They
1826# will give errors in 16-bit mode.
1827
1828/k*\x{ffffffff}/caseless,ucp
1829    \x{ffffffff}
1830 0: \x{ffffffff}
1831
1832/k+\x{ffffffff}/caseless,ucp,no_start_optimize
1833    K\x{ffffffff}
1834 0: K\x{ffffffff}
1835\= Expect no match
1836    \x{ffffffff}\x{ffffffff}
1837No match
1838
1839/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
1840\= Expect no match
1841    \x{ffffffff}\x{ffffffff}\x{ffffffff}
1842No match
1843
1844/k\x{ffffffff}/caseless,ucp,no_start_optimize
1845    K\x{ffffffff}
1846 0: K\x{ffffffff}
1847\= Expect no match
1848    \x{ffffffff}\x{ffffffff}\x{ffffffff}
1849No match
1850
1851/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
1852\= Expect no match
1853    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
1854No match
1855
1856# ---------------------------------------------------------
1857
1858# End of testinput12
1859