xref: /aosp_15_r20/external/pcre/testdata/testoutput10 (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1# This set of tests is for UTF-8 support and Unicode property support, with
2# relevance only for the 8-bit library.
3
4#newline_default lf any anycrlf
5
6# The next 5 patterns have UTF-8 errors
7
8/[�]/utf
9Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
10
11/�/utf
12Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
13
14/���xxx/utf
15Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
16
17/��������/utf
18Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
19
20/��������/match_invalid_utf
21Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
22
23# Now test subjects
24
25/badutf/utf
26\= Expect UTF-8 errors
27    X\xdf
28Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
29    XX\xef
30Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
31    XXX\xef\x80
32Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
33    X\xf7
34Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
35    XX\xf7\x80
36Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
37    XXX\xf7\x80\x80
38Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
39    \xfb
40Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
41    \xfb\x80
42Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
43    \xfb\x80\x80
44Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
45    \xfb\x80\x80\x80
46Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
47    \xfd
48Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
49    \xfd\x80
50Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
51    \xfd\x80\x80
52Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
53    \xfd\x80\x80\x80
54Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
55    \xfd\x80\x80\x80\x80
56Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
57    \xdf\x7f
58Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
59    \xef\x7f\x80
60Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
61    \xef\x80\x7f
62Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
63    \xf7\x7f\x80\x80
64Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
65    \xf7\x80\x7f\x80
66Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
67    \xf7\x80\x80\x7f
68Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
69    \xfb\x7f\x80\x80\x80
70Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
71    \xfb\x80\x7f\x80\x80
72Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
73    \xfb\x80\x80\x7f\x80
74Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
75    \xfb\x80\x80\x80\x7f
76Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
77    \xfd\x7f\x80\x80\x80\x80
78Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
79    \xfd\x80\x7f\x80\x80\x80
80Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
81    \xfd\x80\x80\x7f\x80\x80
82Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
83    \xfd\x80\x80\x80\x7f\x80
84Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
85    \xfd\x80\x80\x80\x80\x7f
86Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0
87    \xed\xa0\x80
88Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
89    \xc0\x8f
90Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0
91    \xe0\x80\x8f
92Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0
93    \xf0\x80\x80\x8f
94Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
95    \xf8\x80\x80\x80\x8f
96Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
97    \xfc\x80\x80\x80\x80\x8f
98Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
99    \x80
100Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
101    \xfe
102Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
103    \xff
104Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
105
106/badutf/utf
107\= Expect UTF-8 errors
108    XX\xfb\x80\x80\x80\x80
109Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
110    XX\xfd\x80\x80\x80\x80\x80
111Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2
112    XX\xf7\xbf\xbf\xbf
113Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
114
115/shortutf/utf
116\= Expect UTF-8 errors
117    XX\xdf\=ph
118Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
119    XX\xef\=ph
120Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
121    XX\xef\x80\=ph
122Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
123    \xf7\=ph
124Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
125    \xf7\x80\=ph
126Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
127    \xf7\x80\x80\=ph
128Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
129    \xfb\=ph
130Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
131    \xfb\x80\=ph
132Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
133    \xfb\x80\x80\=ph
134Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
135    \xfb\x80\x80\x80\=ph
136Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
137    \xfd\=ph
138Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
139    \xfd\x80\=ph
140Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
141    \xfd\x80\x80\=ph
142Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
143    \xfd\x80\x80\x80\=ph
144Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
145    \xfd\x80\x80\x80\x80\=ph
146Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
147
148/anything/utf
149\= Expect UTF-8 errors
150    X\xc0\x80
151Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
152    XX\xc1\x8f
153Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2
154    XXX\xe0\x9f\x80
155Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3
156    \xf0\x8f\x80\x80
157Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
158    \xf8\x87\x80\x80\x80
159Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
160    \xfc\x83\x80\x80\x80\x80
161Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
162    \xfe\x80\x80\x80\x80\x80
163Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
164    \xff\x80\x80\x80\x80\x80
165Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
166    \xf8\x88\x80\x80\x80
167Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
168    \xf9\x87\x80\x80\x80
169Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
170    \xfc\x84\x80\x80\x80\x80
171Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
172    \xfd\x83\x80\x80\x80\x80
173Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
174\= Expect no match
175    \xc3\x8f
176No match
177    \xe0\xaf\x80
178No match
179    \xe1\x80\x80
180No match
181    \xf0\x9f\x80\x80
182No match
183    \xf1\x8f\x80\x80
184No match
185    \xf8\x88\x80\x80\x80\=no_utf_check
186No match
187    \xf9\x87\x80\x80\x80\=no_utf_check
188No match
189    \xfc\x84\x80\x80\x80\x80\=no_utf_check
190No match
191    \xfd\x83\x80\x80\x80\x80\=no_utf_check
192No match
193
194# Similar tests with offsets
195
196/badutf/utf
197\= Expect UTF-8 errors
198    X\xdfabcd
199Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
200    X\xdfabcd\=offset=1
201Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
202\= Expect no match
203    X\xdfabcd\=offset=2
204No match
205
206/(?<=x)badutf/utf
207\= Expect UTF-8 errors
208    X\xdfabcd
209Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
210    X\xdfabcd\=offset=1
211Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
212    X\xdfabcd\=offset=2
213Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
214    X\xdfabcd\xdf\=offset=3
215Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
216\= Expect no match
217    X\xdfabcd\=offset=3
218No match
219
220/(?<=xx)badutf/utf
221\= Expect UTF-8 errors
222    X\xdfabcd
223Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
224    X\xdfabcd\=offset=1
225Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
226    X\xdfabcd\=offset=2
227Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
228    X\xdfabcd\=offset=3
229Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
230
231/(?<=xxxx)badutf/utf
232\= Expect UTF-8 errors
233    X\xdfabcd
234Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
235    X\xdfabcd\=offset=1
236Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
237    X\xdfabcd\=offset=2
238Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
239    X\xdfabcd\=offset=3
240Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
241    X\xdfabc\xdf\=offset=6
242Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
243    X\xdfabc\xdf\=offset=7
244Failed: error -33: bad offset value
245\= Expect no match
246    X\xdfabcd\=offset=6
247No match
248
249/\x{100}/IB,utf
250------------------------------------------------------------------
251        Bra
252        \x{100}
253        Ket
254        End
255------------------------------------------------------------------
256Capture group count = 0
257Options: utf
258First code unit = \xc4
259Last code unit = \x80
260Subject length lower bound = 1
261
262/\x{1000}/IB,utf
263------------------------------------------------------------------
264        Bra
265        \x{1000}
266        Ket
267        End
268------------------------------------------------------------------
269Capture group count = 0
270Options: utf
271First code unit = \xe1
272Last code unit = \x80
273Subject length lower bound = 1
274
275/\x{10000}/IB,utf
276------------------------------------------------------------------
277        Bra
278        \x{10000}
279        Ket
280        End
281------------------------------------------------------------------
282Capture group count = 0
283Options: utf
284First code unit = \xf0
285Last code unit = \x80
286Subject length lower bound = 1
287
288/\x{100000}/IB,utf
289------------------------------------------------------------------
290        Bra
291        \x{100000}
292        Ket
293        End
294------------------------------------------------------------------
295Capture group count = 0
296Options: utf
297First code unit = \xf4
298Last code unit = \x80
299Subject length lower bound = 1
300
301/\x{10ffff}/IB,utf
302------------------------------------------------------------------
303        Bra
304        \x{10ffff}
305        Ket
306        End
307------------------------------------------------------------------
308Capture group count = 0
309Options: utf
310First code unit = \xf4
311Last code unit = \xbf
312Subject length lower bound = 1
313
314/[\x{ff}]/IB,utf
315------------------------------------------------------------------
316        Bra
317        \x{ff}
318        Ket
319        End
320------------------------------------------------------------------
321Capture group count = 0
322Options: utf
323First code unit = \xc3
324Last code unit = \xbf
325Subject length lower bound = 1
326
327/[\x{100}]/IB,utf
328------------------------------------------------------------------
329        Bra
330        \x{100}
331        Ket
332        End
333------------------------------------------------------------------
334Capture group count = 0
335Options: utf
336First code unit = \xc4
337Last code unit = \x80
338Subject length lower bound = 1
339
340/\x80/IB,utf
341------------------------------------------------------------------
342        Bra
343        \x{80}
344        Ket
345        End
346------------------------------------------------------------------
347Capture group count = 0
348Options: utf
349First code unit = \xc2
350Last code unit = \x80
351Subject length lower bound = 1
352
353/\xff/IB,utf
354------------------------------------------------------------------
355        Bra
356        \x{ff}
357        Ket
358        End
359------------------------------------------------------------------
360Capture group count = 0
361Options: utf
362First code unit = \xc3
363Last code unit = \xbf
364Subject length lower bound = 1
365
366/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
367------------------------------------------------------------------
368        Bra
369        \x{d55c}\x{ad6d}\x{c5b4}
370        Ket
371        End
372------------------------------------------------------------------
373Capture group count = 0
374Options: utf
375First code unit = \xed
376Last code unit = \xb4
377Subject length lower bound = 3
378    \x{D55c}\x{ad6d}\x{C5B4}
379 0: \x{d55c}\x{ad6d}\x{c5b4}
380
381/\x{65e5}\x{672c}\x{8a9e}/IB,utf
382------------------------------------------------------------------
383        Bra
384        \x{65e5}\x{672c}\x{8a9e}
385        Ket
386        End
387------------------------------------------------------------------
388Capture group count = 0
389Options: utf
390First code unit = \xe6
391Last code unit = \x9e
392Subject length lower bound = 3
393    \x{65e5}\x{672c}\x{8a9e}
394 0: \x{65e5}\x{672c}\x{8a9e}
395
396/\x{80}/IB,utf
397------------------------------------------------------------------
398        Bra
399        \x{80}
400        Ket
401        End
402------------------------------------------------------------------
403Capture group count = 0
404Options: utf
405First code unit = \xc2
406Last code unit = \x80
407Subject length lower bound = 1
408
409/\x{084}/IB,utf
410------------------------------------------------------------------
411        Bra
412        \x{84}
413        Ket
414        End
415------------------------------------------------------------------
416Capture group count = 0
417Options: utf
418First code unit = \xc2
419Last code unit = \x84
420Subject length lower bound = 1
421
422/\x{104}/IB,utf
423------------------------------------------------------------------
424        Bra
425        \x{104}
426        Ket
427        End
428------------------------------------------------------------------
429Capture group count = 0
430Options: utf
431First code unit = \xc4
432Last code unit = \x84
433Subject length lower bound = 1
434
435/\x{861}/IB,utf
436------------------------------------------------------------------
437        Bra
438        \x{861}
439        Ket
440        End
441------------------------------------------------------------------
442Capture group count = 0
443Options: utf
444First code unit = \xe0
445Last code unit = \xa1
446Subject length lower bound = 1
447
448/\x{212ab}/IB,utf
449------------------------------------------------------------------
450        Bra
451        \x{212ab}
452        Ket
453        End
454------------------------------------------------------------------
455Capture group count = 0
456Options: utf
457First code unit = \xf0
458Last code unit = \xab
459Subject length lower bound = 1
460
461/[^ab\xC0-\xF0]/IB,utf
462------------------------------------------------------------------
463        Bra
464        [\x00-`c-\xbf\xf1-\xff] (neg)
465        Ket
466        End
467------------------------------------------------------------------
468Capture group count = 0
469Options: utf
470Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
471  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
472  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
473  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
474  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
475  \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
476  \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
477  \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
478  \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
479  \xfe \xff
480Subject length lower bound = 1
481    \x{f1}
482 0: \x{f1}
483    \x{bf}
484 0: \x{bf}
485    \x{100}
486 0: \x{100}
487    \x{1000}
488 0: \x{1000}
489\= Expect no match
490    \x{c0}
491No match
492    \x{f0}
493No match
494
495/Ā{3,4}/IB,utf
496------------------------------------------------------------------
497        Bra
498        \x{100}{3}
499        \x{100}?+
500        Ket
501        End
502------------------------------------------------------------------
503Capture group count = 0
504Options: utf
505First code unit = \xc4
506Last code unit = \x80
507Subject length lower bound = 3
508  \x{100}\x{100}\x{100}\x{100\x{100}
509 0: \x{100}\x{100}\x{100}
510
511/(\x{100}+|x)/IB,utf
512------------------------------------------------------------------
513        Bra
514        CBra 1
515        \x{100}++
516        Alt
517        x
518        Ket
519        Ket
520        End
521------------------------------------------------------------------
522Capture group count = 1
523Options: utf
524Starting code units: x \xc4
525Subject length lower bound = 1
526
527/(\x{100}*a|x)/IB,utf
528------------------------------------------------------------------
529        Bra
530        CBra 1
531        \x{100}*+
532        a
533        Alt
534        x
535        Ket
536        Ket
537        End
538------------------------------------------------------------------
539Capture group count = 1
540Options: utf
541Starting code units: a x \xc4
542Subject length lower bound = 1
543
544/(\x{100}{0,2}a|x)/IB,utf
545------------------------------------------------------------------
546        Bra
547        CBra 1
548        \x{100}{0,2}+
549        a
550        Alt
551        x
552        Ket
553        Ket
554        End
555------------------------------------------------------------------
556Capture group count = 1
557Options: utf
558Starting code units: a x \xc4
559Subject length lower bound = 1
560
561/(\x{100}{1,2}a|x)/IB,utf
562------------------------------------------------------------------
563        Bra
564        CBra 1
565        \x{100}
566        \x{100}{0,1}+
567        a
568        Alt
569        x
570        Ket
571        Ket
572        End
573------------------------------------------------------------------
574Capture group count = 1
575Options: utf
576Starting code units: x \xc4
577Subject length lower bound = 1
578
579/\x{100}/IB,utf
580------------------------------------------------------------------
581        Bra
582        \x{100}
583        Ket
584        End
585------------------------------------------------------------------
586Capture group count = 0
587Options: utf
588First code unit = \xc4
589Last code unit = \x80
590Subject length lower bound = 1
591
592/a\x{100}\x{101}*/IB,utf
593------------------------------------------------------------------
594        Bra
595        a\x{100}
596        \x{101}*+
597        Ket
598        End
599------------------------------------------------------------------
600Capture group count = 0
601Options: utf
602First code unit = 'a'
603Last code unit = \x80
604Subject length lower bound = 2
605
606/a\x{100}\x{101}+/IB,utf
607------------------------------------------------------------------
608        Bra
609        a\x{100}
610        \x{101}++
611        Ket
612        End
613------------------------------------------------------------------
614Capture group count = 0
615Options: utf
616First code unit = 'a'
617Last code unit = \x81
618Subject length lower bound = 3
619
620/[^\x{c4}]/IB
621------------------------------------------------------------------
622        Bra
623        [^\x{c4}]
624        Ket
625        End
626------------------------------------------------------------------
627Capture group count = 0
628Subject length lower bound = 1
629
630/[\x{100}]/IB,utf
631------------------------------------------------------------------
632        Bra
633        \x{100}
634        Ket
635        End
636------------------------------------------------------------------
637Capture group count = 0
638Options: utf
639First code unit = \xc4
640Last code unit = \x80
641Subject length lower bound = 1
642    \x{100}
643 0: \x{100}
644    Z\x{100}
645 0: \x{100}
646    \x{100}Z
647 0: \x{100}
648
649/[\xff]/IB,utf
650------------------------------------------------------------------
651        Bra
652        \x{ff}
653        Ket
654        End
655------------------------------------------------------------------
656Capture group count = 0
657Options: utf
658First code unit = \xc3
659Last code unit = \xbf
660Subject length lower bound = 1
661    >\x{ff}<
662 0: \x{ff}
663
664/[^\xff]/IB,utf
665------------------------------------------------------------------
666        Bra
667        [^\x{ff}]
668        Ket
669        End
670------------------------------------------------------------------
671Capture group count = 0
672Options: utf
673Subject length lower bound = 1
674
675/\x{100}abc(xyz(?1))/IB,utf
676------------------------------------------------------------------
677        Bra
678        \x{100}abc
679        CBra 1
680        xyz
681        Recurse
682        Ket
683        Ket
684        End
685------------------------------------------------------------------
686Capture group count = 1
687Options: utf
688First code unit = \xc4
689Last code unit = 'z'
690Subject length lower bound = 7
691
692/\777/I,utf
693Capture group count = 0
694Options: utf
695First code unit = \xc7
696Last code unit = \xbf
697Subject length lower bound = 1
698  \x{1ff}
699 0: \x{1ff}
700  \777
701 0: \x{1ff}
702
703/\x{100}+\x{200}/IB,utf
704------------------------------------------------------------------
705        Bra
706        \x{100}++
707        \x{200}
708        Ket
709        End
710------------------------------------------------------------------
711Capture group count = 0
712Options: utf
713First code unit = \xc4
714Last code unit = \x80
715Subject length lower bound = 2
716
717/\x{100}+X/IB,utf
718------------------------------------------------------------------
719        Bra
720        \x{100}++
721        X
722        Ket
723        End
724------------------------------------------------------------------
725Capture group count = 0
726Options: utf
727First code unit = \xc4
728Last code unit = 'X'
729Subject length lower bound = 2
730
731/^[\QĀ\E-\QŐ\E/B,utf
732Failed: error 106 at offset 15: missing terminating ] for character class
733
734# This tests the stricter UTF-8 check according to RFC 3629.
735
736/X/utf
737\= Expect UTF-8 errors
738    \x{d800}
739Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
740    \x{da00}
741Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
742    \x{dfff}
743Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
744    \x{110000}
745Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
746    \x{2000000}
747Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
748    \x{7fffffff}
749Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
750\= Expect no match
751    \x{d800}\=no_utf_check
752No match
753    \x{da00}\=no_utf_check
754No match
755    \x{dfff}\=no_utf_check
756No match
757    \x{110000}\=no_utf_check
758No match
759    \x{2000000}\=no_utf_check
760No match
761    \x{7fffffff}\=no_utf_check
762No match
763
764/(*UTF8)\x{1234}/
765    abcd\x{1234}pqr
766 0: \x{1234}
767
768/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
769Capture group count = 0
770Compile options: <none>
771Overall options: utf
772\R matches any Unicode newline
773Forced newline is CRLF
774First code unit = 'a'
775Last code unit = 'b'
776Subject length lower bound = 3
777
778/\h/I,utf
779Capture group count = 0
780Options: utf
781Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3
782Subject length lower bound = 1
783    ABC\x{09}
784 0: \x{09}
785    ABC\x{20}
786 0:
787    ABC\x{a0}
788 0: \x{a0}
789    ABC\x{1680}
790 0: \x{1680}
791    ABC\x{180e}
792 0: \x{180e}
793    ABC\x{2000}
794 0: \x{2000}
795    ABC\x{202f}
796 0: \x{202f}
797    ABC\x{205f}
798 0: \x{205f}
799    ABC\x{3000}
800 0: \x{3000}
801
802/\v/I,utf
803Capture group count = 0
804Options: utf
805Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
806Subject length lower bound = 1
807    ABC\x{0a}
808 0: \x{0a}
809    ABC\x{0b}
810 0: \x{0b}
811    ABC\x{0c}
812 0: \x{0c}
813    ABC\x{0d}
814 0: \x{0d}
815    ABC\x{85}
816 0: \x{85}
817    ABC\x{2028}
818 0: \x{2028}
819
820/\h*A/I,utf
821Capture group count = 0
822Options: utf
823Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
824Last code unit = 'A'
825Subject length lower bound = 1
826    CDBABC
827 0: A
828
829/\v+A/I,utf
830Capture group count = 0
831Options: utf
832Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
833Last code unit = 'A'
834Subject length lower bound = 2
835
836/\s?xxx\s/I,utf
837Capture group count = 0
838Options: utf
839Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
840Last code unit = 'x'
841Subject length lower bound = 4
842
843/\sxxx\s/I,utf,tables=2
844Capture group count = 0
845Options: utf
846Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
847Last code unit = 'x'
848Subject length lower bound = 5
849    AB\x{85}xxx\x{a0}XYZ
850 0: \x{85}xxx\x{a0}
851    AB\x{a0}xxx\x{85}XYZ
852 0: \x{a0}xxx\x{85}
853
854/\S \S/I,utf,tables=2
855Capture group count = 0
856Options: utf
857Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
858  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
859  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
860  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
861  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
862  \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
863  \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
864  \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
865  \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
866Last code unit = ' '
867Subject length lower bound = 3
868    \x{a2} \x{84}
869 0: \x{a2} \x{84}
870    A Z
871 0: A Z
872
873/a+/utf
874    a\x{123}aa\=offset=1
875 0: aa
876    a\x{123}aa\=offset=3
877 0: aa
878    a\x{123}aa\=offset=4
879 0: a
880\= Expect bad offset value
881    a\x{123}aa\=offset=6
882Failed: error -33: bad offset value
883\= Expect bad UTF-8 offset
884    a\x{123}aa\=offset=2
885Error -36 (bad UTF-8 offset)
886\= Expect no match
887    a\x{123}aa\=offset=5
888No match
889
890/\x{1234}+/Ii,utf
891Capture group count = 0
892Options: caseless utf
893Starting code units: \xe1
894Subject length lower bound = 1
895
896/\x{1234}+?/Ii,utf
897Capture group count = 0
898Options: caseless utf
899Starting code units: \xe1
900Subject length lower bound = 1
901
902/\x{1234}++/Ii,utf
903Capture group count = 0
904Options: caseless utf
905Starting code units: \xe1
906Subject length lower bound = 1
907
908/\x{1234}{2}/Ii,utf
909Capture group count = 0
910Options: caseless utf
911Starting code units: \xe1
912Subject length lower bound = 2
913
914/[^\x{c4}]/IB,utf
915------------------------------------------------------------------
916        Bra
917        [^\x{c4}]
918        Ket
919        End
920------------------------------------------------------------------
921Capture group count = 0
922Options: utf
923Subject length lower bound = 1
924
925/X+\x{200}/IB,utf
926------------------------------------------------------------------
927        Bra
928        X++
929        \x{200}
930        Ket
931        End
932------------------------------------------------------------------
933Capture group count = 0
934Options: utf
935First code unit = 'X'
936Last code unit = \x80
937Subject length lower bound = 2
938
939/\R/I,utf
940Capture group count = 0
941Options: utf
942Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
943Subject length lower bound = 1
944
945/\777/IB,utf
946------------------------------------------------------------------
947        Bra
948        \x{1ff}
949        Ket
950        End
951------------------------------------------------------------------
952Capture group count = 0
953Options: utf
954First code unit = \xc7
955Last code unit = \xbf
956Subject length lower bound = 1
957
958/\w+\x{C4}/B,utf
959------------------------------------------------------------------
960        Bra
961        \w++
962        \x{c4}
963        Ket
964        End
965------------------------------------------------------------------
966    a\x{C4}\x{C4}
967 0: a\x{c4}
968
969/\w+\x{C4}/B,utf,tables=2
970------------------------------------------------------------------
971        Bra
972        \w+
973        \x{c4}
974        Ket
975        End
976------------------------------------------------------------------
977    a\x{C4}\x{C4}
978 0: a\x{c4}\x{c4}
979
980/\W+\x{C4}/B,utf
981------------------------------------------------------------------
982        Bra
983        \W+
984        \x{c4}
985        Ket
986        End
987------------------------------------------------------------------
988    !\x{C4}
989 0: !\x{c4}
990
991/\W+\x{C4}/B,utf,tables=2
992------------------------------------------------------------------
993        Bra
994        \W++
995        \x{c4}
996        Ket
997        End
998------------------------------------------------------------------
999    !\x{C4}
1000 0: !\x{c4}
1001
1002/\W+\x{A1}/B,utf
1003------------------------------------------------------------------
1004        Bra
1005        \W+
1006        \x{a1}
1007        Ket
1008        End
1009------------------------------------------------------------------
1010    !\x{A1}
1011 0: !\x{a1}
1012
1013/\W+\x{A1}/B,utf,tables=2
1014------------------------------------------------------------------
1015        Bra
1016        \W+
1017        \x{a1}
1018        Ket
1019        End
1020------------------------------------------------------------------
1021    !\x{A1}
1022 0: !\x{a1}
1023
1024/X\s+\x{A0}/B,utf
1025------------------------------------------------------------------
1026        Bra
1027        X
1028        \s++
1029        \x{a0}
1030        Ket
1031        End
1032------------------------------------------------------------------
1033    X\x20\x{A0}\x{A0}
1034 0: X \x{a0}
1035
1036/X\s+\x{A0}/B,utf,tables=2
1037------------------------------------------------------------------
1038        Bra
1039        X
1040        \s+
1041        \x{a0}
1042        Ket
1043        End
1044------------------------------------------------------------------
1045    X\x20\x{A0}\x{A0}
1046 0: X \x{a0}\x{a0}
1047
1048/\S+\x{A0}/B,utf
1049------------------------------------------------------------------
1050        Bra
1051        \S+
1052        \x{a0}
1053        Ket
1054        End
1055------------------------------------------------------------------
1056    X\x{A0}\x{A0}
1057 0: X\x{a0}\x{a0}
1058
1059/\S+\x{A0}/B,utf,tables=2
1060------------------------------------------------------------------
1061        Bra
1062        \S++
1063        \x{a0}
1064        Ket
1065        End
1066------------------------------------------------------------------
1067    X\x{A0}\x{A0}
1068 0: X\x{a0}
1069
1070/\x{a0}+\s!/B,utf
1071------------------------------------------------------------------
1072        Bra
1073        \x{a0}++
1074        \s
1075        !
1076        Ket
1077        End
1078------------------------------------------------------------------
1079    \x{a0}\x20!
1080 0: \x{a0} !
1081
1082/\x{a0}+\s!/B,utf,tables=2
1083------------------------------------------------------------------
1084        Bra
1085        \x{a0}+
1086        \s
1087        !
1088        Ket
1089        End
1090------------------------------------------------------------------
1091    \x{a0}\x20!
1092 0: \x{a0} !
1093
1094/A/utf
1095  \x{ff000041}
1096** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
1097  \x{7f000041}
1098Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
1099
1100/(*UTF8)abc/never_utf
1101Failed: error 174 at offset 7: using UTF is disabled by the application
1102
1103/abc/utf,never_utf
1104Failed: error 174 at offset 0: using UTF is disabled by the application
1105
1106/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
1107------------------------------------------------------------------
1108        Bra
1109     /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1110        Ket
1111        End
1112------------------------------------------------------------------
1113Capture group count = 0
1114Options: caseless utf
1115First code unit = 'A' (caseless)
1116Subject length lower bound = 5
1117
1118/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
1119------------------------------------------------------------------
1120        Bra
1121        A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1122        Ket
1123        End
1124------------------------------------------------------------------
1125Capture group count = 0
1126Options: utf
1127First code unit = 'A'
1128Last code unit = \xb0
1129Subject length lower bound = 5
1130
1131/AB\x{1fb0}/IB,utf
1132------------------------------------------------------------------
1133        Bra
1134        AB\x{1fb0}
1135        Ket
1136        End
1137------------------------------------------------------------------
1138Capture group count = 0
1139Options: utf
1140First code unit = 'A'
1141Last code unit = \xb0
1142Subject length lower bound = 3
1143
1144/AB\x{1fb0}/IBi,utf
1145------------------------------------------------------------------
1146        Bra
1147     /i AB\x{1fb0}
1148        Ket
1149        End
1150------------------------------------------------------------------
1151Capture group count = 0
1152Options: caseless utf
1153First code unit = 'A' (caseless)
1154Last code unit = 'B' (caseless)
1155Subject length lower bound = 3
1156
1157/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
1158Capture group count = 0
1159Options: caseless utf
1160Starting code units: \xd0 \xd1
1161Subject length lower bound = 17
1162    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1163 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1164    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1165 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1166
1167/[ⱥ]/Bi,utf
1168------------------------------------------------------------------
1169        Bra
1170     /i \x{2c65}
1171        Ket
1172        End
1173------------------------------------------------------------------
1174
1175/[^ⱥ]/Bi,utf
1176------------------------------------------------------------------
1177        Bra
1178     /i [^\x{2c65}]
1179        Ket
1180        End
1181------------------------------------------------------------------
1182
1183/\h/I
1184Capture group count = 0
1185Starting code units: \x09 \x20 \xa0
1186Subject length lower bound = 1
1187
1188/\v/I
1189Capture group count = 0
1190Starting code units: \x0a \x0b \x0c \x0d \x85
1191Subject length lower bound = 1
1192
1193/\R/I
1194Capture group count = 0
1195Starting code units: \x0a \x0b \x0c \x0d \x85
1196Subject length lower bound = 1
1197
1198/[[:blank:]]/B,ucp
1199------------------------------------------------------------------
1200        Bra
1201        [\x09 \xa0]
1202        Ket
1203        End
1204------------------------------------------------------------------
1205
1206/\x{212a}+/Ii,utf
1207Capture group count = 0
1208Options: caseless utf
1209Starting code units: K k \xe2
1210Subject length lower bound = 1
1211    KKkk\x{212a}
1212 0: KKkk\x{212a}
1213
1214/s+/Ii,utf
1215Capture group count = 0
1216Options: caseless utf
1217Starting code units: S s \xc5
1218Subject length lower bound = 1
1219    SSss\x{17f}
1220 0: SSss\x{17f}
1221
1222/\x{100}*A/IB,utf
1223------------------------------------------------------------------
1224        Bra
1225        \x{100}*+
1226        A
1227        Ket
1228        End
1229------------------------------------------------------------------
1230Capture group count = 0
1231Options: utf
1232Starting code units: A \xc4
1233Last code unit = 'A'
1234Subject length lower bound = 1
1235    A
1236 0: A
1237
1238/\x{100}*\d(?R)/IB,utf
1239------------------------------------------------------------------
1240        Bra
1241        \x{100}*+
1242        \d
1243        Recurse
1244        Ket
1245        End
1246------------------------------------------------------------------
1247Capture group count = 0
1248Options: utf
1249Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1250Subject length lower bound = 1
1251
1252/[Z\x{100}]/IB,utf
1253------------------------------------------------------------------
1254        Bra
1255        [Z\x{100}]
1256        Ket
1257        End
1258------------------------------------------------------------------
1259Capture group count = 0
1260Options: utf
1261Starting code units: Z \xc4
1262Subject length lower bound = 1
1263    Z\x{100}
1264 0: Z
1265    \x{100}
1266 0: \x{100}
1267    \x{100}Z
1268 0: \x{100}
1269
1270/[z-\x{100}]/IB,utf
1271------------------------------------------------------------------
1272        Bra
1273        [z-\xff\x{100}]
1274        Ket
1275        End
1276------------------------------------------------------------------
1277Capture group count = 0
1278Options: utf
1279Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4
1280Subject length lower bound = 1
1281
1282/[z\Qa-d]Ā\E]/IB,utf
1283------------------------------------------------------------------
1284        Bra
1285        [\-\]adz\x{100}]
1286        Ket
1287        End
1288------------------------------------------------------------------
1289Capture group count = 0
1290Options: utf
1291Starting code units: - ] a d z \xc4
1292Subject length lower bound = 1
1293    \x{100}
1294 0: \x{100}
1295    Ā
1296 0: \x{100}
1297
1298/[ab\x{100}]abc(xyz(?1))/IB,utf
1299------------------------------------------------------------------
1300        Bra
1301        [ab\x{100}]
1302        abc
1303        CBra 1
1304        xyz
1305        Recurse
1306        Ket
1307        Ket
1308        End
1309------------------------------------------------------------------
1310Capture group count = 1
1311Options: utf
1312Starting code units: a b \xc4
1313Last code unit = 'z'
1314Subject length lower bound = 7
1315
1316/\x{100}*\s/IB,utf
1317------------------------------------------------------------------
1318        Bra
1319        \x{100}*+
1320        \s
1321        Ket
1322        End
1323------------------------------------------------------------------
1324Capture group count = 0
1325Options: utf
1326Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4
1327Subject length lower bound = 1
1328
1329/\x{100}*\d/IB,utf
1330------------------------------------------------------------------
1331        Bra
1332        \x{100}*+
1333        \d
1334        Ket
1335        End
1336------------------------------------------------------------------
1337Capture group count = 0
1338Options: utf
1339Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1340Subject length lower bound = 1
1341
1342/\x{100}*\w/IB,utf
1343------------------------------------------------------------------
1344        Bra
1345        \x{100}*+
1346        \w
1347        Ket
1348        End
1349------------------------------------------------------------------
1350Capture group count = 0
1351Options: utf
1352Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1353  Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1354  \xc4
1355Subject length lower bound = 1
1356
1357/\x{100}*\D/IB,utf
1358------------------------------------------------------------------
1359        Bra
1360        \x{100}*
1361        \D
1362        Ket
1363        End
1364------------------------------------------------------------------
1365Capture group count = 0
1366Options: utf
1367Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1368  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1369  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1370  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1371  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2
1372  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1373  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1374  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1375  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1376  \xff
1377Subject length lower bound = 1
1378
1379/\x{100}*\S/IB,utf
1380------------------------------------------------------------------
1381        Bra
1382        \x{100}*
1383        \S
1384        Ket
1385        End
1386------------------------------------------------------------------
1387Capture group count = 0
1388Options: utf
1389Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1390  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1391  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1392  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1393  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
1394  \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
1395  \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
1396  \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
1397  \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1398Subject length lower bound = 1
1399
1400/\x{100}*\W/IB,utf
1401------------------------------------------------------------------
1402        Bra
1403        \x{100}*
1404        \W
1405        Ket
1406        End
1407------------------------------------------------------------------
1408Capture group count = 0
1409Options: utf
1410Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1411  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1412  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1413  ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
1414  \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
1415  \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7
1416  \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6
1417  \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1418Subject length lower bound = 1
1419
1420/[\x{105}-\x{109}]/IBi,utf
1421------------------------------------------------------------------
1422        Bra
1423        [\x{104}-\x{109}]
1424        Ket
1425        End
1426------------------------------------------------------------------
1427Capture group count = 0
1428Options: caseless utf
1429Starting code units: \xc4
1430Subject length lower bound = 1
1431    \x{104}
1432 0: \x{104}
1433    \x{105}
1434 0: \x{105}
1435    \x{109}
1436 0: \x{109}
1437\= Expect no match
1438    \x{100}
1439No match
1440    \x{10a}
1441No match
1442
1443/[z-\x{100}]/IBi,utf
1444------------------------------------------------------------------
1445        Bra
1446        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1447        Ket
1448        End
1449------------------------------------------------------------------
1450Capture group count = 0
1451Options: caseless utf
1452Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
1453Subject length lower bound = 1
1454    Z
1455 0: Z
1456    z
1457 0: z
1458    \x{39c}
1459 0: \x{39c}
1460    \x{178}
1461 0: \x{178}
1462    |
1463 0: |
1464    \x{80}
1465 0: \x{80}
1466    \x{ff}
1467 0: \x{ff}
1468    \x{100}
1469 0: \x{100}
1470    \x{101}
1471 0: \x{101}
1472\= Expect no match
1473    \x{102}
1474No match
1475    Y
1476No match
1477    y
1478No match
1479
1480/[z-\x{100}]/IBi,utf
1481------------------------------------------------------------------
1482        Bra
1483        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1484        Ket
1485        End
1486------------------------------------------------------------------
1487Capture group count = 0
1488Options: caseless utf
1489Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2
1490Subject length lower bound = 1
1491
1492/\x{3a3}B/IBi,utf
1493------------------------------------------------------------------
1494        Bra
1495        clist 03a3 03c2 03c3
1496     /i B
1497        Ket
1498        End
1499------------------------------------------------------------------
1500Capture group count = 0
1501Options: caseless utf
1502Starting code units: \xce \xcf
1503Last code unit = 'B' (caseless)
1504Subject length lower bound = 2
1505
1506/abc/utf,replace=�
1507    abc
1508Failed: error -3: UTF-8 error: 1 byte missing at end
1509
1510/(?<=(a)(?-1))x/I,utf
1511Capture group count = 1
1512Max lookbehind = 2
1513Options: utf
1514First code unit = 'x'
1515Subject length lower bound = 1
1516    a\x80zx\=offset=3
1517Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
1518
1519/[\W\p{Any}]/B
1520------------------------------------------------------------------
1521        Bra
1522        [\x00-/:-@[-^`{-\xff\p{Any}]
1523        Ket
1524        End
1525------------------------------------------------------------------
1526    abc
1527 0: a
1528    123
1529 0: 1
1530
1531/[\W\pL]/B
1532------------------------------------------------------------------
1533        Bra
1534        [\x00-/:-@[-^`{-\xff\p{L}]
1535        Ket
1536        End
1537------------------------------------------------------------------
1538    abc
1539 0: a
1540\= Expect no match
1541    123
1542No match
1543
1544/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
1545Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
1546
1547/[\s[:^ascii:]]/B,ucp
1548------------------------------------------------------------------
1549        Bra
1550        [\x80-\xff\p{Xsp}]
1551        Ket
1552        End
1553------------------------------------------------------------------
1554
1555# A special extra option allows escaped surrogate code points in 8-bit mode,
1556# but subjects containing them must not be UTF-checked.
1557
1558/\x{d800}/I,utf,allow_surrogate_escapes
1559Capture group count = 0
1560Options: utf
1561Extra options: allow_surrogate_escapes
1562First code unit = \xed
1563Last code unit = \x80
1564Subject length lower bound = 1
1565    \x{d800}\=no_utf_check
1566 0: \x{d800}
1567
1568/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1569    \x{dfff}\x{df01}\=no_utf_check
1570 0: \x{dfff}\x{df01}
1571
1572# This has different starting code units in 8-bit mode.
1573
1574/^[^ab]/IB,utf
1575------------------------------------------------------------------
1576        Bra
1577        ^
1578        [\x00-`c-\xff] (neg)
1579        Ket
1580        End
1581------------------------------------------------------------------
1582Capture group count = 0
1583Compile options: utf
1584Overall options: anchored utf
1585Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1586  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1587  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1588  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1589  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1590  \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
1591  \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
1592  \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
1593  \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
1594  \xfe \xff
1595Subject length lower bound = 1
1596    c
1597 0: c
1598    \x{ff}
1599 0: \x{ff}
1600    \x{100}
1601 0: \x{100}
1602\= Expect no match
1603    aaa
1604No match
1605
1606# Offsets are different in 8-bit mode.
1607
1608/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
1609    123abcáyzabcdef789abcሴqr
1610 1(2) Old 6 6 "" New 6 8 "<>"
1611 2(2) Old 13 13 "" New 15 17 "<>"
1612 3(2) Old 13 16 "def" New 17 22 "<def>"
1613 4(2) Old 22 22 "" New 28 30 "<>"
1614 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
1615
1616# Check name length with non-ASCII characters
1617
1618/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
1619
1620/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
1621Failed: error 148 at offset 132: subpattern name is too long (maximum 128 code units)
1622
1623/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
1624
1625/(?(n/utf
1626Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1627
1628/(?(á/utf
1629Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
1630
1631# Invalid UTF-8 tests
1632
1633/.../g,match_invalid_utf
1634    abcd\x80wxzy\x80pqrs
1635 0: abc
1636 0: wxz
1637 0: pqr
1638    abcd\x{80}wxzy\x80pqrs
1639 0: abc
1640 0: d\x{80}w
1641 0: xzy
1642 0: pqr
1643
1644/abc/match_invalid_utf
1645    ab\x80ab\=ph
1646Partial match: ab
1647\= Expect no match
1648    ab\x80cdef\=ph
1649No match
1650
1651/.a/match_invalid_utf
1652    ab\=ph
1653Partial match: b
1654    ab\=ps
1655Partial match: b
1656    b\xf0\x91\x88b\=ph
1657Partial match: b
1658    b\xf0\x91\x88b\=ps
1659Partial match: b
1660    b\xf0\x91\x88\xb4a
1661 0: \x{11234}a
1662\= Expect no match
1663    b\x80\=ph
1664No match
1665    b\x80\=ps
1666No match
1667    b\xf0\x91\x88\=ph
1668No match
1669    b\xf0\x91\x88\=ps
1670No match
1671
1672/.a$/match_invalid_utf
1673    ab\=ph
1674Partial match: b
1675    ab\=ps
1676Partial match: b
1677\= Expect no match
1678    b\xf0\x91\x98\=ph
1679No match
1680    b\xf0\x91\x98\=ps
1681No match
1682
1683/ab$/match_invalid_utf
1684    ab\x80cdeab
1685 0: ab
1686\= Expect no match
1687    ab\x80cde
1688No match
1689
1690/.../g,match_invalid_utf
1691    abcd\x{80}wxzy\x80pqrs
1692 0: abc
1693 0: d\x{80}w
1694 0: xzy
1695 0: pqr
1696
1697/(?<=x)../g,match_invalid_utf
1698    abcd\x{80}wxzy\x80pqrs
1699 0: zy
1700    abcd\x{80}wxzy\x80xpqrs
1701 0: zy
1702 0: pq
1703
1704/X$/match_invalid_utf
1705\= Expect no match
1706    X\xc4
1707No match
1708
1709/(?<=..)X/match_invalid_utf,aftertext
1710    AB\x80AQXYZ
1711 0: X
1712 0+ YZ
1713    AB\x80AQXYZ\=offset=5
1714 0: X
1715 0+ YZ
1716    AB\x80\x80AXYZXC\=offset=5
1717 0: X
1718 0+ C
1719\= Expect no match
1720    AB\x80XYZ
1721No match
1722    AB\x80XYZ\=offset=3
1723No match
1724    AB\xfeXYZ
1725No match
1726    AB\xffXYZ\=offset=3
1727No match
1728    AB\x80AXYZ
1729No match
1730    AB\x80AXYZ\=offset=4
1731No match
1732    AB\x80\x80AXYZ\=offset=5
1733No match
1734
1735/.../match_invalid_utf
1736    AB\xc4CCC
1737 0: CCC
1738\= Expect no match
1739    A\x{d800}B
1740No match
1741    A\x{110000}B
1742No match
1743    A\xc4B
1744No match
1745
1746/\bX/match_invalid_utf
1747    A\x80X
1748 0: X
1749
1750/\BX/match_invalid_utf
1751\= Expect no match
1752    A\x80X
1753No match
1754
1755/(?<=...)X/match_invalid_utf
1756    AAA\x80BBBXYZ
1757 0: X
1758\= Expect no match
1759    AAA\x80BXYZ
1760No match
1761    AAA\x80BBXYZ
1762No match
1763
1764# -------------------------------------
1765
1766/(*UTF)(?=\x{123})/I
1767Capture group count = 0
1768May match empty string
1769Compile options: <none>
1770Overall options: utf
1771First code unit = \xc4
1772Last code unit = \xa3
1773Subject length lower bound = 1
1774
1775/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
1776Capture group count = 0
1777Options: utf
1778Starting code units: \xc3
1779Last code unit = 'X'
1780Subject length lower bound = 3
1781
1782/[��,]/BI,utf
1783------------------------------------------------------------------
1784        Bra
1785        [,\x{fff9f}]
1786        Ket
1787        End
1788------------------------------------------------------------------
1789Capture group count = 0
1790Options: utf
1791Starting code units: , \xf3
1792Subject length lower bound = 1
1793
1794/[\x{fff4}-\x{ffff8}]/I,utf
1795Capture group count = 0
1796Options: utf
1797Starting code units: \xef \xf0 \xf1 \xf2 \xf3
1798Subject length lower bound = 1
1799
1800/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
1801Capture group count = 0
1802Options: utf
1803Starting code units: \xef \xf0 \xf1 \xf2 \xf4
1804Subject length lower bound = 1
1805
1806/[\xff\x{ffff}]/I,utf
1807Capture group count = 0
1808Options: utf
1809Starting code units: \xc3 \xef
1810Subject length lower bound = 1
1811
1812/[\xff\x{ff}]/I,utf
1813Capture group count = 0
1814Options: utf
1815Starting code units: \xc3
1816Subject length lower bound = 1
1817    abc\x{ff}def
1818 0: \x{ff}
1819
1820/[\xff\x{ff}]/I
1821Capture group count = 0
1822First code unit = \xff
1823Subject length lower bound = 1
1824    abc\x{ff}def
1825 0: \xff
1826
1827/[Ss]/I
1828Capture group count = 0
1829First code unit = 'S' (caseless)
1830Subject length lower bound = 1
1831
1832/[Ss]/I,utf
1833Capture group count = 0
1834Options: utf
1835Starting code units: S s
1836Subject length lower bound = 1
1837
1838/(?:\x{ff}|\x{3000})/I,utf
1839Capture group count = 0
1840Options: utf
1841Starting code units: \xc3 \xe3
1842Subject length lower bound = 1
1843
1844/x/utf
1845    abxyz
1846 0: x
1847    \x80\=startchar
1848Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
1849    abc\x80\=startchar
1850Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
1851    abc\x80\=startchar,offset=3
1852Error -36 (bad UTF-8 offset)
1853
1854/\x{c1}+\x{e1}/iIB,ucp
1855------------------------------------------------------------------
1856        Bra
1857     /i \x{c1}+
1858     /i \x{e1}
1859        Ket
1860        End
1861------------------------------------------------------------------
1862Capture group count = 0
1863Options: caseless ucp
1864First code unit = \xc1 (caseless)
1865Last code unit = \xe1 (caseless)
1866Subject length lower bound = 2
1867    \x{c1}\x{c1}\x{c1}
1868 0: \xc1\xc1\xc1
1869    \x{e1}\x{e1}\x{e1}
1870 0: \xe1\xe1\xe1
1871
1872/a|\x{c1}/iI,ucp
1873Capture group count = 0
1874Options: caseless ucp
1875Starting code units: A a \xc1 \xe1
1876Subject length lower bound = 1
1877    \x{e1}xxx
1878 0: \xe1
1879
1880/a|\x{c1}/iI,utf
1881Capture group count = 0
1882Options: caseless utf
1883Starting code units: A a \xc3
1884Subject length lower bound = 1
1885    \x{e1}xxx
1886 0: \x{e1}
1887
1888/\x{c1}|\x{e1}/iI,ucp
1889Capture group count = 0
1890Options: caseless ucp
1891First code unit = \xc1 (caseless)
1892Subject length lower bound = 1
1893
1894/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
1895    X\x{e1}Y
1896 1: >\xc1<
1897
1898/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
1899    X\x{c1}Y
1900 1: >\xe1<
1901
1902# Without UTF or UCP, characters > 127 have only one case in the default locale.
1903
1904/X(\x{e1})Y/replace=>\U$1<,substitute_extended
1905    X\x{e1}Y
1906 1: >\xe1<
1907
1908/A/utf,match_invalid_utf,caseless
1909    \xe5A
1910 0: A
1911
1912/\bch\b/utf,match_invalid_utf
1913    qchq\=ph
1914Partial match:
1915    qchq\=ps
1916Partial match:
1917
1918/line1\nbreak/firstline,utf,match_invalid_utf
1919    line1\nbreak
1920 0: line1\x{0a}break
1921    line0\nline1\nbreak
1922No match
1923
1924/A\z/utf,match_invalid_utf
1925    A\x80\x42\n
1926No match
1927
1928# End of testinput10
1929