xref: /aosp_15_r20/external/pcre/testdata/testinput5 (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1# This set of tests checks the API, internals, and non-Perl stuff for UTF
2# support, including Unicode properties. However, tests that give different
3# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
4# 12).
5
6#newline_default lf any anycrlf
7
8# PCRE2 and Perl disagree about the characteristics of certain Unicode
9# characters. For example, 061C was considered by Perl to be Arabic, though
10# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
11# However, it *is* in that file for Unicode 10, but when I came to re-check,
12# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
13
14# 2066-2069 are graphic and printable according to Perl, though they are
15# actually "isolate" control characters. That is why the following tests are
16# here rather than in test 4.
17
18/^[\p{Arabic}]/utf
19    \x{061c}
20
21/^[[:graph:]]+$/utf,ucp
22\= Expect no match
23    \x{61c}
24    \x{2066}
25    \x{2067}
26    \x{2068}
27    \x{2069}
28
29/^[[:print:]]+$/utf,ucp
30\= Expect no match
31    \x{61c}
32    \x{2066}
33    \x{2067}
34    \x{2068}
35    \x{2069}
36
37/^[[:^graph:]]+$/utf,ucp
38    \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
39    \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
40
41/^[[:^print:]]+$/utf,ucp
42    \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
43    \x{2068}\x{2069}
44
45# Perl does not consider U+180e to be a space character. It is true that it
46# does not appear in the Unicode PropList.txt file as such, but in many other
47# sources it is listed as a space, and has been treated as such in PCRE for
48# a long time.
49
50/^>[[:blank:]]*/utf,ucp
51    >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
52
53/^A\s+Z/utf,ucp
54    A\x{85}\x{180e}\x{2005}Z
55
56/^A[\s]+Z/utf,ucp
57    A\x{2005}Z
58    A\x{85}\x{2005}Z
59
60/^[[:graph:]]+$/utf,ucp
61\= Expect no match
62    \x{180e}
63
64/^[[:print:]]+$/utf,ucp
65    \x{180e}
66
67/^[[:^graph:]]+$/utf,ucp
68    \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
69
70/^[[:^print:]]+$/utf,ucp
71\= Expect no match
72    \x{180e}
73
74# End of U+180E tests.
75
76# ---------------------------------------------------------------------
77
78/\x{110000}/IB,utf
79
80/\o{4200000}/IB,utf
81
82/\x{ffffffff}/utf
83
84/\o{37777777777}/utf
85
86/\x{100000000}/utf
87
88/\o{77777777777}/utf
89
90/\x{d800}/utf
91
92/\o{154000}/utf
93
94/\x{dfff}/utf
95
96/\o{157777}/utf
97
98/\x{d7ff}/utf
99
100/\o{153777}/utf
101
102/\x{e000}/utf
103
104/\o{170000}/utf
105
106/^\x{100}a\x{1234}/utf
107    \x{100}a\x{1234}bcd
108
109/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
110    \x{0041}\x{2262}\x{0391}\x{002e}
111
112/.{3,5}X/IB,utf
113    \x{212ab}\x{212ab}\x{212ab}\x{861}X
114
115/.{3,5}?/IB,utf
116    \x{212ab}\x{212ab}\x{212ab}\x{861}
117
118/^[ab]/IB,utf
119    bar
120\= Expect no match
121    c
122    \x{ff}
123    \x{100}
124
125/\x{100}*(\d+|"(?1)")/utf
126    1234
127    "1234"
128    \x{100}1234
129    "\x{100}1234"
130    \x{100}\x{100}12ab
131    \x{100}\x{100}"12"
132\= Expect no match
133    \x{100}\x{100}abcd
134
135/\x{100}*/IB,utf
136
137/a\x{100}*/IB,utf
138
139/ab\x{100}*/IB,utf
140
141/[\x{200}-\x{100}]/utf
142
143/[Ā-Ą]/utf
144    \x{100}
145    \x{104}
146\= Expect no match
147    \x{105}
148    \x{ff}
149
150/[\xFF]/IB
151    >\xff<
152
153/[^\xFF]/IB
154
155/[Ä-Ü]/utf
156    Ö # Matches without Study
157    \x{d6}
158
159/[Ä-Ü]/utf
160    Ö <-- Same with Study
161    \x{d6}
162
163/[\x{c4}-\x{dc}]/utf
164    Ö # Matches without Study
165    \x{d6}
166
167/[\x{c4}-\x{dc}]/utf
168    Ö <-- Same with Study
169    \x{d6}
170
171/[^\x{100}]abc(xyz(?1))/IB,utf
172
173/(\x{100}(b(?2)c))?/IB,utf
174
175/(\x{100}(b(?2)c)){0,2}/IB,utf
176
177/(\x{100}(b(?1)c))?/IB,utf
178
179/(\x{100}(b(?1)c)){0,2}/IB,utf
180
181/\W/utf
182    A.B
183    A\x{100}B
184
185/\w/utf
186    \x{100}X
187
188# Use no_start_optimize because the first code unit is different in 8-bit from
189# the wider modes.
190
191/^\ሴ/IB,utf,no_start_optimize
192
193/()()()()()()()()()()
194 ()()()()()()()()()()
195 ()()()()()()()()()()
196 ()()()()()()()()()()
197 A (x) (?41) B/x,utf
198    AxxB
199
200/^[\x{100}\E-\Q\E\x{150}]/B,utf
201
202/^[\QĀ\E-\QŐ\E]/B,utf
203
204/^abc./gmx,newline=any,utf
205    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
206
207/abc.$/gmx,newline=any,utf
208    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
209
210/^a\Rb/bsr=unicode,utf
211    a\nb
212    a\rb
213    a\r\nb
214    a\x0bb
215    a\x0cb
216    a\x{85}b
217    a\x{2028}b
218    a\x{2029}b
219\= Expect no match
220    a\n\rb
221
222/^a\R*b/bsr=unicode,utf
223    ab
224    a\nb
225    a\rb
226    a\r\nb
227    a\x0bb
228    a\x0c\x{2028}\x{2029}b
229    a\x{85}b
230    a\n\rb
231    a\n\r\x{85}\x0cb
232
233/^a\R+b/bsr=unicode,utf
234    a\nb
235    a\rb
236    a\r\nb
237    a\x0bb
238    a\x0c\x{2028}\x{2029}b
239    a\x{85}b
240    a\n\rb
241    a\n\r\x{85}\x0cb
242\= Expect no match
243    ab
244
245/^a\R{1,3}b/bsr=unicode,utf
246    a\nb
247    a\n\rb
248    a\n\r\x{85}b
249    a\r\n\r\nb
250    a\r\n\r\n\r\nb
251    a\n\r\n\rb
252    a\n\n\r\nb
253\= Expect no match
254    a\n\n\n\rb
255    a\r
256
257/\H\h\V\v/utf
258    X X\x0a
259    X\x09X\x0b
260\= Expect no match
261    \x{a0} X\x0a
262
263/\H*\h+\V?\v{3,4}/utf
264    \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
265    \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
266    \x09\x20\x{a0}\x0a\x0b\x0c
267\= Expect no match
268    \x09\x20\x{a0}\x0a\x0b
269
270/\H\h\V\v/utf
271    \x{3001}\x{3000}\x{2030}\x{2028}
272    X\x{180e}X\x{85}
273\= Expect no match
274    \x{2009} X\x0a
275
276/\H*\h+\V?\v{3,4}/utf
277    \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
278    \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
279    \x09\x20\x{202f}\x0a\x0b\x0c
280\= Expect no match
281    \x09\x{200a}\x{a0}\x{2028}\x0b
282
283/[\h]/B,utf
284    >\x{1680}
285
286/[\h]{3,}/B,utf
287    >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
288
289/[\v]/B,utf
290
291/[\H]/B,utf
292
293/[\V]/B,utf
294
295/.*$/newline=any,utf
296    \x{1ec5}
297
298/a\Rb/I,bsr=anycrlf,utf
299    a\rb
300    a\nb
301    a\r\nb
302\= Expect no match
303    a\x{85}b
304    a\x0bb
305
306/a\Rb/I,bsr=unicode,utf
307    a\rb
308    a\nb
309    a\r\nb
310    a\x{85}b
311    a\x0bb
312
313/a\R?b/I,bsr=anycrlf,utf
314    a\rb
315    a\nb
316    a\r\nb
317\= Expect no match
318    a\x{85}b
319    a\x0bb
320
321/a\R?b/I,bsr=unicode,utf
322    a\rb
323    a\nb
324    a\r\nb
325    a\x{85}b
326    a\x0bb
327
328/.*a.*=.b.*/utf,newline=any
329    QQQ\x{2029}ABCaXYZ=!bPQR
330\= Expect no match
331    a\x{2029}b
332    \x61\xe2\x80\xa9\x62
333
334/[[:a\x{100}b:]]/utf
335
336/a[^]b/utf,allow_empty_class,match_unset_backref
337    a\x{1234}b
338    a\nb
339\= Expect no match
340    ab
341
342/a[^]+b/utf,allow_empty_class,match_unset_backref
343    aXb
344    a\nX\nX\x{1234}b
345\= Expect no match
346    ab
347
348/(\x{de})\1/
349    \x{de}\x{de}
350
351/X/newline=any,utf,firstline
352    A\x{1ec5}ABCXYZ
353
354/Xa{2,4}b/utf
355    X\=ps
356    Xa\=ps
357    Xaa\=ps
358    Xaaa\=ps
359    Xaaaa\=ps
360
361/Xa{2,4}?b/utf
362    X\=ps
363    Xa\=ps
364    Xaa\=ps
365    Xaaa\=ps
366    Xaaaa\=ps
367
368/Xa{2,4}+b/utf
369    X\=ps
370    Xa\=ps
371    Xaa\=ps
372    Xaaa\=ps
373    Xaaaa\=ps
374
375/X\x{123}{2,4}b/utf
376    X\=ps
377    X\x{123}\=ps
378    X\x{123}\x{123}\=ps
379    X\x{123}\x{123}\x{123}\=ps
380    X\x{123}\x{123}\x{123}\x{123}\=ps
381
382/X\x{123}{2,4}?b/utf
383    X\=ps
384    X\x{123}\=ps
385    X\x{123}\x{123}\=ps
386    X\x{123}\x{123}\x{123}\=ps
387    X\x{123}\x{123}\x{123}\x{123}\=ps
388
389/X\x{123}{2,4}+b/utf
390    X\=ps
391    X\x{123}\=ps
392    X\x{123}\x{123}\=ps
393    X\x{123}\x{123}\x{123}\=ps
394    X\x{123}\x{123}\x{123}\x{123}\=ps
395
396/X\x{123}{2,4}b/utf
397\= Expect no match
398    Xx\=ps
399    X\x{123}x\=ps
400    X\x{123}\x{123}x\=ps
401    X\x{123}\x{123}\x{123}x\=ps
402    X\x{123}\x{123}\x{123}\x{123}x\=ps
403
404/X\x{123}{2,4}?b/utf
405\= Expect no match
406    Xx\=ps
407    X\x{123}x\=ps
408    X\x{123}\x{123}x\=ps
409    X\x{123}\x{123}\x{123}x\=ps
410    X\x{123}\x{123}\x{123}\x{123}x\=ps
411
412/X\x{123}{2,4}+b/utf
413\= Expect no match
414    Xx\=ps
415    X\x{123}x\=ps
416    X\x{123}\x{123}x\=ps
417    X\x{123}\x{123}\x{123}x\=ps
418    X\x{123}\x{123}\x{123}\x{123}x\=ps
419
420/X\d{2,4}b/utf
421    X\=ps
422    X3\=ps
423    X33\=ps
424    X333\=ps
425    X3333\=ps
426
427/X\d{2,4}?b/utf
428    X\=ps
429    X3\=ps
430    X33\=ps
431    X333\=ps
432    X3333\=ps
433
434/X\d{2,4}+b/utf
435    X\=ps
436    X3\=ps
437    X33\=ps
438    X333\=ps
439    X3333\=ps
440
441/X\D{2,4}b/utf
442    X\=ps
443    Xa\=ps
444    Xaa\=ps
445    Xaaa\=ps
446    Xaaaa\=ps
447
448/X\D{2,4}?b/utf
449    X\=ps
450    Xa\=ps
451    Xaa\=ps
452    Xaaa\=ps
453    Xaaaa\=ps
454
455/X\D{2,4}+b/utf
456    X\=ps
457    Xa\=ps
458    Xaa\=ps
459    Xaaa\=ps
460    Xaaaa\=ps
461
462/X\D{2,4}b/utf
463    X\=ps
464    X\x{123}\=ps
465    X\x{123}\x{123}\=ps
466    X\x{123}\x{123}\x{123}\=ps
467    X\x{123}\x{123}\x{123}\x{123}\=ps
468
469/X\D{2,4}?b/utf
470    X\=ps
471    X\x{123}\=ps
472    X\x{123}\x{123}\=ps
473    X\x{123}\x{123}\x{123}\=ps
474    X\x{123}\x{123}\x{123}\x{123}\=ps
475
476/X\D{2,4}+b/utf
477    X\=ps
478    X\x{123}\=ps
479    X\x{123}\x{123}\=ps
480    X\x{123}\x{123}\x{123}\=ps
481    X\x{123}\x{123}\x{123}\x{123}\=ps
482
483/X[abc]{2,4}b/utf
484    X\=ps
485    Xa\=ps
486    Xaa\=ps
487    Xaaa\=ps
488    Xaaaa\=ps
489
490/X[abc]{2,4}?b/utf
491    X\=ps
492    Xa\=ps
493    Xaa\=ps
494    Xaaa\=ps
495    Xaaaa\=ps
496
497/X[abc]{2,4}+b/utf
498    X\=ps
499    Xa\=ps
500    Xaa\=ps
501    Xaaa\=ps
502    Xaaaa\=ps
503
504/X[abc\x{123}]{2,4}b/utf
505    X\=ps
506    X\x{123}\=ps
507    X\x{123}\x{123}\=ps
508    X\x{123}\x{123}\x{123}\=ps
509    X\x{123}\x{123}\x{123}\x{123}\=ps
510
511/X[abc\x{123}]{2,4}?b/utf
512    X\=ps
513    X\x{123}\=ps
514    X\x{123}\x{123}\=ps
515    X\x{123}\x{123}\x{123}\=ps
516    X\x{123}\x{123}\x{123}\x{123}\=ps
517
518/X[abc\x{123}]{2,4}+b/utf
519    X\=ps
520    X\x{123}\=ps
521    X\x{123}\x{123}\=ps
522    X\x{123}\x{123}\x{123}\=ps
523    X\x{123}\x{123}\x{123}\x{123}\=ps
524
525/X[^a]{2,4}b/utf
526    X\=ps
527    Xz\=ps
528    Xzz\=ps
529    Xzzz\=ps
530    Xzzzz\=ps
531
532/X[^a]{2,4}?b/utf
533    X\=ps
534    Xz\=ps
535    Xzz\=ps
536    Xzzz\=ps
537    Xzzzz\=ps
538
539/X[^a]{2,4}+b/utf
540    X\=ps
541    Xz\=ps
542    Xzz\=ps
543    Xzzz\=ps
544    Xzzzz\=ps
545
546/X[^a]{2,4}b/utf
547    X\=ps
548    X\x{123}\=ps
549    X\x{123}\x{123}\=ps
550    X\x{123}\x{123}\x{123}\=ps
551    X\x{123}\x{123}\x{123}\x{123}\=ps
552
553/X[^a]{2,4}?b/utf
554    X\=ps
555    X\x{123}\=ps
556    X\x{123}\x{123}\=ps
557    X\x{123}\x{123}\x{123}\=ps
558    X\x{123}\x{123}\x{123}\x{123}\=ps
559
560/X[^a]{2,4}+b/utf
561    X\=ps
562    X\x{123}\=ps
563    X\x{123}\x{123}\=ps
564    X\x{123}\x{123}\x{123}\=ps
565    X\x{123}\x{123}\x{123}\x{123}\=ps
566
567/(Y)X\1{2,4}b/utf
568    YX\=ps
569    YXY\=ps
570    YXYY\=ps
571    YXYYY\=ps
572    YXYYYY\=ps
573
574/(Y)X\1{2,4}?b/utf
575    YX\=ps
576    YXY\=ps
577    YXYY\=ps
578    YXYYY\=ps
579    YXYYYY\=ps
580
581/(Y)X\1{2,4}+b/utf
582    YX\=ps
583    YXY\=ps
584    YXYY\=ps
585    YXYYY\=ps
586    YXYYYY\=ps
587
588/(\x{123})X\1{2,4}b/utf
589    \x{123}X\=ps
590    \x{123}X\x{123}\=ps
591    \x{123}X\x{123}\x{123}\=ps
592    \x{123}X\x{123}\x{123}\x{123}\=ps
593    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
594
595/(\x{123})X\1{2,4}?b/utf
596    \x{123}X\=ps
597    \x{123}X\x{123}\=ps
598    \x{123}X\x{123}\x{123}\=ps
599    \x{123}X\x{123}\x{123}\x{123}\=ps
600    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
601
602/(\x{123})X\1{2,4}+b/utf
603    \x{123}X\=ps
604    \x{123}X\x{123}\=ps
605    \x{123}X\x{123}\x{123}\=ps
606    \x{123}X\x{123}\x{123}\x{123}\=ps
607    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
608
609/\bthe cat\b/utf
610    the cat\=ps
611    the cat\=ph
612
613/abcd*/utf
614    xxxxabcd\=ps
615    xxxxabcd\=ph
616
617/abcd*/i,utf
618    xxxxabcd\=ps
619    xxxxabcd\=ph
620    XXXXABCD\=ps
621    XXXXABCD\=ph
622
623/abc\d*/utf
624    xxxxabc1\=ps
625    xxxxabc1\=ph
626
627/(a)bc\1*/utf
628    xxxxabca\=ps
629    xxxxabca\=ph
630
631/abc[de]*/utf
632    xxxxabcde\=ps
633    xxxxabcde\=ph
634
635/X\W{3}X/utf
636    X\=ps
637
638/\sxxx\s/utf,tables=2
639    AB\x{85}xxx\x{a0}XYZ
640    AB\x{a0}xxx\x{85}XYZ
641
642/\S \S/utf,tables=2
643    \x{a2} \x{84}
644
645'A#хц'Bx,newline=any,utf
646
647'A#хц
648  PQ'Bx,newline=any,utf
649
650/a+#хaa
651  z#XX?/Bx,newline=any,utf
652
653/a+#хaa
654  z#х?/Bx,newline=any,utf
655
656/\g{A}xxx#bXX(?'A'123)
657(?'A'456)/Bx,newline=any,utf
658
659/\g{A}xxx#bх(?'A'123)
660(?'A'456)/Bx,newline=any,utf
661
662/^\cģ/utf
663
664/(\R*)(.)/s,utf
665    \r\n
666    \r\r\n\n\r
667    \r\r\n\n\r\n
668
669/(\R)*(.)/s,utf
670    \r\n
671    \r\r\n\n\r
672    \r\r\n\n\r\n
673
674/[^\x{1234}]+/Ii,utf
675
676/[^\x{1234}]+?/Ii,utf
677
678/[^\x{1234}]++/Ii,utf
679
680/[^\x{1234}]{2}/Ii,utf
681
682/f.*/
683    for\=ph
684
685/f.*/s
686    for\=ph
687
688/f.*/utf
689    for\=ph
690
691/f.*/s,utf
692    for\=ph
693
694/\x{d7ff}\x{e000}/utf
695
696/\x{d800}/utf
697
698/\x{dfff}/utf
699
700/\h+/utf
701    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
702    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
703
704/[\h\x{e000}]+/B,utf
705    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
706    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
707
708/\H+/utf
709    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
710    \x{2000}\x{200a}\x{1fff}\x{200b}
711    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
712    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
713
714/[\H\x{d7ff}]+/B,utf
715    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
716    \x{2000}\x{200a}\x{1fff}\x{200b}
717    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
718    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
719
720/\v+/utf
721    \x{2027}\x{2030}\x{2028}\x{2029}
722    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
723
724/[\v\x{e000}]+/B,utf
725    \x{2027}\x{2030}\x{2028}\x{2029}
726    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
727
728/\V+/utf
729    \x{2028}\x{2029}\x{2027}\x{2030}
730    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
731
732/[\V\x{d7ff}]+/B,utf
733    \x{2028}\x{2029}\x{2027}\x{2030}
734    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
735
736/\R+/bsr=unicode,utf
737    \x{2027}\x{2030}\x{2028}\x{2029}
738    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
739
740/(..)\1/utf
741    ab\=ps
742    aba\=ps
743    abab\=ps
744
745/(..)\1/i,utf
746    ab\=ps
747    abA\=ps
748    aBAb\=ps
749
750/(..)\1{2,}/utf
751    ab\=ps
752    aba\=ps
753    abab\=ps
754    ababa\=ps
755    ababab\=ps
756    ababab\=ph
757    abababa\=ps
758    abababa\=ph
759
760/(..)\1{2,}/i,utf
761    ab\=ps
762    aBa\=ps
763    aBAb\=ps
764    AbaBA\=ps
765    abABAb\=ps
766    aBAbaB\=ph
767    abABabA\=ps
768    abaBABa\=ph
769
770/(..)\1{2,}?x/i,utf
771    ab\=ps
772    abA\=ps
773    aBAb\=ps
774    abaBA\=ps
775    abAbaB\=ps
776    abaBabA\=ps
777    abAbABaBx\=ps
778
779/./utf,newline=crlf
780    \r\=ps
781    \r\=ph
782
783/.{2,3}/utf,newline=crlf
784    \r\=ps
785    \r\=ph
786    \r\r\=ps
787    \r\r\=ph
788    \r\r\r\=ps
789    \r\r\r\=ph
790
791/.{2,3}?/utf,newline=crlf
792    \r\=ps
793    \r\=ph
794    \r\r\=ps
795    \r\r\=ph
796    \r\r\r\=ps
797    \r\r\r\=ph
798
799/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
800
801/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
802
803/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
804
805/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
806
807/(?<=\x{1234}\x{1234})\bxy/I,utf
808
809/(?<!^)ETA/utf
810\= Expect no match
811    ETA
812
813/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
814
815/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
816
817/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
818
819/^\u{0000000000010ffff}/utf,extra_alt_bsux
820    \x{10ffff}
821
822/\u{ 1bb1}/utf,extra_alt_bsux
823    u{ 1bb1}
824\= Expect no match
825    \x{1bb1}
826
827/\u/utf,alt_bsux
828    \\u
829
830/^a+[a\x{200}]/B,utf
831    aa
832
833/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
834
835/[\p{L}]/IB
836
837/[\p{^L}]/IB
838
839/[\P{L}]/IB
840
841/[\P{^L}]/IB
842
843/[abc\p{L}\x{0660}]/IB,utf
844
845/[\p{Nd}]/IB,utf
846    1234
847
848/[\p{Nd}+-]+/IB,utf
849    1234
850    12-34
851    12+\x{661}-34
852\= Expect no match
853    abcd
854
855/(?:[\PPa*]*){8,}/
856
857/[\P{Any}]/B
858
859/[\P{Any}\E]/B
860
861/(\P{Yi}+\277)/
862
863/(\P{Yi}+\277)?/
864
865/(?<=\P{Yi}{3}A)X/
866
867/\p{Yi}+(\P{Yi}+)(?1)/
868
869/(\P{Yi}{2}\277)?/
870
871/[\P{Yi}A]/
872
873/[\P{Yi}\P{Yi}\P{Yi}A]/
874
875/[^\P{Yi}A]/
876
877/[^\P{Yi}\P{Yi}\P{Yi}A]/
878
879/(\P{Yi}*\277)*/
880
881/(\P{Yi}*?\277)*/
882
883/(\p{Yi}*+\277)*/
884
885/(\P{Yi}?\277)*/
886
887/(\P{Yi}??\277)*/
888
889/(\p{Yi}?+\277)*/
890
891/(\P{Yi}{0,3}\277)*/
892
893/(\P{Yi}{0,3}?\277)*/
894
895/(\p{Yi}{0,3}+\277)*/
896
897/\p{Zl}{2,3}+/B,utf
898    


899    \x{2028}\x{2028}\x{2028}
900
901/\p{Zl}/B,utf
902
903/\p{Lu}{3}+/B,utf
904
905/\pL{2}+/B,utf
906
907/\p{Cc}{2}+/B,utf
908
909/^\p{Cf}/utf
910    \x{180e}
911    \x{061c}
912    \x{2066}
913    \x{2067}
914    \x{2068}
915    \x{2069}
916
917/^\p{Cs}/utf
918    \x{dfff}\=no_utf_check
919\= Expect no match
920    \x{09f}
921
922/^\p{Mn}/utf
923    \x{1a1b}
924
925/^\p{Pe}/utf
926    \x{2309}
927    \x{230b}
928
929/^\p{Ps}/utf
930    \x{2308}
931    \x{230a}
932
933/^\p{Sc}+/utf
934    $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
935    \x{9f2}
936\= Expect no match
937    X
938    \x{2c2}
939
940/^\p{Zs}/utf
941    \ \
942    \x{a0}
943    \x{1680}
944    \x{2000}
945    \x{2001}
946\= Expect no match
947    \x{2028}
948    \x{200d}
949
950# These are here because Perl has problems with the negative versions of the
951# properties and has changed how it behaves for caseless matching.
952
953/\p{^Lu}/i,utf
954    1234
955\= Expect no match
956    ABC
957
958/\P{Lu}/i,utf
959    1234
960\= Expect no match
961    ABC
962
963/\p{Ll}/i,utf
964    a
965    Az
966\= Expect no match
967    ABC
968
969/\p{Lu}/i,utf
970    A
971    a\x{10a0}B
972\= Expect no match
973    a
974    \x{1d00}
975
976/\p{Lu}/i,utf
977    A
978    aZ
979\= Expect no match
980    abc
981
982/[\x{c0}\x{391}]/i,utf
983    \x{c0}
984    \x{e0}
985
986# The next two are special cases where the lengths of the different cases of
987# the same character differ. The first went wrong with heap frame storage; the
988# second was broken in all cases.
989
990/^\x{023a}+?(\x{0130}+)/i,utf
991  \x{023a}\x{2c65}\x{0130}
992
993/^\x{023a}+([^X])/i,utf
994  \x{023a}\x{2c65}X
995
996/\x{c0}+\x{116}+/i,utf
997    \x{c0}\x{e0}\x{116}\x{117}
998
999/[\x{c0}\x{116}]+/i,utf
1000    \x{c0}\x{e0}\x{116}\x{117}
1001
1002/(\x{de})\1/i,utf
1003    \x{de}\x{de}
1004    \x{de}\x{fe}
1005    \x{fe}\x{fe}
1006    \x{fe}\x{de}
1007
1008/^\x{c0}$/i,utf
1009    \x{c0}
1010    \x{e0}
1011
1012/^\x{e0}$/i,utf
1013    \x{c0}
1014    \x{e0}
1015
1016# The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
1017# will match it only with UCP support, because without that PCRE has no notion
1018# of case for anything other than the ASCII letters.
1019
1020/((?i)[\x{c0}])/utf
1021    \x{c0}
1022    \x{e0}
1023
1024/(?i:[\x{c0}])/utf
1025    \x{c0}
1026    \x{e0}
1027
1028# These are PCRE's extra properties to help with Unicodizing \d etc.
1029
1030/^\p{Xan}/utf
1031    ABCD
1032    1234
1033    \x{6ca}
1034    \x{a6c}
1035    \x{10a7}
1036\= Expect no match
1037    _ABC
1038
1039/^\p{Xan}+/utf
1040    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1041\= Expect no match
1042    _ABC
1043
1044/^\p{Xan}+?/utf
1045    \x{6ca}\x{a6c}\x{10a7}_
1046
1047/^\p{Xan}*/utf
1048    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1049
1050/^\p{Xan}{2,9}/utf
1051    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1052
1053/^\p{Xan}{2,9}?/utf
1054    \x{6ca}\x{a6c}\x{10a7}_
1055
1056/^[\p{Xan}]/utf
1057    ABCD1234_
1058    1234abcd_
1059    \x{6ca}
1060    \x{a6c}
1061    \x{10a7}
1062\= Expect no match
1063    _ABC
1064
1065/^[\p{Xan}]+/utf
1066    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1067\= Expect no match
1068    _ABC
1069
1070/^>\p{Xsp}/utf
1071    >\x{1680}\x{2028}\x{0b}
1072    >\x{a0}
1073\= Expect no match
1074    \x{0b}
1075
1076/^>\p{Xsp}+/utf
1077    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1078
1079/^>\p{Xsp}+?/utf
1080    >\x{1680}\x{2028}\x{0b}
1081
1082/^>\p{Xsp}*/utf
1083    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1084
1085/^>\p{Xsp}{2,9}/utf
1086    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1087
1088/^>\p{Xsp}{2,9}?/utf
1089    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1090
1091/^>[\p{Xsp}]/utf
1092    >\x{2028}\x{0b}
1093
1094/^>[\p{Xsp}]+/utf
1095    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1096
1097/^>\p{Xps}/utf
1098    >\x{1680}\x{2028}\x{0b}
1099    >\x{a0}
1100\= Expect no match
1101    \x{0b}
1102
1103/^>\p{Xps}+/utf
1104    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1105
1106/^>\p{Xps}+?/utf
1107    >\x{1680}\x{2028}\x{0b}
1108
1109/^>\p{Xps}*/utf
1110    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1111
1112/^>\p{Xps}{2,9}/utf
1113    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1114
1115/^>\p{Xps}{2,9}?/utf
1116    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1117
1118/^>[\p{Xps}]/utf
1119    >\x{2028}\x{0b}
1120
1121/^>[\p{Xps}]+/utf
1122    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1123
1124/^\p{Xwd}/utf
1125    ABCD
1126    1234
1127    \x{6ca}
1128    \x{a6c}
1129    \x{10a7}
1130    _ABC
1131\= Expect no match
1132    []
1133
1134/^\p{Xwd}+/utf
1135    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1136
1137/^\p{Xwd}+?/utf
1138    \x{6ca}\x{a6c}\x{10a7}_
1139
1140/^\p{Xwd}*/utf
1141    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1142
1143/^\p{Xwd}{2,9}/utf
1144    A_B12\x{6ca}\x{a6c}\x{10a7}
1145
1146/^\p{Xwd}{2,9}?/utf
1147    \x{6ca}\x{a6c}\x{10a7}_
1148
1149/^[\p{Xwd}]/utf
1150    ABCD1234_
1151    1234abcd_
1152    \x{6ca}
1153    \x{a6c}
1154    \x{10a7}
1155    _ABC
1156\= Expect no match
1157    []
1158
1159/^[\p{Xwd}]+/utf
1160    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1161
1162# A check not in UTF-8 mode
1163
1164/^[\p{Xwd}]+/
1165    ABCD1234_
1166
1167# Some negative checks
1168
1169/^[\P{Xwd}]+/utf
1170    !.+\x{019}\x{482}AB
1171
1172/^[\p{^Xwd}]+/utf
1173    !.+\x{019}\x{589}AB
1174
1175/[\D]/B,utf,ucp
1176    1\x{3c8}2
1177
1178/[\d]/B,utf,ucp
1179    >\x{6f4}<
1180
1181/[\S]/B,utf,ucp
1182    \x{1680}\x{6f4}\x{1680}
1183
1184/[\s]/B,utf,ucp
1185    >\x{1680}<
1186
1187/[\W]/B,utf,ucp
1188    A\x{1735}B
1189
1190/[\w]/B,utf,ucp
1191    >\x{1723}<
1192
1193/\D/B,utf,ucp
1194    1\x{3c8}2
1195
1196/\d/B,utf,ucp
1197    >\x{6f4}<
1198
1199/\S/B,utf,ucp
1200    \x{1680}\x{6f4}\x{1680}
1201
1202/\s/B,utf,ucp
1203    >\x{1680}>
1204
1205/\W/B,utf,ucp
1206    A\x{1735}B
1207
1208/\w/B,utf,ucp
1209    >\x{1723}<
1210
1211/[[:alpha:]]/B,ucp
1212
1213/[[:lower:]]/B,ucp
1214
1215/[[:upper:]]/B,ucp
1216
1217/[[:alnum:]]/B,ucp
1218
1219/[[:ascii:]]/B,ucp
1220
1221/[[:cntrl:]]/B,ucp
1222
1223/[[:digit:]]/B,ucp
1224
1225/[[:digit:]]/B,ucp,ascii_digit
1226
1227/[[:graph:]]/B,ucp
1228
1229/[[:print:]]/B,ucp
1230
1231/[[:punct:]]/B,ucp
1232
1233/[[:space:]]/B,ucp
1234
1235/[[:word:]]/B,ucp
1236
1237/[[:xdigit:]]/B,ucp
1238
1239/[[:xdigit:]]/B,ucp,ascii_digit
1240
1241# Unicode properties for \b and \B
1242
1243/\b...\B/utf,ucp
1244    abc_
1245    \x{37e}abc\x{376}
1246    \x{37e}\x{376}\x{371}\x{393}\x{394}
1247    !\x{c0}++\x{c1}\x{c2}
1248    !\x{c0}+++++
1249
1250# Without PCRE_UCP, non-ASCII characters always fail, even if < 256
1251
1252/\b...\B/utf
1253    abc_
1254\= Expect no match
1255    \x{37e}abc\x{376}
1256    \x{37e}\x{376}\x{371}\x{393}\x{394}
1257    !\x{c0}++\x{c1}\x{c2}
1258    !\x{c0}+++++
1259
1260# With PCRE_UCP, non-UTF8 chars that are < 256 still have their properties checked
1261
1262/\b...\B/ucp
1263    abc_
1264    !\x{c0}++\x{c1}\x{c2}
1265    !\x{c0}+++++
1266
1267# Some of these are silly, but they check various combinations
1268
1269/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
1270    123
1271    abc
1272
1273/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
1274    123
1275    abc
1276
1277/[[:alpha:]]+/B,utf,ucp
1278    abc
1279
1280/[[:^alpha:]\S]+/B,utf,ucp
1281    123
1282    abc
1283
1284/[^\d]+/B,utf,ucp
1285    abc123
1286    abc\x{123}
1287    \x{660}abc
1288
1289/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
1290
1291/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
1292
1293/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
1294
1295/\p{Han}+X\p{Greek}+\x{370}/B,utf
1296
1297/\p{Xan}+!\p{Xan}+A/B
1298
1299/\p{Xsp}+!\p{Xsp}\t/B
1300
1301/\p{Xps}+!\p{Xps}\t/B
1302
1303/\p{Xwd}+!\p{Xwd}_/B
1304
1305/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
1306
1307# These behaved oddly in Perl, so they are kept in this test
1308
1309/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1310\= Expect no match
1311    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1312
1313/(ȺȺȺ)?\1/i,utf
1314\= Expect no match
1315    ȺȺȺⱥⱥ
1316
1317/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1318    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1319
1320/(ȺȺȺ)?\1/i,utf
1321    ȺȺȺⱥⱥⱥ
1322
1323/(\x{23a}\x{23a}\x{23a})\1/i,utf
1324\= Expect no match
1325    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1326
1327/(ȺȺȺ)\1/i,utf
1328\= Expect no match
1329    ȺȺȺⱥⱥ
1330
1331/(\x{23a}\x{23a}\x{23a})\1/i,utf
1332    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1333
1334/(ȺȺȺ)\1/i,utf
1335    ȺȺȺⱥⱥⱥ
1336
1337/(\x{2c65}\x{2c65})\1/i,utf
1338    \x{2c65}\x{2c65}\x{23a}\x{23a}
1339
1340/(ⱥⱥ)\1/i,utf
1341    ⱥⱥȺȺ
1342
1343/(\x{23a}\x{23a}\x{23a})\1Y/i,utf
1344    X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
1345
1346/(\x{2c65}\x{2c65})\1Y/i,utf
1347    X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
1348
1349# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
1350
1351/^[\p{Batak}]/utf
1352    \x{1bc0}
1353    \x{1bff}
1354\= Expect no match
1355    \x{1bf4}
1356
1357/^[\p{Brahmi}]/utf
1358    \x{11000}
1359    \x{1106f}
1360\= Expect no match
1361    \x{1104e}
1362
1363/^[\p{Mandaic}]/utf
1364    \x{840}
1365    \x{85e}
1366\= Expect no match
1367    \x{85c}
1368    \x{85d}
1369
1370/(\X*)(.)/s,utf
1371    A\x{300}
1372
1373/^S(\X*)e(\X*)$/utf
1374    Stéréo
1375
1376/^\X/utf
1377    ́réo
1378
1379/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
1380    aX41z
1381\= Expect no match
1382    aAz
1383
1384/\X/
1385    a\=ps
1386    a\=ph
1387
1388/\Xa/
1389    aa\=ps
1390    aa\=ph
1391
1392/\X{2}/
1393    aa\=ps
1394    aa\=ph
1395
1396/\X+a/
1397    a\=ps
1398    aa\=ps
1399    aa\=ph
1400
1401/\X+?a/
1402    a\=ps
1403    ab\=ps
1404    aa\=ps
1405    aa\=ph
1406    aba\=ps
1407
1408# These Unicode 6.1.0 scripts are not known to Perl.
1409
1410/\p{Chakma}\d/utf,ucp
1411    \x{11100}\x{1113c}
1412
1413/\p{Takri}\d/utf,ucp
1414    \x{11680}\x{116c0}
1415
1416/^\X/utf
1417    A\=ps
1418    A\=ph
1419    A\x{300}\x{301}\=ps
1420    A\x{300}\x{301}\=ph
1421    A\x{301}\=ps
1422    A\x{301}\=ph
1423
1424/^\X{2,3}/utf
1425    A\=ps
1426    A\=ph
1427    AA\=ps
1428    AA\=ph
1429    A\x{300}\x{301}\=ps
1430    A\x{300}\x{301}\=ph
1431    A\x{300}\x{301}A\x{300}\x{301}\=ps
1432    A\x{300}\x{301}A\x{300}\x{301}\=ph
1433
1434/^\X{2}/utf
1435    AA\=ps
1436    AA\=ph
1437    A\x{300}\x{301}A\x{300}\x{301}\=ps
1438    A\x{300}\x{301}A\x{300}\x{301}\=ph
1439
1440/^\X+/utf
1441    AA\=ps
1442    AA\=ph
1443
1444/^\X+?Z/utf
1445    AA\=ps
1446    AA\=ph
1447
1448/A\x{3a3}B/IBi,utf
1449
1450/[\x{3a3}]/Bi,utf
1451
1452/[^\x{3a3}]/Bi,utf
1453
1454/[\x{3a3}]+/Bi,utf
1455
1456/[^\x{3a3}]+/Bi,utf
1457
1458/a*\x{3a3}/Bi,utf
1459
1460/\x{3a3}+a/Bi,utf
1461
1462/\x{3a3}*\x{3c2}/Bi,utf
1463
1464/\x{3a3}{3}/i,utf,aftertext
1465    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1466
1467/\x{3a3}{2,4}/i,utf,aftertext
1468    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1469
1470/\x{3a3}{2,4}?/i,utf,aftertext
1471    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1472
1473/\x{3a3}+./i,utf,aftertext
1474    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1475
1476/\x{3a3}++./i,utf,aftertext
1477\= Expect no match
1478    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1479
1480/\x{3a3}*\x{3c2}/Bi,utf
1481
1482/[^\x{3a3}]*\x{3c2}/Bi,utf
1483
1484/[^a]*\x{3c2}/Bi,utf
1485
1486/ist/Bi,utf
1487\= Expect no match
1488    ikt
1489
1490/is+t/i,utf
1491    iSs\x{17f}t
1492\= Expect no match
1493    ikt
1494
1495/is+?t/i,utf
1496\= Expect no match
1497    ikt
1498
1499/is?t/i,utf
1500\= Expect no match
1501    ikt
1502
1503/is{2}t/i,utf
1504\= Expect no match
1505    iskt
1506
1507# This property is a PCRE special
1508
1509/^\p{Xuc}/utf
1510    $abc
1511    @abc
1512    `abc
1513    \x{1234}abc
1514\= Expect no match
1515    abc
1516
1517/^\p{Xuc}+/utf
1518    $@`\x{a0}\x{1234}\x{e000}**
1519\= Expect no match
1520    \x{9f}
1521
1522/^\p{Xuc}+?/utf
1523    $@`\x{a0}\x{1234}\x{e000}**
1524\= Expect no match
1525    \x{9f}
1526
1527/^\p{Xuc}+?\*/utf
1528    $@`\x{a0}\x{1234}\x{e000}**
1529\= Expect no match
1530    \x{9f}
1531
1532/^\p{Xuc}++/utf
1533    $@`\x{a0}\x{1234}\x{e000}**
1534\= Expect no match
1535    \x{9f}
1536
1537/^\p{Xuc}{3,5}/utf
1538    $@`\x{a0}\x{1234}\x{e000}**
1539\= Expect no match
1540    \x{9f}
1541
1542/^\p{Xuc}{3,5}?/utf
1543    $@`\x{a0}\x{1234}\x{e000}**
1544\= Expect no match
1545    \x{9f}
1546
1547/^[\p{Xuc}]/utf
1548    $@`\x{a0}\x{1234}\x{e000}**
1549\= Expect no match
1550    \x{9f}
1551
1552/^[\p{Xuc}]+/utf
1553    $@`\x{a0}\x{1234}\x{e000}**
1554\= Expect no match
1555    \x{9f}
1556
1557/^\P{Xuc}/utf
1558    abc
1559\= Expect no match
1560    $abc
1561    @abc
1562    `abc
1563    \x{1234}abc
1564
1565/^[\P{Xuc}]/utf
1566    abc
1567\= Expect no match
1568    $abc
1569    @abc
1570    `abc
1571    \x{1234}abc
1572
1573# Some auto-possessification tests
1574
1575/\pN+\z/B
1576
1577/\PN+\z/B
1578
1579/\pN+/B
1580
1581/\PN+/B
1582
1583/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
1584
1585/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
1586
1587/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
1588
1589/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
1590
1591/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
1592
1593/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
1594
1595/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
1596
1597/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
1598
1599/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
1600
1601/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
1602
1603/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
1604
1605/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
1606
1607/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
1608
1609/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
1610
1611/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
1612
1613/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
1614
1615/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
1616
1617/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
1618
1619/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
1620
1621/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
1622
1623# End auto-possessification tests
1624
1625/\w+/B,utf,ucp,auto_callout
1626    abcd
1627
1628/[\p{N}]?+/B,no_auto_possess
1629
1630/[\p{L}ab]{2,3}+/B,no_auto_possess
1631
1632/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
1633
1634/.+\X/Bsx
1635
1636/\X+$/Bmx
1637
1638/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
1639
1640/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
1641
1642/[RST]+/Bi,utf,ucp
1643
1644/[R-T]+/Bi,utf,ucp
1645
1646/[Q-U]+/Bi,utf,ucp
1647
1648/^s?c/Iim,utf
1649    scat
1650
1651/\X?abc/utf,no_start_optimize
1652    \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
1653
1654/\x{100}\x{200}\K\x{300}/utf,startchar
1655    \x{100}\x{200}\x{300}
1656
1657# Test UTF characters in a substitution
1658
1659/ábc/utf,replace=XሴZ
1660    123ábc123
1661
1662/(?<=abc)(|def)/g,utf,replace=<$0>
1663    123abcáyzabcdef789abcሴqr
1664
1665/[A-`]/iB,utf
1666    abcdefghijklmno
1667
1668/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk
1669    \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1670
1671/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk
1672    \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1673
1674"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
1675
1676/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
1677
1678"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
1679
1680/[\pS#moq]/
1681    =
1682
1683/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
1684    cxxxz
1685
1686/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
1687    abcd
1688
1689/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
1690    a\x{e0}\x{101}\x{c0}\x{102}
1691
1692/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
1693    ab12cde
1694
1695/(*UCP)(*UTF)[[:>:]]X/B
1696
1697/abc/utf,replace=xyz
1698    abc\=zero_terminate
1699
1700/a[[:punct:]b]/ucp,bincode
1701
1702/a[[:punct:]b]/utf,ucp,bincode
1703
1704/a[b[:punct:]]/utf,ucp,bincode
1705
1706/[[:^ascii:]]/utf,ucp,bincode
1707
1708/[[:^ascii:]\w]/utf,ucp,bincode
1709
1710/[\w[:^ascii:]]/utf,ucp,bincode
1711
1712/[^[:ascii:]\W]/utf,ucp,bincode
1713    \x{de}
1714    \x{200}
1715\= Expect no match
1716    \x{589}
1717    \x{37e}
1718
1719/[[:^ascii:]a]/utf,ucp,bincode
1720
1721/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
1722
1723/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
1724
1725/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
1726
1727/[\D]/utf
1728    \x{1d7cf}
1729
1730/[\D\P{Nd}]/utf
1731    \x{1d7cf}
1732
1733/[^\D]/utf
1734    a9b
1735\= Expect no match
1736    \x{1d7cf}
1737
1738/[^\D\P{Nd}]/utf
1739    a9b
1740    \x{1d7cf}
1741\= Expect no match
1742    \x{10000}
1743
1744# Hex uses pattern length, not zero-terminated. This tests for overrunning
1745# the given length of a pattern.
1746
1747/'(*UTF)'/hex
1748
1749/'#('/hex,extended,utf
1750
1751/a(?<=A\XB)/utf
1752
1753/../utf,auto_callout
1754    \n\x{123}\x{123}\x{123}\x{123}
1755
1756# This tests processing wide characters in extended mode.
1757
1758/XȀ/x,utf
1759
1760# These three test a bug fix that was not clearing up after a locale setting
1761# when the test or a subsequent one matched a wide character.
1762
1763//locale=C
1764
1765/[\P{Yi}]/utf
1766\x{2f000}
1767
1768/[\P{Yi}]/utf,locale=C
1769\x{2f000}
1770
1771/^(?<!(?=��))/B,utf
1772
1773# Horizontal and vertical space lists ignore caseless
1774
1775/[\HH]/Bi,utf
1776
1777/[^\HH]/Bi,utf
1778
1779//g,utf
1780    \=zero_terminate
1781
1782/^(?1)\p{Nd}{3}(a)/
1783    a123a
1784
1785/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
1786
1787# ---------------------------------------------------------------------------
1788
1789# A bunch of tests that hit lines of code that others do not (at least when
1790# these were created).
1791
1792/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
1793\= Expect no match
1794    bbb
1795    cc
1796
1797/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
1798\= Expect no match
1799    aaa\x{100}
1800
1801/^X\X/no_start_optimize,no_auto_possess
1802\= Expect no match
1803    X
1804
1805/^X\p{L&}+?/no_start_optimize,no_auto_possess
1806\= Expect no match
1807    X
1808
1809/^X\p{L}+?/no_start_optimize,no_auto_possess
1810\= Expect no match
1811    X
1812
1813/^X\p{Lu}+?/no_start_optimize,no_auto_possess
1814\= Expect no match
1815    X
1816
1817/^X\p{Arabic}+?/no_start_optimize,no_auto_possess
1818\= Expect no match
1819    X
1820
1821/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
1822\= Expect no match
1823    X
1824
1825/^X\s+?/ucp,no_start_optimize,no_auto_possess
1826\= Expect no match
1827    X
1828    XX
1829
1830/^X\S+?/ucp,no_start_optimize,no_auto_possess
1831    XX
1832\= Expect no match
1833    X
1834
1835/^X\w+?/ucp,no_start_optimize,no_auto_possess
1836\= Expect no match
1837    X
1838
1839/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
1840\= Expect no match
1841    X
1842
1843/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
1844\= Expect no match
1845    X
1846
1847/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
1848\= Expect no match
1849    X
1850
1851/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
1852\= Expect no match
1853    X
1854
1855/^X\R+?/utf,no_start_optimize,no_auto_possess
1856\= Expect no match
1857    X
1858
1859/^X\H+?/utf,no_start_optimize,no_auto_possess
1860\= Expect no match
1861    X
1862
1863/^X\V+?/utf,no_start_optimize,no_auto_possess
1864\= Expect no match
1865    X
1866
1867/^X\s+?/utf,no_start_optimize,no_auto_possess
1868\= Expect no match
1869    X
1870    XX
1871
1872/^X\S+?/utf,no_start_optimize,no_auto_possess
1873\= Expect no match
1874    X
1875
1876/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
1877    XYYYZ
1878\= Expect no match
1879    XY
1880    XYY
1881    XYYY
1882    XYYYYZ
1883
1884/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
1885\= Expect no match
1886    XY
1887    XY!
1888
1889/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
1890\= Expect no match
1891    XY
1892    XY!
1893
1894/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
1895\= Expect no match
1896    XY
1897    XY!
1898
1899/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
1900\= Expect no match
1901    XY
1902    XY!
1903    XY\x{2f00}!
1904
1905/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
1906\= Expect no match
1907    XY
1908    XY!
1909
1910/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
1911\= Expect no match
1912    X\n
1913    X\n!
1914    X\n\n!
1915
1916/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
1917\= Expect no match
1918    XYY\n
1919
1920/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
1921\= Expect no match
1922    XY
1923    XY!
1924    XYY!
1925
1926/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
1927\= Expect no match
1928    X
1929    X\x{b5}
1930    X\x{b5}\x{b5}Y
1931
1932/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
1933\= Expect no match
1934    X
1935    X$
1936    X@@Y
1937
1938/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
1939\= Expect partial match
1940    XYY\r\=ph
1941\= Expect no match
1942    X
1943
1944/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
1945\= Expect no match
1946    X
1947    XYY
1948
1949/^X\R+?Z/utf,no_start_optimize,no_auto_possess
1950\= Expect no match
1951    X\nX
1952    X\n\rX
1953    X\n\r\nX
1954    X\n\n
1955    X\n\x{0c}
1956
1957/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
1958\= Expect no match
1959    X\nX
1960    X\n\rX
1961    X\n\r\nX
1962    X\n\n
1963    X\n\x{0c}
1964
1965/^X\H+?Z/utf,no_start_optimize,no_auto_possess
1966\= Expect no match
1967    XY\t
1968    XYY
1969
1970/^X\h+?Z/utf,no_start_optimize,no_auto_possess
1971\= Expect no match
1972    X\t\t
1973    X\tY
1974
1975/^X\V+?Z/utf,no_start_optimize,no_auto_possess
1976\= Expect no match
1977    XY\n
1978    XYY
1979
1980/^X\v+?Z/utf,no_start_optimize,no_auto_possess
1981\= Expect no match
1982    X\n\n
1983    X\nY
1984
1985/^X\D+?Z/utf,no_start_optimize,no_auto_possess
1986\= Expect no match
1987    XY9
1988    XYY
1989
1990/^X\d+?Z/utf,no_start_optimize,no_auto_possess
1991\= Expect no match
1992    X99
1993    X9Y
1994
1995/^X\S+?Z/utf,no_start_optimize,no_auto_possess
1996\= Expect no match
1997    XY\n
1998    XYY
1999
2000/^X\s+?Z/utf,no_start_optimize,no_auto_possess
2001\= Expect no match
2002    X\n\n
2003    X\nY
2004
2005/^X\W+?Z/utf,no_start_optimize,no_auto_possess
2006\= Expect no match
2007    X.A
2008    X++
2009
2010/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
2011\= Expect no match
2012    XY
2013    XY!
2014
2015/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
2016\= Expect no match
2017    XY
2018
2019/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
2020\= Expect no match
2021    XY
2022
2023/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
2024\= Expect no match
2025    XYY
2026
2027/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
2028\= Expect no match
2029    X$
2030
2031# ----------------------------------------------------------------------
2032# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
2033
2034/\x{d800}/B,utf,bad_escape_is_literal
2035
2036/\ud800/B,utf,alt_bsux,bad_escape_is_literal
2037
2038# ----------------------------------------------------------------------
2039
2040/Aሴ+B/literal,utf,no_utf_check
2041    Aሴ+B
2042
2043# These are here because I upgraded to Unicode 10.0.0 before Perl did, so it
2044# doesn't recognize all these scripts. In time these three tests can be moved
2045# to test 4.
2046
2047/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
2048  (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
2049  (\p{Zanabazar_Square}+)/x,utf
2050    \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
2051
2052/^\x{1E900}\x{104B0}/i,utf
2053    \x{1E900}\x{104B0}
2054    \x{1E922}\x{104D8}
2055
2056/^(?:(\X)(?C))+$/utf
2057    \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
2058
2059# Similarly for Unicode 11.0.0
2060
2061/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
2062  (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
2063    \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
2064
2065# Regional indicators
2066
2067/^(\X)(\X)/utf,aftertext
2068    \x{1F1E6}\x{1F1E7}\x{1F1E7}B
2069    \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
2070
2071# More differences from Perl
2072
2073/^\p{Common}/utf
2074    \x{60c}
2075    \x{61f}
2076    \x{964}
2077    \x{965}
2078
2079/^\p{Inherited}/utf
2080    \x{64b}
2081    \x{654}
2082    \x{655}
2083    \x{1D1AA}
2084
2085/\N{U+}/
2086
2087/\N{U+}/utf
2088
2089/\N{U}/
2090
2091# This tests the non-UTF Unicode NEL pattern whitespace character, only
2092# recognized by PCRE2 with /x when there is Unicode support.
2093
2094/A
2095
2096�B/x
2097    AB
2098
2099# This tests Unicode Pattern White Space characters in verb names when they
2100# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
2101# with code points greater than 255 between A, B, and C in the pattern.
2102
2103/(*: A‎B
C)abc/x,utf,mark,alt_verbnames
2104    abc
2105
2106# Script run tests: auto-possessification
2107
2108/^(*sr:.*)/B,utf
2109    paypаl.com   A classic example of why script run checks are a good thing
2110
2111/^(*sr:.*(*ACCEPT))/utf
2112    paypаl.com   But *ACCEPT breaks things
2113
2114/^(*sr:\x{2e80}*)/B,utf
2115
2116/^(*sr:\x{2e80}*)\x{2e80}/B,utf
2117
2118/(?<!)(*sr:)/B
2119
2120/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
2121   abcXBXYCCC!
2122
2123# Some script run patterns are broken in Perl 5.28.0. These can be moved into
2124# test 4 when a mended version of Perl is released.
2125
2126/^(*sr:.{4})/utf
2127    \x{0980}12\x{0993}     Bengali Common-digits Bengali
2128    \x{0780}12\x{07b1}     Thaana Common-digits Thaana
2129    \x{0e01}12\x{0e5b}     Thai Common-digits Thai
2130    \x{1780}12\x{19ff}     Khmer Common-digits Khmer
2131    \x{0904}12\x{0939}     Devanagari Common-digits Devanagari
2132    A\x{ff10}\x{ff19}B     Latin Common-notascii-digits Latin
2133    A\x{1d7ce}\x{1d7cf}B   Latin fancy-common-digits Latin
2134
2135# These ones involve non-ASCII but nevertheless Common digits. As of October
2136# 2018 even blead Perl wasn't handling all of these - but is going to.
2137
2138/^(*sr:.{4})/utf
2139    A\x{ff10}\x{ff19}B     Latin Common-notascii-digits Latin
2140    \x{ff10}\x{ff19}..     Common-notascii-digits Common Common
2141    A\x{ff10}BC            Latin Common-notascii-digit Latin Latin
2142    A\x{1d7ce}\x{1d7cf}B   Latin fancy-common-digits Latin
2143    \x{1d7ce}\x{1d7cf},,   fancy-common-digits Common Common
2144    A\x{1d7ce}BC           Latin fancy-common-digit Latin Latin
2145
2146# Some Unicode 12.1.0 new script characters
2147
2148/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
2149    \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
2150
2151# Some Unicode 13.0.0 new script characters
2152
2153/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
2154    \x{10FB0}\x{11900}\x{18B00}\x{10E80}
2155
2156# -------
2157
2158# Test reference and errors in non-ASCII characters in group names
2159
2160/(?'��ABC'...)/I,utf
2161   abcde\=copy=��ABC
2162
2163# Bad ones
2164
2165/(?'AB၌C'...)\g{AB၌C}/utf
2166
2167/(?'٠ABC'...)/utf
2168
2169/(?'²ABC'...)/utf
2170
2171/(?'X²ABC'...)/utf
2172
2173# -------
2174
2175/\p{Any}*xyz/I
2176
2177/(|�)7/caseless,ucp
2178
2179/(\xc1)\1/i,ucp
2180    \xc1\xe1\=no_jit
2181
2182/\p{L&}+\p{bidi_control}/B
2183
2184/\p{bidi_control}+\p{L&}/B
2185
2186/\p{han}/B
2187
2188/\p{script:han}/B
2189
2190/\p{sc:han}/B
2191
2192/\p{script extensions:han}/B
2193
2194/\p{scx:han}/B
2195
2196# Test error - invalid script name
2197
2198/\p{sc:L}/
2199
2200# Some Boolean property tests that differ from Perl
2201
2202/\p{emojimodifierbase}\p{ebase}/g,utf
2203    >AN<>\x{261d}\x{1faf6}<>yz<
2204
2205/\p{graphemelink}\p{grlink}/g,utf
2206    >AN<>\x{11d97}\x{94d}<>yz<
2207
2208/\p{soft dotted}\p{sd}/g,utf
2209    >AF23<>\x{1df1a}\x{69}<>yz<
2210
2211# ------------------------------------------------
2212
2213/\p{\2b[:x�igi:t:_/
2214
2215# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
2216# the restriction.
2217
2218/AskZ/i,utf,caseless_restrict
2219    AskZ
2220    aSKz
2221\= Expect no match
2222    A\x{17f}kZ
2223    As\x{212a}Z
2224
2225/AskZ/i,utf
2226    AskZ
2227    aSKz
2228    A\x{17f}kZ
2229    As\x{212a}Z
2230
2231/A\x{17f}\x{212a}Z/ir,utf
2232    \= Expect no match
2233    AskZ
2234
2235/A\x{17f}\x{212a}Z/i,utf
2236    AskZ
2237
2238/[AskZ]+/i,utf,caseless_restrict
2239    AskZ
2240    aSKz
2241    A\x{17f}kZ
2242    As\x{212a}Z
2243
2244/[AskZ]+/i,utf
2245    AskZ
2246    aSKz
2247    A\x{17f}kZ
2248    As\x{212a}Z
2249
2250/[\x{17f}\x{212a}]+/ir,utf
2251\= Expect no match
2252    AskZ
2253
2254/[\x{17f}\x{212a}]+/i,utf
2255    AskZ
2256
2257/[^s]+/ir,utf
2258    A\x{17f}Z
2259
2260/[^s]+/i,utf
2261    A\x{17f}Z
2262
2263/[^k]+/ir,utf
2264    A\x{212a}Z
2265
2266/[^k]+/i,utf
2267    A\x{212a}Z
2268
2269/[^sk]+/ir,utf
2270    A\x{17f}\x{212a}Z
2271
2272/[^sk]+/i,utf
2273    A\x{17f}\x{212a}Z
2274
2275/[^\x{17f}]+/ir,utf
2276    AsSZ
2277
2278/[^\x{17f}]+/i,utf
2279    AsSZ
2280
2281/[Ss]+/irB,utf
2282    Sss\x{17f}ss
2283
2284/[Ss]+/iB,utf
2285    Sss\x{17f}ss
2286
2287/[S\x{17f}]/irB,utf
2288
2289/[S\x{17f}]/iB,utf
2290
2291/[\x{17f}s]/irB,utf
2292
2293/[\x{17f}s]/iB,utf
2294
2295/[\x{4b}\x{6b}]/irB,utf
2296
2297/[\x{4b}\x{6b}]/iB,utf
2298
2299/s(?r)s(?-r)s(?r:s)s/i,utf
2300    \x{17f}S\x{17f}S\x{17f}
2301\= Expect no match
2302    \x{17f}\x{17f}\x{17f}S\x{17f}
2303    \x{17f}S\x{17f}\x{17f}\x{17f}
2304
2305/k(?^i)k/ir,utf
2306    K\x{212a}
2307\= Expect no match
2308    \x{212a}\x{212a}
2309
2310# End caseless restrict tests
2311
2312# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
2313
2314# DIGITS
2315
2316/\d+/i,utf
2317    123\x{660}456
2318
2319/\d+/i,utf,ucp
2320    123\x{660}456
2321
2322/\d+/i,utf,ucp,ascii_bsd
2323    123\x{660}456
2324
2325/[\d]+/i,utf
2326    123\x{660}456
2327
2328/[\d]+/i,utf,ucp
2329    123\x{660}456
2330
2331/[\d]+/i,utf,ucp,ascii_bsd
2332    123\x{660}456
2333
2334/\d(?aD)\d(?-aD)\d/utf,ucp
2335    \x{660}9\x{660}
2336\= Expect no match
2337    \x{660}\x{660}\x{660}
2338
2339/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2340    999
2341    9\x{660}9
2342
2343/\d(?a)\d(?-a)\d/utf,ucp
2344    \x{660}9\x{660}
2345\= Expect no match
2346    \x{660}\x{660}\x{660}
2347
2348/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2349    999
2350    9\x{660}9
2351
2352# SPACES
2353
2354/>\s+</i,utf
2355    >  <
2356\= Expect no match
2357    >\x{a0} <
2358
2359/>\s+</i,utf,ucp
2360    >  <
2361    >\x{a0} <
2362
2363/>\s+</i,utf,ucp,ascii_bss
2364    >  <
2365\= Expect no match
2366    >\x{a0} <
2367
2368/>[\s]+</i,utf
2369    >  <
2370\= Expect no match
2371    >\x{a0} <
2372
2373/>[\s]+</i,utf,ucp
2374    >  <
2375    >\x{a0} <
2376
2377/>[\s]+</i,utf,ucp,ascii_bss
2378    >  <
2379\= Expect no match
2380    >\x{a0} <
2381
2382/>\s(?aS)\s(?-aS)\s</utf,ucp
2383    >\x{a0} \x{a0}<
2384\= Expect no match
2385    >\x{a0}\x{a0}\x{a0}<
2386
2387/>\s(?a)\s(?-a)\s</utf,ucp
2388    >\x{a0} \x{a0}<
2389\= Expect no match
2390    >\x{a0}\x{a0}\x{a0}<
2391
2392# WORDS
2393
2394/\w+/i,utf
2395    123\x{660}abc
2396
2397/\w+/i,utf,ucp
2398    123\x{660}abc
2399
2400/\w+/i,utf,ucp,ascii_bsw
2401    123\x{660}abc
2402
2403/[\w]+/i,utf
2404    123\x{660}abc
2405
2406/[\w]+/i,utf,ucp
2407    123\x{660}abc
2408
2409/[\w]+/i,utf,ucp,ascii_bsw
2410    123\x{660}abc
2411
2412/\w(?aW)\w(?-aW)\w/utf,ucp
2413    \x{660}A\x{c0}
2414\= Expect no match
2415    \x{660}\x{c0}\x{c0}
2416
2417/\w(?a)\w(?-a)\w/utf,ucp
2418    \x{660}A\x{c0}
2419\= Expect no match
2420    \x{660}\x{c0}\x{c0}
2421
2422# WORD BOUNDARY
2423
2424/\bABC\b/utf
2425    \x{c0}ABC\x{d0}
2426
2427/\bABC\b/utf,ucp
2428\= Expect no match
2429    \x{c0}ABC\x{d0}
2430
2431/\bABC\b/utf,ucp,ascii_bsw
2432    \x{c0}ABC\x{d0}
2433
2434/\bABC\b/utf,ucp,ascii_all
2435    \x{c0}ABC\x{d0}
2436
2437# POSIX
2438
2439/^[[:digit:]]+$/utf,ucp
2440    123456
2441    123\x{660}456
2442
2443/^[[:digit:]]+$/utf,ucp,ascii_digit
2444    123456
2445\= Expect no match
2446    123\x{660}456
2447
2448/[[:digit:]]+/g,utf,ucp,ascii_digit
2449    123\x{660}456
2450
2451/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
2452    11
2453    \x{ff11}1
2454\= Expect no match
2455    1\x{ff11}
2456
2457/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
2458    11
2459    \x{ff11}1
2460\= Expect no match
2461    1\x{ff11}
2462
2463/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
2464    11
2465\= Expect no match
2466    \x{ff11}1
2467    1\x{ff11}
2468
2469/[[:digit:]]+/utf,ucp,ascii_posix
2470    123\x{660}456
2471
2472/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
2473    11
2474    \x{ff11}1
2475\= Expect no match
2476    1\x{ff11}
2477
2478/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
2479    11
2480    \x{ff11}1
2481\= Expect no match
2482    1\x{ff11}
2483
2484/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
2485    11
2486    \x{ff11}1
2487\= Expect no match
2488    1\x{ff11}
2489
2490/^[[:xdigit:]]+$/utf,ucp
2491    f0
2492    1A
2493    d\x{ff10}
2494    \x{ff26}8
2495\= Expect no match
2496    8g\=no_jit
2497
2498/^[[:xdigit:]]+$/utf,ucp,ascii_digit
2499    f0
2500    1A
2501\= Expect no match
2502    d\x{ff10}
2503    \x{ff26}8
2504    8g
2505
2506/>[[:space:]]+</utf,ucp
2507    >\x{a0} \x{a0}<
2508    >\x{a0}\x{a0}\x{a0}<
2509
2510/>[[:space:]]+</utf,ucp,ascii_posix
2511\= Expect no match
2512    >\x{a0} \x{a0}<
2513
2514/(?aP)[[:alnum:]]+/i,ucp,utf
2515    abcáxyz
2516    abc\x{660}xyz
2517
2518/(?aP)[[:alnum:]\d]+/i,ucp,utf
2519    abc\x{660}xyz
2520
2521/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
2522    \x{660}A\x{660}
2523\= Expect no match
2524    \x{660}\x{660}\x{660}
2525
2526# VARIOUS
2527
2528/[\d\s\w]+/a,ucp,utf
2529    9 A\x{660}À
2530    9 AÀ\x{660}
2531
2532# End PCRE2_EXTRA_ASCII_xxx tests
2533
2534/(?<!(|l ))/utf
2535    (?<!(|l ))
2536
2537# End of testinput5
2538