xref: /aosp_15_r20/external/pcre/testdata/testinput10 (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi# This set of tests is for UTF-8 support and Unicode property support, with
2*22dc650dSSadaf Ebrahimi# relevance only for the 8-bit library.
3*22dc650dSSadaf Ebrahimi
4*22dc650dSSadaf Ebrahimi#newline_default lf any anycrlf
5*22dc650dSSadaf Ebrahimi
6*22dc650dSSadaf Ebrahimi# The next 5 patterns have UTF-8 errors
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi/[�]/utf
9*22dc650dSSadaf Ebrahimi
10*22dc650dSSadaf Ebrahimi/�/utf
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi/���xxx/utf
13*22dc650dSSadaf Ebrahimi
14*22dc650dSSadaf Ebrahimi/��������/utf
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi/��������/match_invalid_utf
17*22dc650dSSadaf Ebrahimi
18*22dc650dSSadaf Ebrahimi# Now test subjects
19*22dc650dSSadaf Ebrahimi
20*22dc650dSSadaf Ebrahimi/badutf/utf
21*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
22*22dc650dSSadaf Ebrahimi    X\xdf
23*22dc650dSSadaf Ebrahimi    XX\xef
24*22dc650dSSadaf Ebrahimi    XXX\xef\x80
25*22dc650dSSadaf Ebrahimi    X\xf7
26*22dc650dSSadaf Ebrahimi    XX\xf7\x80
27*22dc650dSSadaf Ebrahimi    XXX\xf7\x80\x80
28*22dc650dSSadaf Ebrahimi    \xfb
29*22dc650dSSadaf Ebrahimi    \xfb\x80
30*22dc650dSSadaf Ebrahimi    \xfb\x80\x80
31*22dc650dSSadaf Ebrahimi    \xfb\x80\x80\x80
32*22dc650dSSadaf Ebrahimi    \xfd
33*22dc650dSSadaf Ebrahimi    \xfd\x80
34*22dc650dSSadaf Ebrahimi    \xfd\x80\x80
35*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80
36*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80\x80
37*22dc650dSSadaf Ebrahimi    \xdf\x7f
38*22dc650dSSadaf Ebrahimi    \xef\x7f\x80
39*22dc650dSSadaf Ebrahimi    \xef\x80\x7f
40*22dc650dSSadaf Ebrahimi    \xf7\x7f\x80\x80
41*22dc650dSSadaf Ebrahimi    \xf7\x80\x7f\x80
42*22dc650dSSadaf Ebrahimi    \xf7\x80\x80\x7f
43*22dc650dSSadaf Ebrahimi    \xfb\x7f\x80\x80\x80
44*22dc650dSSadaf Ebrahimi    \xfb\x80\x7f\x80\x80
45*22dc650dSSadaf Ebrahimi    \xfb\x80\x80\x7f\x80
46*22dc650dSSadaf Ebrahimi    \xfb\x80\x80\x80\x7f
47*22dc650dSSadaf Ebrahimi    \xfd\x7f\x80\x80\x80\x80
48*22dc650dSSadaf Ebrahimi    \xfd\x80\x7f\x80\x80\x80
49*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x7f\x80\x80
50*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80\x7f\x80
51*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80\x80\x7f
52*22dc650dSSadaf Ebrahimi    \xed\xa0\x80
53*22dc650dSSadaf Ebrahimi    \xc0\x8f
54*22dc650dSSadaf Ebrahimi    \xe0\x80\x8f
55*22dc650dSSadaf Ebrahimi    \xf0\x80\x80\x8f
56*22dc650dSSadaf Ebrahimi    \xf8\x80\x80\x80\x8f
57*22dc650dSSadaf Ebrahimi    \xfc\x80\x80\x80\x80\x8f
58*22dc650dSSadaf Ebrahimi    \x80
59*22dc650dSSadaf Ebrahimi    \xfe
60*22dc650dSSadaf Ebrahimi    \xff
61*22dc650dSSadaf Ebrahimi
62*22dc650dSSadaf Ebrahimi/badutf/utf
63*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
64*22dc650dSSadaf Ebrahimi    XX\xfb\x80\x80\x80\x80
65*22dc650dSSadaf Ebrahimi    XX\xfd\x80\x80\x80\x80\x80
66*22dc650dSSadaf Ebrahimi    XX\xf7\xbf\xbf\xbf
67*22dc650dSSadaf Ebrahimi
68*22dc650dSSadaf Ebrahimi/shortutf/utf
69*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
70*22dc650dSSadaf Ebrahimi    XX\xdf\=ph
71*22dc650dSSadaf Ebrahimi    XX\xef\=ph
72*22dc650dSSadaf Ebrahimi    XX\xef\x80\=ph
73*22dc650dSSadaf Ebrahimi    \xf7\=ph
74*22dc650dSSadaf Ebrahimi    \xf7\x80\=ph
75*22dc650dSSadaf Ebrahimi    \xf7\x80\x80\=ph
76*22dc650dSSadaf Ebrahimi    \xfb\=ph
77*22dc650dSSadaf Ebrahimi    \xfb\x80\=ph
78*22dc650dSSadaf Ebrahimi    \xfb\x80\x80\=ph
79*22dc650dSSadaf Ebrahimi    \xfb\x80\x80\x80\=ph
80*22dc650dSSadaf Ebrahimi    \xfd\=ph
81*22dc650dSSadaf Ebrahimi    \xfd\x80\=ph
82*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\=ph
83*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80\=ph
84*22dc650dSSadaf Ebrahimi    \xfd\x80\x80\x80\x80\=ph
85*22dc650dSSadaf Ebrahimi
86*22dc650dSSadaf Ebrahimi/anything/utf
87*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
88*22dc650dSSadaf Ebrahimi    X\xc0\x80
89*22dc650dSSadaf Ebrahimi    XX\xc1\x8f
90*22dc650dSSadaf Ebrahimi    XXX\xe0\x9f\x80
91*22dc650dSSadaf Ebrahimi    \xf0\x8f\x80\x80
92*22dc650dSSadaf Ebrahimi    \xf8\x87\x80\x80\x80
93*22dc650dSSadaf Ebrahimi    \xfc\x83\x80\x80\x80\x80
94*22dc650dSSadaf Ebrahimi    \xfe\x80\x80\x80\x80\x80
95*22dc650dSSadaf Ebrahimi    \xff\x80\x80\x80\x80\x80
96*22dc650dSSadaf Ebrahimi    \xf8\x88\x80\x80\x80
97*22dc650dSSadaf Ebrahimi    \xf9\x87\x80\x80\x80
98*22dc650dSSadaf Ebrahimi    \xfc\x84\x80\x80\x80\x80
99*22dc650dSSadaf Ebrahimi    \xfd\x83\x80\x80\x80\x80
100*22dc650dSSadaf Ebrahimi\= Expect no match
101*22dc650dSSadaf Ebrahimi    \xc3\x8f
102*22dc650dSSadaf Ebrahimi    \xe0\xaf\x80
103*22dc650dSSadaf Ebrahimi    \xe1\x80\x80
104*22dc650dSSadaf Ebrahimi    \xf0\x9f\x80\x80
105*22dc650dSSadaf Ebrahimi    \xf1\x8f\x80\x80
106*22dc650dSSadaf Ebrahimi    \xf8\x88\x80\x80\x80\=no_utf_check
107*22dc650dSSadaf Ebrahimi    \xf9\x87\x80\x80\x80\=no_utf_check
108*22dc650dSSadaf Ebrahimi    \xfc\x84\x80\x80\x80\x80\=no_utf_check
109*22dc650dSSadaf Ebrahimi    \xfd\x83\x80\x80\x80\x80\=no_utf_check
110*22dc650dSSadaf Ebrahimi
111*22dc650dSSadaf Ebrahimi# Similar tests with offsets
112*22dc650dSSadaf Ebrahimi
113*22dc650dSSadaf Ebrahimi/badutf/utf
114*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
115*22dc650dSSadaf Ebrahimi    X\xdfabcd
116*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=1
117*22dc650dSSadaf Ebrahimi\= Expect no match
118*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=2
119*22dc650dSSadaf Ebrahimi
120*22dc650dSSadaf Ebrahimi/(?<=x)badutf/utf
121*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
122*22dc650dSSadaf Ebrahimi    X\xdfabcd
123*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=1
124*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=2
125*22dc650dSSadaf Ebrahimi    X\xdfabcd\xdf\=offset=3
126*22dc650dSSadaf Ebrahimi\= Expect no match
127*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=3
128*22dc650dSSadaf Ebrahimi
129*22dc650dSSadaf Ebrahimi/(?<=xx)badutf/utf
130*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
131*22dc650dSSadaf Ebrahimi    X\xdfabcd
132*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=1
133*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=2
134*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=3
135*22dc650dSSadaf Ebrahimi
136*22dc650dSSadaf Ebrahimi/(?<=xxxx)badutf/utf
137*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
138*22dc650dSSadaf Ebrahimi    X\xdfabcd
139*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=1
140*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=2
141*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=3
142*22dc650dSSadaf Ebrahimi    X\xdfabc\xdf\=offset=6
143*22dc650dSSadaf Ebrahimi    X\xdfabc\xdf\=offset=7
144*22dc650dSSadaf Ebrahimi\= Expect no match
145*22dc650dSSadaf Ebrahimi    X\xdfabcd\=offset=6
146*22dc650dSSadaf Ebrahimi
147*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf
148*22dc650dSSadaf Ebrahimi
149*22dc650dSSadaf Ebrahimi/\x{1000}/IB,utf
150*22dc650dSSadaf Ebrahimi
151*22dc650dSSadaf Ebrahimi/\x{10000}/IB,utf
152*22dc650dSSadaf Ebrahimi
153*22dc650dSSadaf Ebrahimi/\x{100000}/IB,utf
154*22dc650dSSadaf Ebrahimi
155*22dc650dSSadaf Ebrahimi/\x{10ffff}/IB,utf
156*22dc650dSSadaf Ebrahimi
157*22dc650dSSadaf Ebrahimi/[\x{ff}]/IB,utf
158*22dc650dSSadaf Ebrahimi
159*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf
160*22dc650dSSadaf Ebrahimi
161*22dc650dSSadaf Ebrahimi/\x80/IB,utf
162*22dc650dSSadaf Ebrahimi
163*22dc650dSSadaf Ebrahimi/\xff/IB,utf
164*22dc650dSSadaf Ebrahimi
165*22dc650dSSadaf Ebrahimi/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
166*22dc650dSSadaf Ebrahimi    \x{D55c}\x{ad6d}\x{C5B4}
167*22dc650dSSadaf Ebrahimi
168*22dc650dSSadaf Ebrahimi/\x{65e5}\x{672c}\x{8a9e}/IB,utf
169*22dc650dSSadaf Ebrahimi    \x{65e5}\x{672c}\x{8a9e}
170*22dc650dSSadaf Ebrahimi
171*22dc650dSSadaf Ebrahimi/\x{80}/IB,utf
172*22dc650dSSadaf Ebrahimi
173*22dc650dSSadaf Ebrahimi/\x{084}/IB,utf
174*22dc650dSSadaf Ebrahimi
175*22dc650dSSadaf Ebrahimi/\x{104}/IB,utf
176*22dc650dSSadaf Ebrahimi
177*22dc650dSSadaf Ebrahimi/\x{861}/IB,utf
178*22dc650dSSadaf Ebrahimi
179*22dc650dSSadaf Ebrahimi/\x{212ab}/IB,utf
180*22dc650dSSadaf Ebrahimi
181*22dc650dSSadaf Ebrahimi/[^ab\xC0-\xF0]/IB,utf
182*22dc650dSSadaf Ebrahimi    \x{f1}
183*22dc650dSSadaf Ebrahimi    \x{bf}
184*22dc650dSSadaf Ebrahimi    \x{100}
185*22dc650dSSadaf Ebrahimi    \x{1000}
186*22dc650dSSadaf Ebrahimi\= Expect no match
187*22dc650dSSadaf Ebrahimi    \x{c0}
188*22dc650dSSadaf Ebrahimi    \x{f0}
189*22dc650dSSadaf Ebrahimi
190*22dc650dSSadaf Ebrahimi/Ā{3,4}/IB,utf
191*22dc650dSSadaf Ebrahimi  \x{100}\x{100}\x{100}\x{100\x{100}
192*22dc650dSSadaf Ebrahimi
193*22dc650dSSadaf Ebrahimi/(\x{100}+|x)/IB,utf
194*22dc650dSSadaf Ebrahimi
195*22dc650dSSadaf Ebrahimi/(\x{100}*a|x)/IB,utf
196*22dc650dSSadaf Ebrahimi
197*22dc650dSSadaf Ebrahimi/(\x{100}{0,2}a|x)/IB,utf
198*22dc650dSSadaf Ebrahimi
199*22dc650dSSadaf Ebrahimi/(\x{100}{1,2}a|x)/IB,utf
200*22dc650dSSadaf Ebrahimi
201*22dc650dSSadaf Ebrahimi/\x{100}/IB,utf
202*22dc650dSSadaf Ebrahimi
203*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}*/IB,utf
204*22dc650dSSadaf Ebrahimi
205*22dc650dSSadaf Ebrahimi/a\x{100}\x{101}+/IB,utf
206*22dc650dSSadaf Ebrahimi
207*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB
208*22dc650dSSadaf Ebrahimi
209*22dc650dSSadaf Ebrahimi/[\x{100}]/IB,utf
210*22dc650dSSadaf Ebrahimi    \x{100}
211*22dc650dSSadaf Ebrahimi    Z\x{100}
212*22dc650dSSadaf Ebrahimi    \x{100}Z
213*22dc650dSSadaf Ebrahimi
214*22dc650dSSadaf Ebrahimi/[\xff]/IB,utf
215*22dc650dSSadaf Ebrahimi    >\x{ff}<
216*22dc650dSSadaf Ebrahimi
217*22dc650dSSadaf Ebrahimi/[^\xff]/IB,utf
218*22dc650dSSadaf Ebrahimi
219*22dc650dSSadaf Ebrahimi/\x{100}abc(xyz(?1))/IB,utf
220*22dc650dSSadaf Ebrahimi
221*22dc650dSSadaf Ebrahimi/\777/I,utf
222*22dc650dSSadaf Ebrahimi  \x{1ff}
223*22dc650dSSadaf Ebrahimi  \777
224*22dc650dSSadaf Ebrahimi
225*22dc650dSSadaf Ebrahimi/\x{100}+\x{200}/IB,utf
226*22dc650dSSadaf Ebrahimi
227*22dc650dSSadaf Ebrahimi/\x{100}+X/IB,utf
228*22dc650dSSadaf Ebrahimi
229*22dc650dSSadaf Ebrahimi/^[\QĀ\E-\QŐ\E/B,utf
230*22dc650dSSadaf Ebrahimi
231*22dc650dSSadaf Ebrahimi# This tests the stricter UTF-8 check according to RFC 3629.
232*22dc650dSSadaf Ebrahimi
233*22dc650dSSadaf Ebrahimi/X/utf
234*22dc650dSSadaf Ebrahimi\= Expect UTF-8 errors
235*22dc650dSSadaf Ebrahimi    \x{d800}
236*22dc650dSSadaf Ebrahimi    \x{da00}
237*22dc650dSSadaf Ebrahimi    \x{dfff}
238*22dc650dSSadaf Ebrahimi    \x{110000}
239*22dc650dSSadaf Ebrahimi    \x{2000000}
240*22dc650dSSadaf Ebrahimi    \x{7fffffff}
241*22dc650dSSadaf Ebrahimi\= Expect no match
242*22dc650dSSadaf Ebrahimi    \x{d800}\=no_utf_check
243*22dc650dSSadaf Ebrahimi    \x{da00}\=no_utf_check
244*22dc650dSSadaf Ebrahimi    \x{dfff}\=no_utf_check
245*22dc650dSSadaf Ebrahimi    \x{110000}\=no_utf_check
246*22dc650dSSadaf Ebrahimi    \x{2000000}\=no_utf_check
247*22dc650dSSadaf Ebrahimi    \x{7fffffff}\=no_utf_check
248*22dc650dSSadaf Ebrahimi
249*22dc650dSSadaf Ebrahimi/(*UTF8)\x{1234}/
250*22dc650dSSadaf Ebrahimi    abcd\x{1234}pqr
251*22dc650dSSadaf Ebrahimi
252*22dc650dSSadaf Ebrahimi/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
253*22dc650dSSadaf Ebrahimi
254*22dc650dSSadaf Ebrahimi/\h/I,utf
255*22dc650dSSadaf Ebrahimi    ABC\x{09}
256*22dc650dSSadaf Ebrahimi    ABC\x{20}
257*22dc650dSSadaf Ebrahimi    ABC\x{a0}
258*22dc650dSSadaf Ebrahimi    ABC\x{1680}
259*22dc650dSSadaf Ebrahimi    ABC\x{180e}
260*22dc650dSSadaf Ebrahimi    ABC\x{2000}
261*22dc650dSSadaf Ebrahimi    ABC\x{202f}
262*22dc650dSSadaf Ebrahimi    ABC\x{205f}
263*22dc650dSSadaf Ebrahimi    ABC\x{3000}
264*22dc650dSSadaf Ebrahimi
265*22dc650dSSadaf Ebrahimi/\v/I,utf
266*22dc650dSSadaf Ebrahimi    ABC\x{0a}
267*22dc650dSSadaf Ebrahimi    ABC\x{0b}
268*22dc650dSSadaf Ebrahimi    ABC\x{0c}
269*22dc650dSSadaf Ebrahimi    ABC\x{0d}
270*22dc650dSSadaf Ebrahimi    ABC\x{85}
271*22dc650dSSadaf Ebrahimi    ABC\x{2028}
272*22dc650dSSadaf Ebrahimi
273*22dc650dSSadaf Ebrahimi/\h*A/I,utf
274*22dc650dSSadaf Ebrahimi    CDBABC
275*22dc650dSSadaf Ebrahimi
276*22dc650dSSadaf Ebrahimi/\v+A/I,utf
277*22dc650dSSadaf Ebrahimi
278*22dc650dSSadaf Ebrahimi/\s?xxx\s/I,utf
279*22dc650dSSadaf Ebrahimi
280*22dc650dSSadaf Ebrahimi/\sxxx\s/I,utf,tables=2
281*22dc650dSSadaf Ebrahimi    AB\x{85}xxx\x{a0}XYZ
282*22dc650dSSadaf Ebrahimi    AB\x{a0}xxx\x{85}XYZ
283*22dc650dSSadaf Ebrahimi
284*22dc650dSSadaf Ebrahimi/\S \S/I,utf,tables=2
285*22dc650dSSadaf Ebrahimi    \x{a2} \x{84}
286*22dc650dSSadaf Ebrahimi    A Z
287*22dc650dSSadaf Ebrahimi
288*22dc650dSSadaf Ebrahimi/a+/utf
289*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=1
290*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=3
291*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=4
292*22dc650dSSadaf Ebrahimi\= Expect bad offset value
293*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=6
294*22dc650dSSadaf Ebrahimi\= Expect bad UTF-8 offset
295*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=2
296*22dc650dSSadaf Ebrahimi\= Expect no match
297*22dc650dSSadaf Ebrahimi    a\x{123}aa\=offset=5
298*22dc650dSSadaf Ebrahimi
299*22dc650dSSadaf Ebrahimi/\x{1234}+/Ii,utf
300*22dc650dSSadaf Ebrahimi
301*22dc650dSSadaf Ebrahimi/\x{1234}+?/Ii,utf
302*22dc650dSSadaf Ebrahimi
303*22dc650dSSadaf Ebrahimi/\x{1234}++/Ii,utf
304*22dc650dSSadaf Ebrahimi
305*22dc650dSSadaf Ebrahimi/\x{1234}{2}/Ii,utf
306*22dc650dSSadaf Ebrahimi
307*22dc650dSSadaf Ebrahimi/[^\x{c4}]/IB,utf
308*22dc650dSSadaf Ebrahimi
309*22dc650dSSadaf Ebrahimi/X+\x{200}/IB,utf
310*22dc650dSSadaf Ebrahimi
311*22dc650dSSadaf Ebrahimi/\R/I,utf
312*22dc650dSSadaf Ebrahimi
313*22dc650dSSadaf Ebrahimi/\777/IB,utf
314*22dc650dSSadaf Ebrahimi
315*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf
316*22dc650dSSadaf Ebrahimi    a\x{C4}\x{C4}
317*22dc650dSSadaf Ebrahimi
318*22dc650dSSadaf Ebrahimi/\w+\x{C4}/B,utf,tables=2
319*22dc650dSSadaf Ebrahimi    a\x{C4}\x{C4}
320*22dc650dSSadaf Ebrahimi
321*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf
322*22dc650dSSadaf Ebrahimi    !\x{C4}
323*22dc650dSSadaf Ebrahimi
324*22dc650dSSadaf Ebrahimi/\W+\x{C4}/B,utf,tables=2
325*22dc650dSSadaf Ebrahimi    !\x{C4}
326*22dc650dSSadaf Ebrahimi
327*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf
328*22dc650dSSadaf Ebrahimi    !\x{A1}
329*22dc650dSSadaf Ebrahimi
330*22dc650dSSadaf Ebrahimi/\W+\x{A1}/B,utf,tables=2
331*22dc650dSSadaf Ebrahimi    !\x{A1}
332*22dc650dSSadaf Ebrahimi
333*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf
334*22dc650dSSadaf Ebrahimi    X\x20\x{A0}\x{A0}
335*22dc650dSSadaf Ebrahimi
336*22dc650dSSadaf Ebrahimi/X\s+\x{A0}/B,utf,tables=2
337*22dc650dSSadaf Ebrahimi    X\x20\x{A0}\x{A0}
338*22dc650dSSadaf Ebrahimi
339*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf
340*22dc650dSSadaf Ebrahimi    X\x{A0}\x{A0}
341*22dc650dSSadaf Ebrahimi
342*22dc650dSSadaf Ebrahimi/\S+\x{A0}/B,utf,tables=2
343*22dc650dSSadaf Ebrahimi    X\x{A0}\x{A0}
344*22dc650dSSadaf Ebrahimi
345*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf
346*22dc650dSSadaf Ebrahimi    \x{a0}\x20!
347*22dc650dSSadaf Ebrahimi
348*22dc650dSSadaf Ebrahimi/\x{a0}+\s!/B,utf,tables=2
349*22dc650dSSadaf Ebrahimi    \x{a0}\x20!
350*22dc650dSSadaf Ebrahimi
351*22dc650dSSadaf Ebrahimi/A/utf
352*22dc650dSSadaf Ebrahimi  \x{ff000041}
353*22dc650dSSadaf Ebrahimi  \x{7f000041}
354*22dc650dSSadaf Ebrahimi
355*22dc650dSSadaf Ebrahimi/(*UTF8)abc/never_utf
356*22dc650dSSadaf Ebrahimi
357*22dc650dSSadaf Ebrahimi/abc/utf,never_utf
358*22dc650dSSadaf Ebrahimi
359*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
360*22dc650dSSadaf Ebrahimi
361*22dc650dSSadaf Ebrahimi/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
362*22dc650dSSadaf Ebrahimi
363*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IB,utf
364*22dc650dSSadaf Ebrahimi
365*22dc650dSSadaf Ebrahimi/AB\x{1fb0}/IBi,utf
366*22dc650dSSadaf Ebrahimi
367*22dc650dSSadaf Ebrahimi/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
368*22dc650dSSadaf Ebrahimi    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
369*22dc650dSSadaf Ebrahimi    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
370*22dc650dSSadaf Ebrahimi
371*22dc650dSSadaf Ebrahimi/[ⱥ]/Bi,utf
372*22dc650dSSadaf Ebrahimi
373*22dc650dSSadaf Ebrahimi/[^ⱥ]/Bi,utf
374*22dc650dSSadaf Ebrahimi
375*22dc650dSSadaf Ebrahimi/\h/I
376*22dc650dSSadaf Ebrahimi
377*22dc650dSSadaf Ebrahimi/\v/I
378*22dc650dSSadaf Ebrahimi
379*22dc650dSSadaf Ebrahimi/\R/I
380*22dc650dSSadaf Ebrahimi
381*22dc650dSSadaf Ebrahimi/[[:blank:]]/B,ucp
382*22dc650dSSadaf Ebrahimi
383*22dc650dSSadaf Ebrahimi/\x{212a}+/Ii,utf
384*22dc650dSSadaf Ebrahimi    KKkk\x{212a}
385*22dc650dSSadaf Ebrahimi
386*22dc650dSSadaf Ebrahimi/s+/Ii,utf
387*22dc650dSSadaf Ebrahimi    SSss\x{17f}
388*22dc650dSSadaf Ebrahimi
389*22dc650dSSadaf Ebrahimi/\x{100}*A/IB,utf
390*22dc650dSSadaf Ebrahimi    A
391*22dc650dSSadaf Ebrahimi
392*22dc650dSSadaf Ebrahimi/\x{100}*\d(?R)/IB,utf
393*22dc650dSSadaf Ebrahimi
394*22dc650dSSadaf Ebrahimi/[Z\x{100}]/IB,utf
395*22dc650dSSadaf Ebrahimi    Z\x{100}
396*22dc650dSSadaf Ebrahimi    \x{100}
397*22dc650dSSadaf Ebrahimi    \x{100}Z
398*22dc650dSSadaf Ebrahimi
399*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IB,utf
400*22dc650dSSadaf Ebrahimi
401*22dc650dSSadaf Ebrahimi/[z\Qa-d]Ā\E]/IB,utf
402*22dc650dSSadaf Ebrahimi    \x{100}
403*22dc650dSSadaf Ebrahimi    Ā
404*22dc650dSSadaf Ebrahimi
405*22dc650dSSadaf Ebrahimi/[ab\x{100}]abc(xyz(?1))/IB,utf
406*22dc650dSSadaf Ebrahimi
407*22dc650dSSadaf Ebrahimi/\x{100}*\s/IB,utf
408*22dc650dSSadaf Ebrahimi
409*22dc650dSSadaf Ebrahimi/\x{100}*\d/IB,utf
410*22dc650dSSadaf Ebrahimi
411*22dc650dSSadaf Ebrahimi/\x{100}*\w/IB,utf
412*22dc650dSSadaf Ebrahimi
413*22dc650dSSadaf Ebrahimi/\x{100}*\D/IB,utf
414*22dc650dSSadaf Ebrahimi
415*22dc650dSSadaf Ebrahimi/\x{100}*\S/IB,utf
416*22dc650dSSadaf Ebrahimi
417*22dc650dSSadaf Ebrahimi/\x{100}*\W/IB,utf
418*22dc650dSSadaf Ebrahimi
419*22dc650dSSadaf Ebrahimi/[\x{105}-\x{109}]/IBi,utf
420*22dc650dSSadaf Ebrahimi    \x{104}
421*22dc650dSSadaf Ebrahimi    \x{105}
422*22dc650dSSadaf Ebrahimi    \x{109}
423*22dc650dSSadaf Ebrahimi\= Expect no match
424*22dc650dSSadaf Ebrahimi    \x{100}
425*22dc650dSSadaf Ebrahimi    \x{10a}
426*22dc650dSSadaf Ebrahimi
427*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf
428*22dc650dSSadaf Ebrahimi    Z
429*22dc650dSSadaf Ebrahimi    z
430*22dc650dSSadaf Ebrahimi    \x{39c}
431*22dc650dSSadaf Ebrahimi    \x{178}
432*22dc650dSSadaf Ebrahimi    |
433*22dc650dSSadaf Ebrahimi    \x{80}
434*22dc650dSSadaf Ebrahimi    \x{ff}
435*22dc650dSSadaf Ebrahimi    \x{100}
436*22dc650dSSadaf Ebrahimi    \x{101}
437*22dc650dSSadaf Ebrahimi\= Expect no match
438*22dc650dSSadaf Ebrahimi    \x{102}
439*22dc650dSSadaf Ebrahimi    Y
440*22dc650dSSadaf Ebrahimi    y
441*22dc650dSSadaf Ebrahimi
442*22dc650dSSadaf Ebrahimi/[z-\x{100}]/IBi,utf
443*22dc650dSSadaf Ebrahimi
444*22dc650dSSadaf Ebrahimi/\x{3a3}B/IBi,utf
445*22dc650dSSadaf Ebrahimi
446*22dc650dSSadaf Ebrahimi/abc/utf,replace=�
447*22dc650dSSadaf Ebrahimi    abc
448*22dc650dSSadaf Ebrahimi
449*22dc650dSSadaf Ebrahimi/(?<=(a)(?-1))x/I,utf
450*22dc650dSSadaf Ebrahimi    a\x80zx\=offset=3
451*22dc650dSSadaf Ebrahimi
452*22dc650dSSadaf Ebrahimi/[\W\p{Any}]/B
453*22dc650dSSadaf Ebrahimi    abc
454*22dc650dSSadaf Ebrahimi    123
455*22dc650dSSadaf Ebrahimi
456*22dc650dSSadaf Ebrahimi/[\W\pL]/B
457*22dc650dSSadaf Ebrahimi    abc
458*22dc650dSSadaf Ebrahimi\= Expect no match
459*22dc650dSSadaf Ebrahimi    123
460*22dc650dSSadaf Ebrahimi
461*22dc650dSSadaf Ebrahimi/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
462*22dc650dSSadaf Ebrahimi
463*22dc650dSSadaf Ebrahimi/[\s[:^ascii:]]/B,ucp
464*22dc650dSSadaf Ebrahimi
465*22dc650dSSadaf Ebrahimi# A special extra option allows escaped surrogate code points in 8-bit mode,
466*22dc650dSSadaf Ebrahimi# but subjects containing them must not be UTF-checked.
467*22dc650dSSadaf Ebrahimi
468*22dc650dSSadaf Ebrahimi/\x{d800}/I,utf,allow_surrogate_escapes
469*22dc650dSSadaf Ebrahimi    \x{d800}\=no_utf_check
470*22dc650dSSadaf Ebrahimi
471*22dc650dSSadaf Ebrahimi/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
472*22dc650dSSadaf Ebrahimi    \x{dfff}\x{df01}\=no_utf_check
473*22dc650dSSadaf Ebrahimi
474*22dc650dSSadaf Ebrahimi# This has different starting code units in 8-bit mode.
475*22dc650dSSadaf Ebrahimi
476*22dc650dSSadaf Ebrahimi/^[^ab]/IB,utf
477*22dc650dSSadaf Ebrahimi    c
478*22dc650dSSadaf Ebrahimi    \x{ff}
479*22dc650dSSadaf Ebrahimi    \x{100}
480*22dc650dSSadaf Ebrahimi\= Expect no match
481*22dc650dSSadaf Ebrahimi    aaa
482*22dc650dSSadaf Ebrahimi
483*22dc650dSSadaf Ebrahimi# Offsets are different in 8-bit mode.
484*22dc650dSSadaf Ebrahimi
485*22dc650dSSadaf Ebrahimi/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
486*22dc650dSSadaf Ebrahimi    123abcáyzabcdef789abcሴqr
487*22dc650dSSadaf Ebrahimi
488*22dc650dSSadaf Ebrahimi# Check name length with non-ASCII characters
489*22dc650dSSadaf Ebrahimi
490*22dc650dSSadaf Ebrahimi/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
491*22dc650dSSadaf Ebrahimi
492*22dc650dSSadaf Ebrahimi/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
493*22dc650dSSadaf Ebrahimi
494*22dc650dSSadaf Ebrahimi/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf
495*22dc650dSSadaf Ebrahimi
496*22dc650dSSadaf Ebrahimi/(?(n/utf
497*22dc650dSSadaf Ebrahimi
498*22dc650dSSadaf Ebrahimi/(?(á/utf
499*22dc650dSSadaf Ebrahimi
500*22dc650dSSadaf Ebrahimi# Invalid UTF-8 tests
501*22dc650dSSadaf Ebrahimi
502*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf
503*22dc650dSSadaf Ebrahimi    abcd\x80wxzy\x80pqrs
504*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x80pqrs
505*22dc650dSSadaf Ebrahimi
506*22dc650dSSadaf Ebrahimi/abc/match_invalid_utf
507*22dc650dSSadaf Ebrahimi    ab\x80ab\=ph
508*22dc650dSSadaf Ebrahimi\= Expect no match
509*22dc650dSSadaf Ebrahimi    ab\x80cdef\=ph
510*22dc650dSSadaf Ebrahimi
511*22dc650dSSadaf Ebrahimi/.a/match_invalid_utf
512*22dc650dSSadaf Ebrahimi    ab\=ph
513*22dc650dSSadaf Ebrahimi    ab\=ps
514*22dc650dSSadaf Ebrahimi    b\xf0\x91\x88b\=ph
515*22dc650dSSadaf Ebrahimi    b\xf0\x91\x88b\=ps
516*22dc650dSSadaf Ebrahimi    b\xf0\x91\x88\xb4a
517*22dc650dSSadaf Ebrahimi\= Expect no match
518*22dc650dSSadaf Ebrahimi    b\x80\=ph
519*22dc650dSSadaf Ebrahimi    b\x80\=ps
520*22dc650dSSadaf Ebrahimi    b\xf0\x91\x88\=ph
521*22dc650dSSadaf Ebrahimi    b\xf0\x91\x88\=ps
522*22dc650dSSadaf Ebrahimi
523*22dc650dSSadaf Ebrahimi/.a$/match_invalid_utf
524*22dc650dSSadaf Ebrahimi    ab\=ph
525*22dc650dSSadaf Ebrahimi    ab\=ps
526*22dc650dSSadaf Ebrahimi\= Expect no match
527*22dc650dSSadaf Ebrahimi    b\xf0\x91\x98\=ph
528*22dc650dSSadaf Ebrahimi    b\xf0\x91\x98\=ps
529*22dc650dSSadaf Ebrahimi
530*22dc650dSSadaf Ebrahimi/ab$/match_invalid_utf
531*22dc650dSSadaf Ebrahimi    ab\x80cdeab
532*22dc650dSSadaf Ebrahimi\= Expect no match
533*22dc650dSSadaf Ebrahimi    ab\x80cde
534*22dc650dSSadaf Ebrahimi
535*22dc650dSSadaf Ebrahimi/.../g,match_invalid_utf
536*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x80pqrs
537*22dc650dSSadaf Ebrahimi
538*22dc650dSSadaf Ebrahimi/(?<=x)../g,match_invalid_utf
539*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x80pqrs
540*22dc650dSSadaf Ebrahimi    abcd\x{80}wxzy\x80xpqrs
541*22dc650dSSadaf Ebrahimi
542*22dc650dSSadaf Ebrahimi/X$/match_invalid_utf
543*22dc650dSSadaf Ebrahimi\= Expect no match
544*22dc650dSSadaf Ebrahimi    X\xc4
545*22dc650dSSadaf Ebrahimi
546*22dc650dSSadaf Ebrahimi/(?<=..)X/match_invalid_utf,aftertext
547*22dc650dSSadaf Ebrahimi    AB\x80AQXYZ
548*22dc650dSSadaf Ebrahimi    AB\x80AQXYZ\=offset=5
549*22dc650dSSadaf Ebrahimi    AB\x80\x80AXYZXC\=offset=5
550*22dc650dSSadaf Ebrahimi\= Expect no match
551*22dc650dSSadaf Ebrahimi    AB\x80XYZ
552*22dc650dSSadaf Ebrahimi    AB\x80XYZ\=offset=3
553*22dc650dSSadaf Ebrahimi    AB\xfeXYZ
554*22dc650dSSadaf Ebrahimi    AB\xffXYZ\=offset=3
555*22dc650dSSadaf Ebrahimi    AB\x80AXYZ
556*22dc650dSSadaf Ebrahimi    AB\x80AXYZ\=offset=4
557*22dc650dSSadaf Ebrahimi    AB\x80\x80AXYZ\=offset=5
558*22dc650dSSadaf Ebrahimi
559*22dc650dSSadaf Ebrahimi/.../match_invalid_utf
560*22dc650dSSadaf Ebrahimi    AB\xc4CCC
561*22dc650dSSadaf Ebrahimi\= Expect no match
562*22dc650dSSadaf Ebrahimi    A\x{d800}B
563*22dc650dSSadaf Ebrahimi    A\x{110000}B
564*22dc650dSSadaf Ebrahimi    A\xc4B
565*22dc650dSSadaf Ebrahimi
566*22dc650dSSadaf Ebrahimi/\bX/match_invalid_utf
567*22dc650dSSadaf Ebrahimi    A\x80X
568*22dc650dSSadaf Ebrahimi
569*22dc650dSSadaf Ebrahimi/\BX/match_invalid_utf
570*22dc650dSSadaf Ebrahimi\= Expect no match
571*22dc650dSSadaf Ebrahimi    A\x80X
572*22dc650dSSadaf Ebrahimi
573*22dc650dSSadaf Ebrahimi/(?<=...)X/match_invalid_utf
574*22dc650dSSadaf Ebrahimi    AAA\x80BBBXYZ
575*22dc650dSSadaf Ebrahimi\= Expect no match
576*22dc650dSSadaf Ebrahimi    AAA\x80BXYZ
577*22dc650dSSadaf Ebrahimi    AAA\x80BBXYZ
578*22dc650dSSadaf Ebrahimi
579*22dc650dSSadaf Ebrahimi# -------------------------------------
580*22dc650dSSadaf Ebrahimi
581*22dc650dSSadaf Ebrahimi/(*UTF)(?=\x{123})/I
582*22dc650dSSadaf Ebrahimi
583*22dc650dSSadaf Ebrahimi/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
584*22dc650dSSadaf Ebrahimi
585*22dc650dSSadaf Ebrahimi/[��,]/BI,utf
586*22dc650dSSadaf Ebrahimi
587*22dc650dSSadaf Ebrahimi/[\x{fff4}-\x{ffff8}]/I,utf
588*22dc650dSSadaf Ebrahimi
589*22dc650dSSadaf Ebrahimi/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf
590*22dc650dSSadaf Ebrahimi
591*22dc650dSSadaf Ebrahimi/[\xff\x{ffff}]/I,utf
592*22dc650dSSadaf Ebrahimi
593*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I,utf
594*22dc650dSSadaf Ebrahimi    abc\x{ff}def
595*22dc650dSSadaf Ebrahimi
596*22dc650dSSadaf Ebrahimi/[\xff\x{ff}]/I
597*22dc650dSSadaf Ebrahimi    abc\x{ff}def
598*22dc650dSSadaf Ebrahimi
599*22dc650dSSadaf Ebrahimi/[Ss]/I
600*22dc650dSSadaf Ebrahimi
601*22dc650dSSadaf Ebrahimi/[Ss]/I,utf
602*22dc650dSSadaf Ebrahimi
603*22dc650dSSadaf Ebrahimi/(?:\x{ff}|\x{3000})/I,utf
604*22dc650dSSadaf Ebrahimi
605*22dc650dSSadaf Ebrahimi/x/utf
606*22dc650dSSadaf Ebrahimi    abxyz
607*22dc650dSSadaf Ebrahimi    \x80\=startchar
608*22dc650dSSadaf Ebrahimi    abc\x80\=startchar
609*22dc650dSSadaf Ebrahimi    abc\x80\=startchar,offset=3
610*22dc650dSSadaf Ebrahimi
611*22dc650dSSadaf Ebrahimi/\x{c1}+\x{e1}/iIB,ucp
612*22dc650dSSadaf Ebrahimi    \x{c1}\x{c1}\x{c1}
613*22dc650dSSadaf Ebrahimi    \x{e1}\x{e1}\x{e1}
614*22dc650dSSadaf Ebrahimi
615*22dc650dSSadaf Ebrahimi/a|\x{c1}/iI,ucp
616*22dc650dSSadaf Ebrahimi    \x{e1}xxx
617*22dc650dSSadaf Ebrahimi
618*22dc650dSSadaf Ebrahimi/a|\x{c1}/iI,utf
619*22dc650dSSadaf Ebrahimi    \x{e1}xxx
620*22dc650dSSadaf Ebrahimi
621*22dc650dSSadaf Ebrahimi/\x{c1}|\x{e1}/iI,ucp
622*22dc650dSSadaf Ebrahimi
623*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
624*22dc650dSSadaf Ebrahimi    X\x{e1}Y
625*22dc650dSSadaf Ebrahimi
626*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
627*22dc650dSSadaf Ebrahimi    X\x{c1}Y
628*22dc650dSSadaf Ebrahimi
629*22dc650dSSadaf Ebrahimi# Without UTF or UCP, characters > 127 have only one case in the default locale.
630*22dc650dSSadaf Ebrahimi
631*22dc650dSSadaf Ebrahimi/X(\x{e1})Y/replace=>\U$1<,substitute_extended
632*22dc650dSSadaf Ebrahimi    X\x{e1}Y
633*22dc650dSSadaf Ebrahimi
634*22dc650dSSadaf Ebrahimi/A/utf,match_invalid_utf,caseless
635*22dc650dSSadaf Ebrahimi    \xe5A
636*22dc650dSSadaf Ebrahimi
637*22dc650dSSadaf Ebrahimi/\bch\b/utf,match_invalid_utf
638*22dc650dSSadaf Ebrahimi    qchq\=ph
639*22dc650dSSadaf Ebrahimi    qchq\=ps
640*22dc650dSSadaf Ebrahimi
641*22dc650dSSadaf Ebrahimi/line1\nbreak/firstline,utf,match_invalid_utf
642*22dc650dSSadaf Ebrahimi    line1\nbreak
643*22dc650dSSadaf Ebrahimi    line0\nline1\nbreak
644*22dc650dSSadaf Ebrahimi
645*22dc650dSSadaf Ebrahimi/A\z/utf,match_invalid_utf
646*22dc650dSSadaf Ebrahimi    A\x80\x42\n
647*22dc650dSSadaf Ebrahimi
648*22dc650dSSadaf Ebrahimi# End of testinput10
649