1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2023 University of Cambridge
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi
19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi
23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi /* This module contains a PCRE private debugging function for printing out the
43*22dc650dSSadaf Ebrahimi internal form of a compiled regular expression, along with some supporting
44*22dc650dSSadaf Ebrahimi local functions. This source file is #included in pcre2test.c at each supported
45*22dc650dSSadaf Ebrahimi code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
46*22dc650dSSadaf Ebrahimi that comprise the library. It can also optionally be included in
47*22dc650dSSadaf Ebrahimi pcre2_compile.c for detailed debugging in error situations. */
48*22dc650dSSadaf Ebrahimi
49*22dc650dSSadaf Ebrahimi
50*22dc650dSSadaf Ebrahimi /* Tables of operator names. The same 8-bit table is used for all code unit
51*22dc650dSSadaf Ebrahimi widths, so it must be defined only once. The list itself is defined in
52*22dc650dSSadaf Ebrahimi pcre2_internal.h, which is #included by pcre2test before this file. */
53*22dc650dSSadaf Ebrahimi
54*22dc650dSSadaf Ebrahimi #ifndef OP_LISTS_DEFINED
55*22dc650dSSadaf Ebrahimi static const char *OP_names[] = { OP_NAME_LIST };
56*22dc650dSSadaf Ebrahimi #define OP_LISTS_DEFINED
57*22dc650dSSadaf Ebrahimi #endif
58*22dc650dSSadaf Ebrahimi
/* All the functions and tables in this file must have mode-dependent names,
because the file is #included once per supported code unit width with
PCRE2_SUFFIX set appropriately. */

/* The table: */

#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)

/* The functions: */

#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define print_prop            PCRE2_SUFFIX(print_prop_)
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
68*22dc650dSSadaf Ebrahimi
69*22dc650dSSadaf Ebrahimi /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
70*22dc650dSSadaf Ebrahimi the definition is next to the definition of the opcodes in pcre2_internal.h.
71*22dc650dSSadaf Ebrahimi The contents of the table are, however, mode-dependent. */
72*22dc650dSSadaf Ebrahimi
73*22dc650dSSadaf Ebrahimi static const uint8_t OP_lengths[] = { OP_LENGTHS };
74*22dc650dSSadaf Ebrahimi
75*22dc650dSSadaf Ebrahimi
76*22dc650dSSadaf Ebrahimi
77*22dc650dSSadaf Ebrahimi /*************************************************
78*22dc650dSSadaf Ebrahimi * Print one character from a string *
79*22dc650dSSadaf Ebrahimi *************************************************/
80*22dc650dSSadaf Ebrahimi
81*22dc650dSSadaf Ebrahimi /* In UTF mode the character may occupy more than one code unit.
82*22dc650dSSadaf Ebrahimi
83*22dc650dSSadaf Ebrahimi Arguments:
84*22dc650dSSadaf Ebrahimi f file to write to
85*22dc650dSSadaf Ebrahimi ptr pointer to first code unit of the character
86*22dc650dSSadaf Ebrahimi utf TRUE if string is UTF (will be FALSE if UTF is not supported)
87*22dc650dSSadaf Ebrahimi
88*22dc650dSSadaf Ebrahimi Returns: number of additional code units used
89*22dc650dSSadaf Ebrahimi */
90*22dc650dSSadaf Ebrahimi
91*22dc650dSSadaf Ebrahimi static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92*22dc650dSSadaf Ebrahimi print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93*22dc650dSSadaf Ebrahimi {
94*22dc650dSSadaf Ebrahimi uint32_t c = *ptr;
95*22dc650dSSadaf Ebrahimi BOOL one_code_unit = !utf;
96*22dc650dSSadaf Ebrahimi
97*22dc650dSSadaf Ebrahimi /* If UTF is supported and requested, check for a valid single code unit. */
98*22dc650dSSadaf Ebrahimi
99*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
100*22dc650dSSadaf Ebrahimi if (utf)
101*22dc650dSSadaf Ebrahimi {
102*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
103*22dc650dSSadaf Ebrahimi one_code_unit = c < 0x80;
104*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
105*22dc650dSSadaf Ebrahimi one_code_unit = (c & 0xfc00) != 0xd800;
106*22dc650dSSadaf Ebrahimi #else
107*22dc650dSSadaf Ebrahimi one_code_unit = (c & 0xfffff800u) != 0xd800u;
108*22dc650dSSadaf Ebrahimi #endif /* CODE_UNIT_WIDTH */
109*22dc650dSSadaf Ebrahimi }
110*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
111*22dc650dSSadaf Ebrahimi
112*22dc650dSSadaf Ebrahimi /* Handle a valid one-code-unit character at any width. */
113*22dc650dSSadaf Ebrahimi
114*22dc650dSSadaf Ebrahimi if (one_code_unit)
115*22dc650dSSadaf Ebrahimi {
116*22dc650dSSadaf Ebrahimi if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117*22dc650dSSadaf Ebrahimi else if (c < 0x80) fprintf(f, "\\x%02x", c);
118*22dc650dSSadaf Ebrahimi else fprintf(f, "\\x{%02x}", c);
119*22dc650dSSadaf Ebrahimi return 0;
120*22dc650dSSadaf Ebrahimi }
121*22dc650dSSadaf Ebrahimi
122*22dc650dSSadaf Ebrahimi /* Code for invalid UTF code units and multi-unit UTF characters is different
123*22dc650dSSadaf Ebrahimi for each width. If UTF is not supported, control should never get here, but we
124*22dc650dSSadaf Ebrahimi need a return statement to keep the compiler happy. */
125*22dc650dSSadaf Ebrahimi
126*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
127*22dc650dSSadaf Ebrahimi return 0;
128*22dc650dSSadaf Ebrahimi #else
129*22dc650dSSadaf Ebrahimi
130*22dc650dSSadaf Ebrahimi /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131*22dc650dSSadaf Ebrahimi Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132*22dc650dSSadaf Ebrahimi \X instead of \x as an indication. */
133*22dc650dSSadaf Ebrahimi
134*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
135*22dc650dSSadaf Ebrahimi if ((c & 0xc0) != 0xc0)
136*22dc650dSSadaf Ebrahimi {
137*22dc650dSSadaf Ebrahimi fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138*22dc650dSSadaf Ebrahimi return 0;
139*22dc650dSSadaf Ebrahimi }
140*22dc650dSSadaf Ebrahimi else
141*22dc650dSSadaf Ebrahimi {
142*22dc650dSSadaf Ebrahimi int i;
143*22dc650dSSadaf Ebrahimi int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144*22dc650dSSadaf Ebrahimi int s = 6*a;
145*22dc650dSSadaf Ebrahimi c = (c & PRIV(utf8_table3)[a]) << s;
146*22dc650dSSadaf Ebrahimi for (i = 1; i <= a; i++)
147*22dc650dSSadaf Ebrahimi {
148*22dc650dSSadaf Ebrahimi if ((ptr[i] & 0xc0) != 0x80)
149*22dc650dSSadaf Ebrahimi {
150*22dc650dSSadaf Ebrahimi fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151*22dc650dSSadaf Ebrahimi return i - 1;
152*22dc650dSSadaf Ebrahimi }
153*22dc650dSSadaf Ebrahimi s -= 6;
154*22dc650dSSadaf Ebrahimi c |= (ptr[i] & 0x3f) << s;
155*22dc650dSSadaf Ebrahimi }
156*22dc650dSSadaf Ebrahimi fprintf(f, "\\x{%x}", c);
157*22dc650dSSadaf Ebrahimi return a;
158*22dc650dSSadaf Ebrahimi }
159*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160*22dc650dSSadaf Ebrahimi
161*22dc650dSSadaf Ebrahimi /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162*22dc650dSSadaf Ebrahimi Print it with \X instead of \x as an indication. */
163*22dc650dSSadaf Ebrahimi
164*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 16
165*22dc650dSSadaf Ebrahimi if ((ptr[1] & 0xfc00) != 0xdc00)
166*22dc650dSSadaf Ebrahimi {
167*22dc650dSSadaf Ebrahimi fprintf(f, "\\X{%x}", c);
168*22dc650dSSadaf Ebrahimi return 0;
169*22dc650dSSadaf Ebrahimi }
170*22dc650dSSadaf Ebrahimi c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171*22dc650dSSadaf Ebrahimi fprintf(f, "\\x{%x}", c);
172*22dc650dSSadaf Ebrahimi return 1;
173*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174*22dc650dSSadaf Ebrahimi
175*22dc650dSSadaf Ebrahimi /* For UTF-32 we get here only for a malformed code unit, which should only
176*22dc650dSSadaf Ebrahimi occur if the sanity check has been turned off. Print it with \X instead of \x
177*22dc650dSSadaf Ebrahimi as an indication. */
178*22dc650dSSadaf Ebrahimi
179*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 32
180*22dc650dSSadaf Ebrahimi fprintf(f, "\\X{%x}", c);
181*22dc650dSSadaf Ebrahimi return 0;
182*22dc650dSSadaf Ebrahimi #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
184*22dc650dSSadaf Ebrahimi }
185*22dc650dSSadaf Ebrahimi
186*22dc650dSSadaf Ebrahimi
187*22dc650dSSadaf Ebrahimi
188*22dc650dSSadaf Ebrahimi /*************************************************
189*22dc650dSSadaf Ebrahimi * Print string as a list of code units *
190*22dc650dSSadaf Ebrahimi *************************************************/
191*22dc650dSSadaf Ebrahimi
192*22dc650dSSadaf Ebrahimi /* These take no account of UTF as they always print each individual code unit.
193*22dc650dSSadaf Ebrahimi The string is zero-terminated for print_custring(); the length is given for
194*22dc650dSSadaf Ebrahimi print_custring_bylen().
195*22dc650dSSadaf Ebrahimi
196*22dc650dSSadaf Ebrahimi Arguments:
197*22dc650dSSadaf Ebrahimi f file to write to
198*22dc650dSSadaf Ebrahimi ptr point to the string
199*22dc650dSSadaf Ebrahimi len length for print_custring_bylen()
200*22dc650dSSadaf Ebrahimi
201*22dc650dSSadaf Ebrahimi Returns: nothing
202*22dc650dSSadaf Ebrahimi */
203*22dc650dSSadaf Ebrahimi
204*22dc650dSSadaf Ebrahimi static void
print_custring(FILE * f,PCRE2_SPTR ptr)205*22dc650dSSadaf Ebrahimi print_custring(FILE *f, PCRE2_SPTR ptr)
206*22dc650dSSadaf Ebrahimi {
207*22dc650dSSadaf Ebrahimi while (*ptr != '\0')
208*22dc650dSSadaf Ebrahimi {
209*22dc650dSSadaf Ebrahimi uint32_t c = *ptr++;
210*22dc650dSSadaf Ebrahimi if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211*22dc650dSSadaf Ebrahimi }
212*22dc650dSSadaf Ebrahimi }
213*22dc650dSSadaf Ebrahimi
214*22dc650dSSadaf Ebrahimi static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215*22dc650dSSadaf Ebrahimi print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216*22dc650dSSadaf Ebrahimi {
217*22dc650dSSadaf Ebrahimi for (; len > 0; len--)
218*22dc650dSSadaf Ebrahimi {
219*22dc650dSSadaf Ebrahimi uint32_t c = *ptr++;
220*22dc650dSSadaf Ebrahimi if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221*22dc650dSSadaf Ebrahimi }
222*22dc650dSSadaf Ebrahimi }
223*22dc650dSSadaf Ebrahimi
224*22dc650dSSadaf Ebrahimi
225*22dc650dSSadaf Ebrahimi
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Without UTF/UCP support the table of names does not exist. This function
should not be called in such a configuration, because a pattern that tries to
use Unicode properties won't compile, but to avoid scattering #ifdefs through
the main code the one conditional lives here and "??" is returned instead.

The table holds both full names and their abbreviations, so the search tries
to find the full name: it scans backwards from the end of the table, looking at
no more than two matching entries, and keeps the longer name. A script name
(PT_SC or PT_SCX entry) of exactly three characters is returned as soon as it
is seen. A PT_SC request also matches PT_SCX entries.

Arguments:
  ptype      the property type
  pvalue     the property value

Returns:     the name, or "??" if no table entry matches
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
const char *best = "??";
size_t best_len = 0;
int matches = 0;
unsigned int alt_type = (ptype == PT_SC)? PT_SCX : ptype;
int idx = PRIV(utt_size);

while (--idx >= 0 && matches < 2)
  {
  const ucp_type_table *entry = PRIV(utt) + idx;
  const char *name;
  size_t name_len;

  /* Skip entries for a different value or an unrelated type. */

  if (pvalue != entry->value) continue;
  if (ptype != entry->type && alt_type != entry->type) continue;

  name = PRIV(utt_names) + entry->name_offset;
  name_len = strlen(name);

  /* A three-character script name wins outright. */

  if (name_len == 3 && (entry->type == PT_SC || entry->type == PT_SCX))
    return name;

  /* Otherwise remember the longest name seen so far. */

  if (name_len > best_len)
    {
    best = name;
    best_len = name_len;
    }

  matches++;
  }

return best;

#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}
281*22dc650dSSadaf Ebrahimi
282*22dc650dSSadaf Ebrahimi
283*22dc650dSSadaf Ebrahimi
284*22dc650dSSadaf Ebrahimi /*************************************************
285*22dc650dSSadaf Ebrahimi * Print Unicode property value *
286*22dc650dSSadaf Ebrahimi *************************************************/
287*22dc650dSSadaf Ebrahimi
288*22dc650dSSadaf Ebrahimi /* "Normal" properties can be printed from tables. The PT_CLIST property is a
289*22dc650dSSadaf Ebrahimi pseudo-property that contains a pointer to a list of case-equivalent
290*22dc650dSSadaf Ebrahimi characters.
291*22dc650dSSadaf Ebrahimi
292*22dc650dSSadaf Ebrahimi Arguments:
293*22dc650dSSadaf Ebrahimi f file to write to
294*22dc650dSSadaf Ebrahimi code pointer in the compiled code
295*22dc650dSSadaf Ebrahimi before text to print before
296*22dc650dSSadaf Ebrahimi after text to print after
297*22dc650dSSadaf Ebrahimi
298*22dc650dSSadaf Ebrahimi Returns: nothing
299*22dc650dSSadaf Ebrahimi */
300*22dc650dSSadaf Ebrahimi
301*22dc650dSSadaf Ebrahimi static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)302*22dc650dSSadaf Ebrahimi print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
303*22dc650dSSadaf Ebrahimi {
304*22dc650dSSadaf Ebrahimi if (code[1] != PT_CLIST)
305*22dc650dSSadaf Ebrahimi {
306*22dc650dSSadaf Ebrahimi const char *sc = (code[1] == PT_SC)? "script:" : "";
307*22dc650dSSadaf Ebrahimi const char *s = get_ucpname(code[1], code[2]);
308*22dc650dSSadaf Ebrahimi fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
309*22dc650dSSadaf Ebrahimi }
310*22dc650dSSadaf Ebrahimi else
311*22dc650dSSadaf Ebrahimi {
312*22dc650dSSadaf Ebrahimi const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
313*22dc650dSSadaf Ebrahimi fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
314*22dc650dSSadaf Ebrahimi while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
315*22dc650dSSadaf Ebrahimi fprintf(f, "%s", after);
316*22dc650dSSadaf Ebrahimi }
317*22dc650dSSadaf Ebrahimi }
318*22dc650dSSadaf Ebrahimi
319*22dc650dSSadaf Ebrahimi
320*22dc650dSSadaf Ebrahimi
321*22dc650dSSadaf Ebrahimi /*************************************************
322*22dc650dSSadaf Ebrahimi * Print compiled pattern *
323*22dc650dSSadaf Ebrahimi *************************************************/
324*22dc650dSSadaf Ebrahimi
/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.

Arguments:
  re              a compiled pattern
  f               the file to write to
  print_lengths   show various lengths

Returns:          nothing
*/
336*22dc650dSSadaf Ebrahimi
337*22dc650dSSadaf Ebrahimi static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)338*22dc650dSSadaf Ebrahimi pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
339*22dc650dSSadaf Ebrahimi {
340*22dc650dSSadaf Ebrahimi PCRE2_SPTR codestart, nametable, code;
341*22dc650dSSadaf Ebrahimi uint32_t nesize = re->name_entry_size;
342*22dc650dSSadaf Ebrahimi BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
343*22dc650dSSadaf Ebrahimi
344*22dc650dSSadaf Ebrahimi nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
345*22dc650dSSadaf Ebrahimi code = codestart = nametable + re->name_count * re->name_entry_size;
346*22dc650dSSadaf Ebrahimi
347*22dc650dSSadaf Ebrahimi for(;;)
348*22dc650dSSadaf Ebrahimi {
349*22dc650dSSadaf Ebrahimi PCRE2_SPTR ccode;
350*22dc650dSSadaf Ebrahimi uint32_t c;
351*22dc650dSSadaf Ebrahimi int i;
352*22dc650dSSadaf Ebrahimi const char *flag = " ";
353*22dc650dSSadaf Ebrahimi unsigned int extra = 0;
354*22dc650dSSadaf Ebrahimi
355*22dc650dSSadaf Ebrahimi if (print_lengths)
356*22dc650dSSadaf Ebrahimi fprintf(f, "%3d ", (int)(code - codestart));
357*22dc650dSSadaf Ebrahimi else
358*22dc650dSSadaf Ebrahimi fprintf(f, " ");
359*22dc650dSSadaf Ebrahimi
360*22dc650dSSadaf Ebrahimi switch(*code)
361*22dc650dSSadaf Ebrahimi {
362*22dc650dSSadaf Ebrahimi /* ========================================================================== */
363*22dc650dSSadaf Ebrahimi /* These cases are never obeyed. This is a fudge that causes a compile-
364*22dc650dSSadaf Ebrahimi time error if the vectors OP_names or OP_lengths, which are indexed
365*22dc650dSSadaf Ebrahimi by opcode, are not the correct length. It seems to be the only way to do
366*22dc650dSSadaf Ebrahimi such a check at compile time, as the sizeof() operator does not work in
367*22dc650dSSadaf Ebrahimi the C preprocessor. */
368*22dc650dSSadaf Ebrahimi
369*22dc650dSSadaf Ebrahimi case OP_TABLE_LENGTH:
370*22dc650dSSadaf Ebrahimi case OP_TABLE_LENGTH +
371*22dc650dSSadaf Ebrahimi ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
372*22dc650dSSadaf Ebrahimi (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
373*22dc650dSSadaf Ebrahimi return;
374*22dc650dSSadaf Ebrahimi /* ========================================================================== */
375*22dc650dSSadaf Ebrahimi
376*22dc650dSSadaf Ebrahimi case OP_END:
377*22dc650dSSadaf Ebrahimi fprintf(f, " %s\n", OP_names[*code]);
378*22dc650dSSadaf Ebrahimi fprintf(f, "------------------------------------------------------------------\n");
379*22dc650dSSadaf Ebrahimi return;
380*22dc650dSSadaf Ebrahimi
381*22dc650dSSadaf Ebrahimi case OP_CHAR:
382*22dc650dSSadaf Ebrahimi fprintf(f, " ");
383*22dc650dSSadaf Ebrahimi do
384*22dc650dSSadaf Ebrahimi {
385*22dc650dSSadaf Ebrahimi code++;
386*22dc650dSSadaf Ebrahimi code += 1 + print_char(f, code, utf);
387*22dc650dSSadaf Ebrahimi }
388*22dc650dSSadaf Ebrahimi while (*code == OP_CHAR);
389*22dc650dSSadaf Ebrahimi fprintf(f, "\n");
390*22dc650dSSadaf Ebrahimi continue;
391*22dc650dSSadaf Ebrahimi
392*22dc650dSSadaf Ebrahimi case OP_CHARI:
393*22dc650dSSadaf Ebrahimi fprintf(f, " /i ");
394*22dc650dSSadaf Ebrahimi do
395*22dc650dSSadaf Ebrahimi {
396*22dc650dSSadaf Ebrahimi code++;
397*22dc650dSSadaf Ebrahimi code += 1 + print_char(f, code, utf);
398*22dc650dSSadaf Ebrahimi }
399*22dc650dSSadaf Ebrahimi while (*code == OP_CHARI);
400*22dc650dSSadaf Ebrahimi fprintf(f, "\n");
401*22dc650dSSadaf Ebrahimi continue;
402*22dc650dSSadaf Ebrahimi
403*22dc650dSSadaf Ebrahimi case OP_CBRA:
404*22dc650dSSadaf Ebrahimi case OP_CBRAPOS:
405*22dc650dSSadaf Ebrahimi case OP_SCBRA:
406*22dc650dSSadaf Ebrahimi case OP_SCBRAPOS:
407*22dc650dSSadaf Ebrahimi if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
408*22dc650dSSadaf Ebrahimi else fprintf(f, " ");
409*22dc650dSSadaf Ebrahimi fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
410*22dc650dSSadaf Ebrahimi break;
411*22dc650dSSadaf Ebrahimi
412*22dc650dSSadaf Ebrahimi case OP_BRA:
413*22dc650dSSadaf Ebrahimi case OP_BRAPOS:
414*22dc650dSSadaf Ebrahimi case OP_SBRA:
415*22dc650dSSadaf Ebrahimi case OP_SBRAPOS:
416*22dc650dSSadaf Ebrahimi case OP_KETRMAX:
417*22dc650dSSadaf Ebrahimi case OP_KETRMIN:
418*22dc650dSSadaf Ebrahimi case OP_KETRPOS:
419*22dc650dSSadaf Ebrahimi case OP_ALT:
420*22dc650dSSadaf Ebrahimi case OP_KET:
421*22dc650dSSadaf Ebrahimi case OP_ASSERT:
422*22dc650dSSadaf Ebrahimi case OP_ASSERT_NOT:
423*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK:
424*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NOT:
425*22dc650dSSadaf Ebrahimi case OP_ASSERT_NA:
426*22dc650dSSadaf Ebrahimi case OP_ASSERTBACK_NA:
427*22dc650dSSadaf Ebrahimi case OP_ONCE:
428*22dc650dSSadaf Ebrahimi case OP_SCRIPT_RUN:
429*22dc650dSSadaf Ebrahimi case OP_COND:
430*22dc650dSSadaf Ebrahimi case OP_SCOND:
431*22dc650dSSadaf Ebrahimi if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
432*22dc650dSSadaf Ebrahimi else fprintf(f, " ");
433*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*code]);
434*22dc650dSSadaf Ebrahimi break;
435*22dc650dSSadaf Ebrahimi
436*22dc650dSSadaf Ebrahimi case OP_REVERSE:
437*22dc650dSSadaf Ebrahimi if (print_lengths) fprintf(f, "%3d ", GET2(code, 1));
438*22dc650dSSadaf Ebrahimi else fprintf(f, " ");
439*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*code]);
440*22dc650dSSadaf Ebrahimi break;
441*22dc650dSSadaf Ebrahimi
442*22dc650dSSadaf Ebrahimi case OP_VREVERSE:
443*22dc650dSSadaf Ebrahimi if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1),
444*22dc650dSSadaf Ebrahimi GET2(code, 1 + IMM2_SIZE));
445*22dc650dSSadaf Ebrahimi else fprintf(f, " ");
446*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*code]);
447*22dc650dSSadaf Ebrahimi break;
448*22dc650dSSadaf Ebrahimi
449*22dc650dSSadaf Ebrahimi case OP_CLOSE:
450*22dc650dSSadaf Ebrahimi fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
451*22dc650dSSadaf Ebrahimi break;
452*22dc650dSSadaf Ebrahimi
453*22dc650dSSadaf Ebrahimi case OP_CREF:
454*22dc650dSSadaf Ebrahimi fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
455*22dc650dSSadaf Ebrahimi break;
456*22dc650dSSadaf Ebrahimi
457*22dc650dSSadaf Ebrahimi case OP_DNCREF:
458*22dc650dSSadaf Ebrahimi {
459*22dc650dSSadaf Ebrahimi PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
460*22dc650dSSadaf Ebrahimi fprintf(f, " %s Cond ref <", flag);
461*22dc650dSSadaf Ebrahimi print_custring(f, entry);
462*22dc650dSSadaf Ebrahimi fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
463*22dc650dSSadaf Ebrahimi }
464*22dc650dSSadaf Ebrahimi break;
465*22dc650dSSadaf Ebrahimi
466*22dc650dSSadaf Ebrahimi case OP_RREF:
467*22dc650dSSadaf Ebrahimi c = GET2(code, 1);
468*22dc650dSSadaf Ebrahimi if (c == RREF_ANY)
469*22dc650dSSadaf Ebrahimi fprintf(f, " Cond recurse any");
470*22dc650dSSadaf Ebrahimi else
471*22dc650dSSadaf Ebrahimi fprintf(f, " Cond recurse %d", c);
472*22dc650dSSadaf Ebrahimi break;
473*22dc650dSSadaf Ebrahimi
474*22dc650dSSadaf Ebrahimi case OP_DNRREF:
475*22dc650dSSadaf Ebrahimi {
476*22dc650dSSadaf Ebrahimi PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
477*22dc650dSSadaf Ebrahimi fprintf(f, " %s Cond recurse <", flag);
478*22dc650dSSadaf Ebrahimi print_custring(f, entry);
479*22dc650dSSadaf Ebrahimi fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
480*22dc650dSSadaf Ebrahimi }
481*22dc650dSSadaf Ebrahimi break;
482*22dc650dSSadaf Ebrahimi
483*22dc650dSSadaf Ebrahimi case OP_FALSE:
484*22dc650dSSadaf Ebrahimi fprintf(f, " Cond false");
485*22dc650dSSadaf Ebrahimi break;
486*22dc650dSSadaf Ebrahimi
487*22dc650dSSadaf Ebrahimi case OP_TRUE:
488*22dc650dSSadaf Ebrahimi fprintf(f, " Cond true");
489*22dc650dSSadaf Ebrahimi break;
490*22dc650dSSadaf Ebrahimi
491*22dc650dSSadaf Ebrahimi case OP_STARI:
492*22dc650dSSadaf Ebrahimi case OP_MINSTARI:
493*22dc650dSSadaf Ebrahimi case OP_POSSTARI:
494*22dc650dSSadaf Ebrahimi case OP_PLUSI:
495*22dc650dSSadaf Ebrahimi case OP_MINPLUSI:
496*22dc650dSSadaf Ebrahimi case OP_POSPLUSI:
497*22dc650dSSadaf Ebrahimi case OP_QUERYI:
498*22dc650dSSadaf Ebrahimi case OP_MINQUERYI:
499*22dc650dSSadaf Ebrahimi case OP_POSQUERYI:
500*22dc650dSSadaf Ebrahimi flag = "/i";
501*22dc650dSSadaf Ebrahimi /* Fall through */
502*22dc650dSSadaf Ebrahimi case OP_STAR:
503*22dc650dSSadaf Ebrahimi case OP_MINSTAR:
504*22dc650dSSadaf Ebrahimi case OP_POSSTAR:
505*22dc650dSSadaf Ebrahimi case OP_PLUS:
506*22dc650dSSadaf Ebrahimi case OP_MINPLUS:
507*22dc650dSSadaf Ebrahimi case OP_POSPLUS:
508*22dc650dSSadaf Ebrahimi case OP_QUERY:
509*22dc650dSSadaf Ebrahimi case OP_MINQUERY:
510*22dc650dSSadaf Ebrahimi case OP_POSQUERY:
511*22dc650dSSadaf Ebrahimi case OP_TYPESTAR:
512*22dc650dSSadaf Ebrahimi case OP_TYPEMINSTAR:
513*22dc650dSSadaf Ebrahimi case OP_TYPEPOSSTAR:
514*22dc650dSSadaf Ebrahimi case OP_TYPEPLUS:
515*22dc650dSSadaf Ebrahimi case OP_TYPEMINPLUS:
516*22dc650dSSadaf Ebrahimi case OP_TYPEPOSPLUS:
517*22dc650dSSadaf Ebrahimi case OP_TYPEQUERY:
518*22dc650dSSadaf Ebrahimi case OP_TYPEMINQUERY:
519*22dc650dSSadaf Ebrahimi case OP_TYPEPOSQUERY:
520*22dc650dSSadaf Ebrahimi fprintf(f, " %s ", flag);
521*22dc650dSSadaf Ebrahimi
522*22dc650dSSadaf Ebrahimi if (*code >= OP_TYPESTAR)
523*22dc650dSSadaf Ebrahimi {
524*22dc650dSSadaf Ebrahimi if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
525*22dc650dSSadaf Ebrahimi {
526*22dc650dSSadaf Ebrahimi print_prop(f, code + 1, "", " ");
527*22dc650dSSadaf Ebrahimi extra = 2;
528*22dc650dSSadaf Ebrahimi }
529*22dc650dSSadaf Ebrahimi else fprintf(f, "%s", OP_names[code[1]]);
530*22dc650dSSadaf Ebrahimi }
531*22dc650dSSadaf Ebrahimi else extra = print_char(f, code+1, utf);
532*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*code]);
533*22dc650dSSadaf Ebrahimi break;
534*22dc650dSSadaf Ebrahimi
535*22dc650dSSadaf Ebrahimi case OP_EXACTI:
536*22dc650dSSadaf Ebrahimi case OP_UPTOI:
537*22dc650dSSadaf Ebrahimi case OP_MINUPTOI:
538*22dc650dSSadaf Ebrahimi case OP_POSUPTOI:
539*22dc650dSSadaf Ebrahimi flag = "/i";
540*22dc650dSSadaf Ebrahimi /* Fall through */
541*22dc650dSSadaf Ebrahimi case OP_EXACT:
542*22dc650dSSadaf Ebrahimi case OP_UPTO:
543*22dc650dSSadaf Ebrahimi case OP_MINUPTO:
544*22dc650dSSadaf Ebrahimi case OP_POSUPTO:
545*22dc650dSSadaf Ebrahimi fprintf(f, " %s ", flag);
546*22dc650dSSadaf Ebrahimi extra = print_char(f, code + 1 + IMM2_SIZE, utf);
547*22dc650dSSadaf Ebrahimi fprintf(f, "{");
548*22dc650dSSadaf Ebrahimi if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
549*22dc650dSSadaf Ebrahimi fprintf(f, "%d}", GET2(code,1));
550*22dc650dSSadaf Ebrahimi if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
551*22dc650dSSadaf Ebrahimi else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
552*22dc650dSSadaf Ebrahimi break;
553*22dc650dSSadaf Ebrahimi
554*22dc650dSSadaf Ebrahimi case OP_TYPEEXACT:
555*22dc650dSSadaf Ebrahimi case OP_TYPEUPTO:
556*22dc650dSSadaf Ebrahimi case OP_TYPEMINUPTO:
557*22dc650dSSadaf Ebrahimi case OP_TYPEPOSUPTO:
558*22dc650dSSadaf Ebrahimi if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
559*22dc650dSSadaf Ebrahimi {
560*22dc650dSSadaf Ebrahimi print_prop(f, code + IMM2_SIZE + 1, " ", " ");
561*22dc650dSSadaf Ebrahimi extra = 2;
562*22dc650dSSadaf Ebrahimi }
563*22dc650dSSadaf Ebrahimi else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
564*22dc650dSSadaf Ebrahimi fprintf(f, "{");
565*22dc650dSSadaf Ebrahimi if (*code != OP_TYPEEXACT) fprintf(f, "0,");
566*22dc650dSSadaf Ebrahimi fprintf(f, "%d}", GET2(code,1));
567*22dc650dSSadaf Ebrahimi if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
568*22dc650dSSadaf Ebrahimi else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
569*22dc650dSSadaf Ebrahimi break;
570*22dc650dSSadaf Ebrahimi
571*22dc650dSSadaf Ebrahimi case OP_NOTI:
572*22dc650dSSadaf Ebrahimi flag = "/i";
573*22dc650dSSadaf Ebrahimi /* Fall through */
574*22dc650dSSadaf Ebrahimi case OP_NOT:
575*22dc650dSSadaf Ebrahimi fprintf(f, " %s [^", flag);
576*22dc650dSSadaf Ebrahimi extra = print_char(f, code + 1, utf);
577*22dc650dSSadaf Ebrahimi fprintf(f, "]");
578*22dc650dSSadaf Ebrahimi break;
579*22dc650dSSadaf Ebrahimi
580*22dc650dSSadaf Ebrahimi case OP_NOTSTARI:
581*22dc650dSSadaf Ebrahimi case OP_NOTMINSTARI:
582*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTARI:
583*22dc650dSSadaf Ebrahimi case OP_NOTPLUSI:
584*22dc650dSSadaf Ebrahimi case OP_NOTMINPLUSI:
585*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUSI:
586*22dc650dSSadaf Ebrahimi case OP_NOTQUERYI:
587*22dc650dSSadaf Ebrahimi case OP_NOTMINQUERYI:
588*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERYI:
589*22dc650dSSadaf Ebrahimi flag = "/i";
590*22dc650dSSadaf Ebrahimi /* Fall through */
591*22dc650dSSadaf Ebrahimi
592*22dc650dSSadaf Ebrahimi case OP_NOTSTAR:
593*22dc650dSSadaf Ebrahimi case OP_NOTMINSTAR:
594*22dc650dSSadaf Ebrahimi case OP_NOTPOSSTAR:
595*22dc650dSSadaf Ebrahimi case OP_NOTPLUS:
596*22dc650dSSadaf Ebrahimi case OP_NOTMINPLUS:
597*22dc650dSSadaf Ebrahimi case OP_NOTPOSPLUS:
598*22dc650dSSadaf Ebrahimi case OP_NOTQUERY:
599*22dc650dSSadaf Ebrahimi case OP_NOTMINQUERY:
600*22dc650dSSadaf Ebrahimi case OP_NOTPOSQUERY:
601*22dc650dSSadaf Ebrahimi fprintf(f, " %s [^", flag);
602*22dc650dSSadaf Ebrahimi extra = print_char(f, code + 1, utf);
603*22dc650dSSadaf Ebrahimi fprintf(f, "]%s", OP_names[*code]);
604*22dc650dSSadaf Ebrahimi break;
605*22dc650dSSadaf Ebrahimi
606*22dc650dSSadaf Ebrahimi case OP_NOTEXACTI:
607*22dc650dSSadaf Ebrahimi case OP_NOTUPTOI:
608*22dc650dSSadaf Ebrahimi case OP_NOTMINUPTOI:
609*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTOI:
610*22dc650dSSadaf Ebrahimi flag = "/i";
611*22dc650dSSadaf Ebrahimi /* Fall through */
612*22dc650dSSadaf Ebrahimi
613*22dc650dSSadaf Ebrahimi case OP_NOTEXACT:
614*22dc650dSSadaf Ebrahimi case OP_NOTUPTO:
615*22dc650dSSadaf Ebrahimi case OP_NOTMINUPTO:
616*22dc650dSSadaf Ebrahimi case OP_NOTPOSUPTO:
617*22dc650dSSadaf Ebrahimi fprintf(f, " %s [^", flag);
618*22dc650dSSadaf Ebrahimi extra = print_char(f, code + 1 + IMM2_SIZE, utf);
619*22dc650dSSadaf Ebrahimi fprintf(f, "]{");
620*22dc650dSSadaf Ebrahimi if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
621*22dc650dSSadaf Ebrahimi fprintf(f, "%d}", GET2(code,1));
622*22dc650dSSadaf Ebrahimi if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
623*22dc650dSSadaf Ebrahimi else
624*22dc650dSSadaf Ebrahimi if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
625*22dc650dSSadaf Ebrahimi break;
626*22dc650dSSadaf Ebrahimi
627*22dc650dSSadaf Ebrahimi case OP_RECURSE:
628*22dc650dSSadaf Ebrahimi if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
629*22dc650dSSadaf Ebrahimi else fprintf(f, " ");
630*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*code]);
631*22dc650dSSadaf Ebrahimi break;
632*22dc650dSSadaf Ebrahimi
633*22dc650dSSadaf Ebrahimi case OP_REFI:
634*22dc650dSSadaf Ebrahimi flag = "/i";
635*22dc650dSSadaf Ebrahimi /* Fall through */
636*22dc650dSSadaf Ebrahimi case OP_REF:
637*22dc650dSSadaf Ebrahimi fprintf(f, " %s \\%d", flag, GET2(code,1));
638*22dc650dSSadaf Ebrahimi ccode = code + OP_lengths[*code];
639*22dc650dSSadaf Ebrahimi goto CLASS_REF_REPEAT;
640*22dc650dSSadaf Ebrahimi
641*22dc650dSSadaf Ebrahimi case OP_DNREFI:
642*22dc650dSSadaf Ebrahimi flag = "/i";
643*22dc650dSSadaf Ebrahimi /* Fall through */
644*22dc650dSSadaf Ebrahimi case OP_DNREF:
645*22dc650dSSadaf Ebrahimi {
646*22dc650dSSadaf Ebrahimi PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
647*22dc650dSSadaf Ebrahimi fprintf(f, " %s \\k<", flag);
648*22dc650dSSadaf Ebrahimi print_custring(f, entry);
649*22dc650dSSadaf Ebrahimi fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
650*22dc650dSSadaf Ebrahimi }
651*22dc650dSSadaf Ebrahimi ccode = code + OP_lengths[*code];
652*22dc650dSSadaf Ebrahimi goto CLASS_REF_REPEAT;
653*22dc650dSSadaf Ebrahimi
654*22dc650dSSadaf Ebrahimi case OP_CALLOUT:
655*22dc650dSSadaf Ebrahimi fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
656*22dc650dSSadaf Ebrahimi GET(code, 1), GET(code, 1 + LINK_SIZE));
657*22dc650dSSadaf Ebrahimi break;
658*22dc650dSSadaf Ebrahimi
659*22dc650dSSadaf Ebrahimi case OP_CALLOUT_STR:
660*22dc650dSSadaf Ebrahimi c = code[1 + 4*LINK_SIZE];
661*22dc650dSSadaf Ebrahimi fprintf(f, " %s %c", OP_names[*code], c);
662*22dc650dSSadaf Ebrahimi extra = GET(code, 1 + 2*LINK_SIZE);
663*22dc650dSSadaf Ebrahimi print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
664*22dc650dSSadaf Ebrahimi for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
665*22dc650dSSadaf Ebrahimi if (c == PRIV(callout_start_delims)[i])
666*22dc650dSSadaf Ebrahimi {
667*22dc650dSSadaf Ebrahimi c = PRIV(callout_end_delims)[i];
668*22dc650dSSadaf Ebrahimi break;
669*22dc650dSSadaf Ebrahimi }
670*22dc650dSSadaf Ebrahimi fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
671*22dc650dSSadaf Ebrahimi GET(code, 1 + LINK_SIZE));
672*22dc650dSSadaf Ebrahimi break;
673*22dc650dSSadaf Ebrahimi
674*22dc650dSSadaf Ebrahimi case OP_PROP:
675*22dc650dSSadaf Ebrahimi case OP_NOTPROP:
676*22dc650dSSadaf Ebrahimi print_prop(f, code, " ", "");
677*22dc650dSSadaf Ebrahimi break;
678*22dc650dSSadaf Ebrahimi
679*22dc650dSSadaf Ebrahimi /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
680*22dc650dSSadaf Ebrahimi in having this code always here, and it makes it less messy without all
681*22dc650dSSadaf Ebrahimi those #ifdefs. */
682*22dc650dSSadaf Ebrahimi
683*22dc650dSSadaf Ebrahimi case OP_CLASS:
684*22dc650dSSadaf Ebrahimi case OP_NCLASS:
685*22dc650dSSadaf Ebrahimi case OP_XCLASS:
686*22dc650dSSadaf Ebrahimi {
687*22dc650dSSadaf Ebrahimi BOOL printmap, invertmap;
688*22dc650dSSadaf Ebrahimi
689*22dc650dSSadaf Ebrahimi fprintf(f, " [");
690*22dc650dSSadaf Ebrahimi
691*22dc650dSSadaf Ebrahimi /* Negative XCLASS has an inverted map whereas the original opcodes have
692*22dc650dSSadaf Ebrahimi already done the inversion. */
693*22dc650dSSadaf Ebrahimi
694*22dc650dSSadaf Ebrahimi invertmap = FALSE;
695*22dc650dSSadaf Ebrahimi if (*code == OP_XCLASS)
696*22dc650dSSadaf Ebrahimi {
697*22dc650dSSadaf Ebrahimi extra = GET(code, 1);
698*22dc650dSSadaf Ebrahimi ccode = code + LINK_SIZE + 1;
699*22dc650dSSadaf Ebrahimi printmap = (*ccode & XCL_MAP) != 0;
700*22dc650dSSadaf Ebrahimi if ((*ccode & XCL_NOT) != 0)
701*22dc650dSSadaf Ebrahimi {
702*22dc650dSSadaf Ebrahimi invertmap = (*ccode & XCL_HASPROP) == 0;
703*22dc650dSSadaf Ebrahimi fprintf(f, "^");
704*22dc650dSSadaf Ebrahimi }
705*22dc650dSSadaf Ebrahimi ccode++;
706*22dc650dSSadaf Ebrahimi }
707*22dc650dSSadaf Ebrahimi else /* CLASS or NCLASS */
708*22dc650dSSadaf Ebrahimi {
709*22dc650dSSadaf Ebrahimi printmap = TRUE;
710*22dc650dSSadaf Ebrahimi ccode = code + 1;
711*22dc650dSSadaf Ebrahimi }
712*22dc650dSSadaf Ebrahimi
713*22dc650dSSadaf Ebrahimi /* Print a bit map */
714*22dc650dSSadaf Ebrahimi
715*22dc650dSSadaf Ebrahimi if (printmap)
716*22dc650dSSadaf Ebrahimi {
717*22dc650dSSadaf Ebrahimi uint8_t inverted_map[32];
718*22dc650dSSadaf Ebrahimi uint8_t *map = (uint8_t *)ccode;
719*22dc650dSSadaf Ebrahimi
720*22dc650dSSadaf Ebrahimi if (invertmap)
721*22dc650dSSadaf Ebrahimi {
722*22dc650dSSadaf Ebrahimi /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
723*22dc650dSSadaf Ebrahimi for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
724*22dc650dSSadaf Ebrahimi map = inverted_map;
725*22dc650dSSadaf Ebrahimi }
726*22dc650dSSadaf Ebrahimi
727*22dc650dSSadaf Ebrahimi for (i = 0; i < 256; i++)
728*22dc650dSSadaf Ebrahimi {
729*22dc650dSSadaf Ebrahimi if ((map[i/8] & (1u << (i&7))) != 0)
730*22dc650dSSadaf Ebrahimi {
731*22dc650dSSadaf Ebrahimi int j;
732*22dc650dSSadaf Ebrahimi for (j = i+1; j < 256; j++)
733*22dc650dSSadaf Ebrahimi if ((map[j/8] & (1u << (j&7))) == 0) break;
734*22dc650dSSadaf Ebrahimi if (i == '-' || i == ']') fprintf(f, "\\");
735*22dc650dSSadaf Ebrahimi if (PRINTABLE(i)) fprintf(f, "%c", i);
736*22dc650dSSadaf Ebrahimi else fprintf(f, "\\x%02x", i);
737*22dc650dSSadaf Ebrahimi if (--j > i)
738*22dc650dSSadaf Ebrahimi {
739*22dc650dSSadaf Ebrahimi if (j != i + 1) fprintf(f, "-");
740*22dc650dSSadaf Ebrahimi if (j == '-' || j == ']') fprintf(f, "\\");
741*22dc650dSSadaf Ebrahimi if (PRINTABLE(j)) fprintf(f, "%c", j);
742*22dc650dSSadaf Ebrahimi else fprintf(f, "\\x%02x", j);
743*22dc650dSSadaf Ebrahimi }
744*22dc650dSSadaf Ebrahimi i = j;
745*22dc650dSSadaf Ebrahimi }
746*22dc650dSSadaf Ebrahimi }
747*22dc650dSSadaf Ebrahimi ccode += 32 / sizeof(PCRE2_UCHAR);
748*22dc650dSSadaf Ebrahimi }
749*22dc650dSSadaf Ebrahimi }
750*22dc650dSSadaf Ebrahimi
751*22dc650dSSadaf Ebrahimi /* For an XCLASS there is always some additional data */
752*22dc650dSSadaf Ebrahimi
753*22dc650dSSadaf Ebrahimi if (*code == OP_XCLASS)
754*22dc650dSSadaf Ebrahimi {
755*22dc650dSSadaf Ebrahimi PCRE2_UCHAR ch;
756*22dc650dSSadaf Ebrahimi while ((ch = *ccode++) != XCL_END)
757*22dc650dSSadaf Ebrahimi {
758*22dc650dSSadaf Ebrahimi const char *notch = "";
759*22dc650dSSadaf Ebrahimi
760*22dc650dSSadaf Ebrahimi switch(ch)
761*22dc650dSSadaf Ebrahimi {
762*22dc650dSSadaf Ebrahimi case XCL_NOTPROP:
763*22dc650dSSadaf Ebrahimi notch = "^";
764*22dc650dSSadaf Ebrahimi /* Fall through */
765*22dc650dSSadaf Ebrahimi
766*22dc650dSSadaf Ebrahimi case XCL_PROP:
767*22dc650dSSadaf Ebrahimi {
768*22dc650dSSadaf Ebrahimi unsigned int ptype = *ccode++;
769*22dc650dSSadaf Ebrahimi unsigned int pvalue = *ccode++;
770*22dc650dSSadaf Ebrahimi const char *s;
771*22dc650dSSadaf Ebrahimi
772*22dc650dSSadaf Ebrahimi switch(ptype)
773*22dc650dSSadaf Ebrahimi {
774*22dc650dSSadaf Ebrahimi case PT_PXGRAPH:
775*22dc650dSSadaf Ebrahimi fprintf(f, "[:%sgraph:]", notch);
776*22dc650dSSadaf Ebrahimi break;
777*22dc650dSSadaf Ebrahimi
778*22dc650dSSadaf Ebrahimi case PT_PXPRINT:
779*22dc650dSSadaf Ebrahimi fprintf(f, "[:%sprint:]", notch);
780*22dc650dSSadaf Ebrahimi break;
781*22dc650dSSadaf Ebrahimi
782*22dc650dSSadaf Ebrahimi case PT_PXPUNCT:
783*22dc650dSSadaf Ebrahimi fprintf(f, "[:%spunct:]", notch);
784*22dc650dSSadaf Ebrahimi break;
785*22dc650dSSadaf Ebrahimi
786*22dc650dSSadaf Ebrahimi case PT_PXXDIGIT:
787*22dc650dSSadaf Ebrahimi fprintf(f, "[:%sxdigit:]", notch);
788*22dc650dSSadaf Ebrahimi break;
789*22dc650dSSadaf Ebrahimi
790*22dc650dSSadaf Ebrahimi default:
791*22dc650dSSadaf Ebrahimi s = get_ucpname(ptype, pvalue);
792*22dc650dSSadaf Ebrahimi fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'),
793*22dc650dSSadaf Ebrahimi toupper(s[0]), s+1);
794*22dc650dSSadaf Ebrahimi break;
795*22dc650dSSadaf Ebrahimi }
796*22dc650dSSadaf Ebrahimi }
797*22dc650dSSadaf Ebrahimi break;
798*22dc650dSSadaf Ebrahimi
799*22dc650dSSadaf Ebrahimi default:
800*22dc650dSSadaf Ebrahimi ccode += 1 + print_char(f, ccode, utf);
801*22dc650dSSadaf Ebrahimi if (ch == XCL_RANGE)
802*22dc650dSSadaf Ebrahimi {
803*22dc650dSSadaf Ebrahimi fprintf(f, "-");
804*22dc650dSSadaf Ebrahimi ccode += 1 + print_char(f, ccode, utf);
805*22dc650dSSadaf Ebrahimi }
806*22dc650dSSadaf Ebrahimi break;
807*22dc650dSSadaf Ebrahimi }
808*22dc650dSSadaf Ebrahimi }
809*22dc650dSSadaf Ebrahimi }
810*22dc650dSSadaf Ebrahimi
811*22dc650dSSadaf Ebrahimi /* Indicate a non-UTF class which was created by negation */
812*22dc650dSSadaf Ebrahimi
813*22dc650dSSadaf Ebrahimi fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
814*22dc650dSSadaf Ebrahimi
815*22dc650dSSadaf Ebrahimi /* Handle repeats after a class or a back reference */
816*22dc650dSSadaf Ebrahimi
817*22dc650dSSadaf Ebrahimi CLASS_REF_REPEAT:
818*22dc650dSSadaf Ebrahimi switch(*ccode)
819*22dc650dSSadaf Ebrahimi {
820*22dc650dSSadaf Ebrahimi unsigned int min, max;
821*22dc650dSSadaf Ebrahimi
822*22dc650dSSadaf Ebrahimi case OP_CRSTAR:
823*22dc650dSSadaf Ebrahimi case OP_CRMINSTAR:
824*22dc650dSSadaf Ebrahimi case OP_CRPLUS:
825*22dc650dSSadaf Ebrahimi case OP_CRMINPLUS:
826*22dc650dSSadaf Ebrahimi case OP_CRQUERY:
827*22dc650dSSadaf Ebrahimi case OP_CRMINQUERY:
828*22dc650dSSadaf Ebrahimi case OP_CRPOSSTAR:
829*22dc650dSSadaf Ebrahimi case OP_CRPOSPLUS:
830*22dc650dSSadaf Ebrahimi case OP_CRPOSQUERY:
831*22dc650dSSadaf Ebrahimi fprintf(f, "%s", OP_names[*ccode]);
832*22dc650dSSadaf Ebrahimi extra += OP_lengths[*ccode];
833*22dc650dSSadaf Ebrahimi break;
834*22dc650dSSadaf Ebrahimi
835*22dc650dSSadaf Ebrahimi case OP_CRRANGE:
836*22dc650dSSadaf Ebrahimi case OP_CRMINRANGE:
837*22dc650dSSadaf Ebrahimi case OP_CRPOSRANGE:
838*22dc650dSSadaf Ebrahimi min = GET2(ccode,1);
839*22dc650dSSadaf Ebrahimi max = GET2(ccode,1 + IMM2_SIZE);
840*22dc650dSSadaf Ebrahimi if (max == 0) fprintf(f, "{%u,}", min);
841*22dc650dSSadaf Ebrahimi else fprintf(f, "{%u,%u}", min, max);
842*22dc650dSSadaf Ebrahimi if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
843*22dc650dSSadaf Ebrahimi else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
844*22dc650dSSadaf Ebrahimi extra += OP_lengths[*ccode];
845*22dc650dSSadaf Ebrahimi break;
846*22dc650dSSadaf Ebrahimi
847*22dc650dSSadaf Ebrahimi /* Do nothing if it's not a repeat; this code stops picky compilers
848*22dc650dSSadaf Ebrahimi warning about the lack of a default code path. */
849*22dc650dSSadaf Ebrahimi
850*22dc650dSSadaf Ebrahimi default:
851*22dc650dSSadaf Ebrahimi break;
852*22dc650dSSadaf Ebrahimi }
853*22dc650dSSadaf Ebrahimi break;
854*22dc650dSSadaf Ebrahimi
855*22dc650dSSadaf Ebrahimi case OP_MARK:
856*22dc650dSSadaf Ebrahimi case OP_COMMIT_ARG:
857*22dc650dSSadaf Ebrahimi case OP_PRUNE_ARG:
858*22dc650dSSadaf Ebrahimi case OP_SKIP_ARG:
859*22dc650dSSadaf Ebrahimi case OP_THEN_ARG:
860*22dc650dSSadaf Ebrahimi fprintf(f, " %s ", OP_names[*code]);
861*22dc650dSSadaf Ebrahimi print_custring_bylen(f, code + 2, code[1]);
862*22dc650dSSadaf Ebrahimi extra += code[1];
863*22dc650dSSadaf Ebrahimi break;
864*22dc650dSSadaf Ebrahimi
865*22dc650dSSadaf Ebrahimi case OP_THEN:
866*22dc650dSSadaf Ebrahimi fprintf(f, " %s", OP_names[*code]);
867*22dc650dSSadaf Ebrahimi break;
868*22dc650dSSadaf Ebrahimi
869*22dc650dSSadaf Ebrahimi case OP_CIRCM:
870*22dc650dSSadaf Ebrahimi case OP_DOLLM:
871*22dc650dSSadaf Ebrahimi flag = "/m";
872*22dc650dSSadaf Ebrahimi /* Fall through */
873*22dc650dSSadaf Ebrahimi
874*22dc650dSSadaf Ebrahimi /* Anything else is just an item with no data, but possibly a flag. */
875*22dc650dSSadaf Ebrahimi
876*22dc650dSSadaf Ebrahimi default:
877*22dc650dSSadaf Ebrahimi fprintf(f, " %s %s", flag, OP_names[*code]);
878*22dc650dSSadaf Ebrahimi break;
879*22dc650dSSadaf Ebrahimi }
880*22dc650dSSadaf Ebrahimi
881*22dc650dSSadaf Ebrahimi code += OP_lengths[*code] + extra;
882*22dc650dSSadaf Ebrahimi fprintf(f, "\n");
883*22dc650dSSadaf Ebrahimi }
884*22dc650dSSadaf Ebrahimi }
885*22dc650dSSadaf Ebrahimi
886*22dc650dSSadaf Ebrahimi /* End of pcre2_printint.c */
887