xref: /aosp_15_r20/external/pcre/src/pcre2_intmodedep.h (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2024 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi /* This module contains mode-dependent macro and structure definitions. The
43*22dc650dSSadaf Ebrahimi file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
44*22dc650dSSadaf Ebrahimi These mode-dependent items are kept in a separate file so that they can also be
45*22dc650dSSadaf Ebrahimi #included multiple times for different code unit widths by pcre2test in order
46*22dc650dSSadaf Ebrahimi to have access to the hidden structures at all supported widths.
47*22dc650dSSadaf Ebrahimi 
48*22dc650dSSadaf Ebrahimi Some of the mode-dependent macros are required at different widths for
49*22dc650dSSadaf Ebrahimi different parts of the pcre2test code (in particular, the included
50*22dc650dSSadaf Ebrahimi pcre_printint.c file). We undefine them here so that they can be re-defined for
51*22dc650dSSadaf Ebrahimi multiple inclusions. Not all of these are used in pcre2test, but it's easier
52*22dc650dSSadaf Ebrahimi just to undefine them all. */
53*22dc650dSSadaf Ebrahimi 
/* Remove any definitions left over from an earlier inclusion of this file at a
different code unit width, so that every #define further down starts from a
clean slate. BYTES2CU, CU2BYTES, PUT2INC and PUTINC are removed here as well,
although no definition of them appears in the part of the file reviewed
here. */
54*22dc650dSSadaf Ebrahimi #undef ACROSSCHAR
55*22dc650dSSadaf Ebrahimi #undef BACKCHAR
56*22dc650dSSadaf Ebrahimi #undef BYTES2CU
57*22dc650dSSadaf Ebrahimi #undef CHMAX_255
58*22dc650dSSadaf Ebrahimi #undef CU2BYTES
59*22dc650dSSadaf Ebrahimi #undef FORWARDCHAR
60*22dc650dSSadaf Ebrahimi #undef FORWARDCHARTEST
61*22dc650dSSadaf Ebrahimi #undef GET
62*22dc650dSSadaf Ebrahimi #undef GET2
63*22dc650dSSadaf Ebrahimi #undef GETCHAR
64*22dc650dSSadaf Ebrahimi #undef GETCHARINC
65*22dc650dSSadaf Ebrahimi #undef GETCHARINCTEST
66*22dc650dSSadaf Ebrahimi #undef GETCHARLEN
67*22dc650dSSadaf Ebrahimi #undef GETCHARLENTEST
68*22dc650dSSadaf Ebrahimi #undef GETCHARTEST
69*22dc650dSSadaf Ebrahimi #undef GET_EXTRALEN
70*22dc650dSSadaf Ebrahimi #undef HAS_EXTRALEN
71*22dc650dSSadaf Ebrahimi #undef IMM2_SIZE
72*22dc650dSSadaf Ebrahimi #undef MAX_255
73*22dc650dSSadaf Ebrahimi #undef MAX_MARK
74*22dc650dSSadaf Ebrahimi #undef MAX_PATTERN_SIZE
75*22dc650dSSadaf Ebrahimi #undef MAX_UTF_SINGLE_CU
76*22dc650dSSadaf Ebrahimi #undef NOT_FIRSTCU
77*22dc650dSSadaf Ebrahimi #undef PUT
78*22dc650dSSadaf Ebrahimi #undef PUT2
79*22dc650dSSadaf Ebrahimi #undef PUT2INC
80*22dc650dSSadaf Ebrahimi #undef PUTCHAR
81*22dc650dSSadaf Ebrahimi #undef PUTINC
82*22dc650dSSadaf Ebrahimi #undef TABLE_GET
83*22dc650dSSadaf Ebrahimi 
84*22dc650dSSadaf Ebrahimi 
85*22dc650dSSadaf Ebrahimi 
86*22dc650dSSadaf Ebrahimi /* -------------------------- MACROS ----------------------------- */
87*22dc650dSSadaf Ebrahimi 
88*22dc650dSSadaf Ebrahimi /* PCRE keeps offsets in its compiled code as at least 16-bit quantities
89*22dc650dSSadaf Ebrahimi (always stored in big-endian order in 8-bit mode) by default. These are used,
90*22dc650dSSadaf Ebrahimi for example, to link from the start of a subpattern to its alternatives and its
91*22dc650dSSadaf Ebrahimi end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
92*22dc650dSSadaf Ebrahimi to around 64K, which is big enough for almost everybody. However, I received a
93*22dc650dSSadaf Ebrahimi request for an even bigger limit. For this reason, and also to make the code
94*22dc650dSSadaf Ebrahimi easier to maintain, the storing and loading of offsets from the compiled code
95*22dc650dSSadaf Ebrahimi unit string is now handled by the macros that are defined here.
96*22dc650dSSadaf Ebrahimi 
97*22dc650dSSadaf Ebrahimi The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
98*22dc650dSSadaf Ebrahimi values of 3 or 4 are also supported. */
99*22dc650dSSadaf Ebrahimi 
100*22dc650dSSadaf Ebrahimi /* ------------------- 8-bit support  ------------------ */
101*22dc650dSSadaf Ebrahimi 
102*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
103*22dc650dSSadaf Ebrahimi 
104*22dc650dSSadaf Ebrahimi #if LINK_SIZE == 2
/* LINK_SIZE 2 with 8-bit code units: an offset occupies two code units, most
significant byte first.

PUT(a,n,d) stores the low 16 bits of d in a[n] and a[n+1]; GET(a,n) reads them
back as an unsigned int. Every use of an argument is parenthesized and PUT
expands to one parenthesized comma expression, so that a pointer expression
such as "code + 1" can be passed for a and PUT can appear inside a larger
expression. Both macros evaluate a and n more than once, so the arguments must
not have side effects. */

#define PUT(a,n,d)   \
  ((a)[(n)] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[(n)] << 8) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 16)
111*22dc650dSSadaf Ebrahimi 
112*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 3
/* LINK_SIZE 3 with 8-bit code units: an offset occupies three code units, most
significant byte first.

PUT(a,n,d) stores the low 24 bits of d in a[n]..a[n+2]; GET(a,n) reads them
back as an unsigned int. Every use of an argument is parenthesized and PUT
expands to one parenthesized comma expression, so that a pointer expression
such as "code + 1" can be passed for a. Both macros evaluate a and n more than
once, so the arguments must not have side effects. */

#define PUT(a,n,d)       \
  ((a)[(n)] = (PCRE2_UCHAR)((d) >> 16),    \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[(n)] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]))
#define MAX_PATTERN_SIZE (1 << 24)
120*22dc650dSSadaf Ebrahimi 
121*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 4
/* LINK_SIZE 4 with 8-bit code units: an offset occupies four code units, most
significant byte first.

PUT(a,n,d) stores the low 32 bits of d in a[n]..a[n+3]; GET(a,n) reads them
back as an unsigned int. In GET each code unit is converted to unsigned int
before it is shifted: without the conversion the operand is promoted to a
signed int, and shifting a byte value of 0x80 or more left by 24 bits would
overflow it. Every use of an argument is parenthesized and PUT expands to one
parenthesized comma expression, so that a pointer expression such as
"code + 1" can be passed for a. Both macros evaluate a and n more than once,
so the arguments must not have side effects. */

#define PUT(a,n,d)        \
  ((a)[(n)] = (PCRE2_UCHAR)((d) >> 24),     \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) >> 8),  \
   (a)[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  (((unsigned int)(a)[(n)] << 24) | ((unsigned int)(a)[(n)+1] << 16) | \
   ((unsigned int)(a)[(n)+2] << 8) | (unsigned int)(a)[(n)+3])
#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */
130*22dc650dSSadaf Ebrahimi 
131*22dc650dSSadaf Ebrahimi #else
132*22dc650dSSadaf Ebrahimi #error LINK_SIZE must be 2, 3, or 4
133*22dc650dSSadaf Ebrahimi #endif
134*22dc650dSSadaf Ebrahimi 
135*22dc650dSSadaf Ebrahimi 
136*22dc650dSSadaf Ebrahimi /* ------------------- 16-bit support  ------------------ */
137*22dc650dSSadaf Ebrahimi 
138*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
139*22dc650dSSadaf Ebrahimi 
140*22dc650dSSadaf Ebrahimi #if LINK_SIZE == 2
/* A configured LINK_SIZE of 2 needs only one 16-bit code unit, so LINK_SIZE is
redefined as 1 here and an offset is stored in a single code unit.

PUT(a,n,d) stores d, converted to PCRE2_UCHAR, in a[n]; GET(a,n) yields a[n].
The arguments are parenthesized so that a pointer expression such as
"code + 1" can be passed for a. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d)   \
  ((a)[(n)] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
  ((a)[(n)])
#define MAX_PATTERN_SIZE (1 << 16)
148*22dc650dSSadaf Ebrahimi 
149*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 3 || LINK_SIZE == 4
/* A configured LINK_SIZE of 3 or 4 needs two 16-bit code units, so LINK_SIZE
is redefined as 2 here and an offset is stored most significant unit first.

PUT(a,n,d) stores the low 32 bits of d in a[n] and a[n+1]; GET(a,n) reads them
back as an unsigned int. In GET each code unit is converted to unsigned int
before it is shifted: without the conversion the operand is promoted to a
signed int, and shifting a value of 0x8000 or more left by 16 bits would
overflow it. Every use of an argument is parenthesized and PUT expands to one
parenthesized comma expression, so that a pointer expression such as
"code + 1" can be passed for a. Both macros evaluate a and n more than once,
so the arguments must not have side effects. */

#undef LINK_SIZE
#define LINK_SIZE 2
#define PUT(a,n,d)   \
  ((a)[(n)] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
  (((unsigned int)(a)[(n)] << 16) | (unsigned int)(a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
158*22dc650dSSadaf Ebrahimi 
159*22dc650dSSadaf Ebrahimi #else
160*22dc650dSSadaf Ebrahimi #error LINK_SIZE must be 2, 3, or 4
161*22dc650dSSadaf Ebrahimi #endif
162*22dc650dSSadaf Ebrahimi 
163*22dc650dSSadaf Ebrahimi 
164*22dc650dSSadaf Ebrahimi /* ------------------- 32-bit support  ------------------ */
165*22dc650dSSadaf Ebrahimi 
166*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32
/* With 32-bit code units LINK_SIZE is always redefined as 1, whatever value
was configured, and an offset is stored in a single code unit.

PUT(a,n,d) stores d in a[n]; GET(a,n) yields a[n]. The arguments are
parenthesized so that a pointer expression such as "code + 1" can be passed
for a. */

#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d)   \
  ((a)[(n)] = (d))
#define GET(a,n) \
  ((a)[(n)])
#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
174*22dc650dSSadaf Ebrahimi 
175*22dc650dSSadaf Ebrahimi #else
176*22dc650dSSadaf Ebrahimi #error Unsupported compiling mode
177*22dc650dSSadaf Ebrahimi #endif
178*22dc650dSSadaf Ebrahimi 
179*22dc650dSSadaf Ebrahimi 
180*22dc650dSSadaf Ebrahimi /* --------------- Other mode-specific macros ----------------- */
181*22dc650dSSadaf Ebrahimi 
182*22dc650dSSadaf Ebrahimi /* PCRE uses some other (at least) 16-bit quantities that do not change when
183*22dc650dSSadaf Ebrahimi the size of offsets changes. These are used for repeat counts and for other
184*22dc650dSSadaf Ebrahimi things such as capturing parenthesis numbers in back references.
185*22dc650dSSadaf Ebrahimi 
186*22dc650dSSadaf Ebrahimi Define the number of code units required to hold a 16-bit count/offset, and
187*22dc650dSSadaf Ebrahimi macros to load and store such a value. For reasons that I do not understand,
188*22dc650dSSadaf Ebrahimi the expression in the 8-bit GET2 macro is treated by gcc as a signed
189*22dc650dSSadaf Ebrahimi expression, even when a is declared as unsigned. It seems that any kind of
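190*22dc650dSSadaf Ebrahimi arithmetic results in a signed value. Hence the cast. (This is the effect of
C's integer promotions: an operand narrower than int is converted to int
before the shift and the OR are applied.) */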
191*22dc650dSSadaf Ebrahimi 
192*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
/* 8-bit code units: a 16-bit count occupies two code units, most significant
byte first.

GET2(a,n) reads a[n] and a[n+1] as an unsigned int; PUT2(a,n,d) stores the
high byte of d in a[n] and the low byte in a[n+1]. Every use of an argument is
parenthesized and PUT2 expands to one parenthesized comma expression, so that
a pointer expression such as "code + 1" can be passed for a. Both macros
evaluate a and n more than once, so the arguments must not have side
effects. */

#define IMM2_SIZE 2
#define GET2(a,n) ((unsigned int)(((a)[(n)] << 8) | (a)[(n)+1]))
#define PUT2(a,n,d) ((a)[(n)] = (d) >> 8, (a)[(n)+1] = (d) & 255)
196*22dc650dSSadaf Ebrahimi 
197*22dc650dSSadaf Ebrahimi #else  /* Code units are 16 or 32 bits */
/* 16-bit and 32-bit code units: a 16-bit count fits in one code unit.

GET2(a,n) yields a[n] and PUT2(a,n,d) stores d in a[n]. The arguments and the
whole expansion are parenthesized so that a pointer expression such as
"code + 1" can be passed for a and the macros can appear inside a larger
expression. */

#define IMM2_SIZE 1
#define GET2(a,n) ((a)[(n)])
#define PUT2(a,n,d) ((a)[(n)] = (d))
201*22dc650dSSadaf Ebrahimi #endif
202*22dc650dSSadaf Ebrahimi 
203*22dc650dSSadaf Ebrahimi /* Other macros that are different for 8-bit mode. The MAX_255 macro checks
204*22dc650dSSadaf Ebrahimi whether its argument, which is assumed to be one code unit, is less than 256.
205*22dc650dSSadaf Ebrahimi The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
206*22dc650dSSadaf Ebrahimi name must fit in one code unit; currently it is set to 255 or 65535. The
207*22dc650dSSadaf Ebrahimi TABLE_GET macro is used to access elements of tables containing exactly 256
208*22dc650dSSadaf Ebrahimi items. Its argument is a code unit. When code points can be greater than 255, a
209*22dc650dSSadaf Ebrahimi check is needed before accessing these tables. */
210*22dc650dSSadaf Ebrahimi 
211*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
/* 8-bit code units: MAX_255 is the constant TRUE, MAX_MARK is 255, and
TABLE_GET indexes the table directly without using its default argument. */
212*22dc650dSSadaf Ebrahimi #define MAX_255(c) TRUE
213*22dc650dSSadaf Ebrahimi #define MAX_MARK ((1u << 8) - 1)
214*22dc650dSSadaf Ebrahimi #define TABLE_GET(c, table, default) ((table)[c])
/* CHMAX_255 performs a real comparison only when SUPPORT_UNICODE is defined,
in which case SUPPORT_WIDE_CHARS is defined as well; otherwise it is the
constant TRUE. */
215*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
216*22dc650dSSadaf Ebrahimi #define SUPPORT_WIDE_CHARS
217*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) ((c) <= 255u)
218*22dc650dSSadaf Ebrahimi #else
219*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) TRUE
220*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
221*22dc650dSSadaf Ebrahimi 
222*22dc650dSSadaf Ebrahimi #else  /* Code units are 16 or 32 bits */
/* Wider code units: CHMAX_255 and MAX_255 both compare against 255, MAX_MARK
is 65535, SUPPORT_WIDE_CHARS is always defined, and TABLE_GET yields its
default argument instead of indexing the table when c is greater than 255.
TABLE_GET evaluates c twice in that case. */
223*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) ((c) <= 255u)
224*22dc650dSSadaf Ebrahimi #define MAX_255(c) ((c) <= 255u)
225*22dc650dSSadaf Ebrahimi #define MAX_MARK ((1u << 16) - 1)
226*22dc650dSSadaf Ebrahimi #define SUPPORT_WIDE_CHARS
227*22dc650dSSadaf Ebrahimi #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
228*22dc650dSSadaf Ebrahimi #endif
229*22dc650dSSadaf Ebrahimi 
230*22dc650dSSadaf Ebrahimi 
231*22dc650dSSadaf Ebrahimi /* ----------------- Character-handling macros ----------------- */
232*22dc650dSSadaf Ebrahimi 
233*22dc650dSSadaf Ebrahimi /* There is a proposed future special "UTF-21" mode, in which only the lowest
234*22dc650dSSadaf Ebrahimi 21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
235*22dc650dSSadaf Ebrahimi high-order bits available to the application for other uses. In preparation for
236*22dc650dSSadaf Ebrahimi the future implementation of this mode, there are macros that load a data item
237*22dc650dSSadaf Ebrahimi and, if in this special mode, mask it to 21 bits. These macros all have names
238*22dc650dSSadaf Ebrahimi starting with UCHAR21. In all other modes, including the normal 32-bit
239*22dc650dSSadaf Ebrahimi library, the macros all have the same simple definitions. When the new mode is
240*22dc650dSSadaf Ebrahimi implemented, it is expected that these definitions will be varied appropriately
241*22dc650dSSadaf Ebrahimi using #ifdef when compiling the library that supports the special mode. */
242*22dc650dSSadaf Ebrahimi 
/* As defined here, all four macros are a plain dereference of eptr. The two
INC forms also post-increment the pointer, and each TEST form is identical to
its non-TEST counterpart. */
243*22dc650dSSadaf Ebrahimi #define UCHAR21(eptr)        (*(eptr))
244*22dc650dSSadaf Ebrahimi #define UCHAR21TEST(eptr)    (*(eptr))
245*22dc650dSSadaf Ebrahimi #define UCHAR21INC(eptr)     (*(eptr)++)
246*22dc650dSSadaf Ebrahimi #define UCHAR21INCTEST(eptr) (*(eptr)++)
247*22dc650dSSadaf Ebrahimi 
248*22dc650dSSadaf Ebrahimi /* When UTF encoding is being used, a character is no longer just a single
249*22dc650dSSadaf Ebrahimi byte in 8-bit mode or a single short in 16-bit mode. The macros for character
250*22dc650dSSadaf Ebrahimi handling generate simple sequences when used in the basic mode, and more
251*22dc650dSSadaf Ebrahimi complicated ones for UTF characters. GETCHARLENTEST and other macros are not
252*22dc650dSSadaf Ebrahimi used when UTF is not supported. To make sure they can never even appear when
253*22dc650dSSadaf Ebrahimi UTF support is omitted, we don't even define them. */
254*22dc650dSSadaf Ebrahimi 
255*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
256*22dc650dSSadaf Ebrahimi 
257*22dc650dSSadaf Ebrahimi /* #define MAX_UTF_SINGLE_CU */
258*22dc650dSSadaf Ebrahimi /* #define HAS_EXTRALEN(c) */
259*22dc650dSSadaf Ebrahimi /* #define GET_EXTRALEN(c) */
260*22dc650dSSadaf Ebrahimi /* #define NOT_FIRSTCU(c) */
/* Without Unicode support a character is always exactly one code unit, so the
fetch macros are plain loads: GETCHAR, GETCHARTEST and GETCHARLEN load *eptr
into c without moving the pointer (GETCHARLEN leaves len untouched), while
GETCHARINC and GETCHARINCTEST also post-increment eptr. Each of these macros
supplies its own terminating semicolon. PUTCHAR stores c at p and yields 1,
the number of code units written.

The pointer argument is parenthesized wherever it is dereferenced, so that an
expression such as "ptr + 1" is dereferenced as a whole rather than being read
as "(*ptr) + 1". */

#define GETCHAR(c, eptr) c = *(eptr);
#define GETCHARTEST(c, eptr) c = *(eptr);
#define GETCHARINC(c, eptr) c = *(eptr)++;
#define GETCHARINCTEST(c, eptr) c = *(eptr)++;
#define GETCHARLEN(c, eptr, len) c = *(eptr);
#define PUTCHAR(c, p) (*(p) = (c), 1)
267*22dc650dSSadaf Ebrahimi /* #define GETCHARLENTEST(c, eptr, len) */
268*22dc650dSSadaf Ebrahimi /* #define BACKCHAR(eptr) */
269*22dc650dSSadaf Ebrahimi /* #define FORWARDCHAR(eptr) */
270*22dc650dSSadaf Ebrahimi /* #define FORWARDCHARTEST(eptr,end) */
271*22dc650dSSadaf Ebrahimi /* #define ACROSSCHAR(condition, eptr, action) */
272*22dc650dSSadaf Ebrahimi 
273*22dc650dSSadaf Ebrahimi #else   /* SUPPORT_UNICODE */
274*22dc650dSSadaf Ebrahimi 
275*22dc650dSSadaf Ebrahimi /* ------------------- 8-bit support  ------------------ */
276*22dc650dSSadaf Ebrahimi 
277*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
278*22dc650dSSadaf Ebrahimi #define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
279*22dc650dSSadaf Ebrahimi 
280*22dc650dSSadaf Ebrahimi /* The largest UTF code point that can be encoded as a single code unit. */
281*22dc650dSSadaf Ebrahimi 
282*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU 127
283*22dc650dSSadaf Ebrahimi 
284*22dc650dSSadaf Ebrahimi /* Tests whether the code point needs extra characters to decode. */
/* This simply forwards to HASUTF8EXTRALEN, which is defined outside this
file. */
285*22dc650dSSadaf Ebrahimi 
286*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
287*22dc650dSSadaf Ebrahimi 
288*22dc650dSSadaf Ebrahimi /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
289*22dc650dSSadaf Ebrahimi Otherwise it has an undefined behaviour. */
/* NOTE(review): no IS_MULTICHAR macro is defined in this part of the file;
HAS_EXTRALEN above is presumably the intended precondition - TODO confirm.
The value is looked up in the utf8_table4 table, indexed by the low six bits
of c. */
290*22dc650dSSadaf Ebrahimi 
291*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
292*22dc650dSSadaf Ebrahimi 
293*22dc650dSSadaf Ebrahimi /* Returns TRUE, if the given value is not the first code unit of a UTF
294*22dc650dSSadaf Ebrahimi sequence. */
/* That is, when the top two bits of c are binary 10. */
295*22dc650dSSadaf Ebrahimi 
296*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
297*22dc650dSSadaf Ebrahimi 
/* The GETCHARxxx macros below load the character at eptr into c. Each one is a
short statement sequence that ends with its own semicolon, so it must be used
where more than one statement is acceptable (not as the unbraced body of an
if, for example). The pointer argument is parenthesized wherever it is used,
so that an expression such as "ptr + 1" is dereferenced as a whole rather
than being read as "(*ptr) + 1". The macros with TEST in their names refer to
a variable called utf, which must be in scope where they are used. The
multi-unit work is done by GETUTF8, GETUTF8INC and GETUTF8LEN, which are
defined outside this file. */

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if (c >= 0xc0u) GETUTF8(c, (eptr));

/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && c >= 0xc0u) GETUTF8(c, (eptr));

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if (c >= 0xc0u) GETUTF8INC(c, (eptr));

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && c >= 0xc0u) GETUTF8INC(c, (eptr));

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if (c >= 0xc0u) GETUTF8LEN(c, (eptr), len);

/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && c >= 0xc0u) GETUTF8LEN(c, (eptr), len);
340*22dc650dSSadaf Ebrahimi 
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code.

In all four macros below the arguments are parenthesized wherever they are
used, so that an argument containing an operator (a conditional expression
passed as end, for instance) is evaluated as a unit. None of the macros
supplies a terminating semicolon, and each evaluates eptr repeatedly. */

#define BACKCHAR(eptr) while ((*(eptr) & 0xc0u) == 0x80u) (eptr)--

/* Same as above, just in the other direction. FORWARDCHARTEST additionally
stops as soon as eptr is no longer below end. */
#define FORWARDCHAR(eptr) while ((*(eptr) & 0xc0u) == 0x80u) (eptr)++
#define FORWARDCHARTEST(eptr,end) \
  while ((eptr) < (end) && (*(eptr) & 0xc0u) == 0x80u) (eptr)++

/* Same as above, but it allows a fully customizable form: action is repeated
for as long as condition holds and eptr points at a continuation byte. */
#define ACROSSCHAR(condition, eptr, action) \
  while ((condition) && (*(eptr) & 0xc0u) == 0x80u) action
354*22dc650dSSadaf Ebrahimi 
/* Deposit a character into memory, returning the number of code units. When
the variable utf (which must be in scope) is set and c is greater than
MAX_UTF_SINGLE_CU, the work is passed to the ord2utf function; otherwise c is
stored at p and the result is 1. c and p are parenthesized where they are
compared and stored, so that an argument containing an operator of lower
precedence than ">" or "=" (such as "a | b") is treated as a unit. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
359*22dc650dSSadaf Ebrahimi 
360*22dc650dSSadaf Ebrahimi 
361*22dc650dSSadaf Ebrahimi /* ------------------- 16-bit support  ------------------ */
362*22dc650dSSadaf Ebrahimi 
363*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
364*22dc650dSSadaf Ebrahimi #define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
365*22dc650dSSadaf Ebrahimi 
366*22dc650dSSadaf Ebrahimi /* The largest UTF code point that can be encoded as a single code unit. */
367*22dc650dSSadaf Ebrahimi 
368*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU 65535
369*22dc650dSSadaf Ebrahimi 
370*22dc650dSSadaf Ebrahimi /* Tests whether the code point needs extra characters to decode. */
/* That is, when c lies in the range 0xd800 to 0xdbff. */
371*22dc650dSSadaf Ebrahimi 
372*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
373*22dc650dSSadaf Ebrahimi 
374*22dc650dSSadaf Ebrahimi /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
375*22dc650dSSadaf Ebrahimi Otherwise it has an undefined behaviour. */
/* NOTE(review): no IS_MULTICHAR macro is defined in this part of the file;
HAS_EXTRALEN above is presumably the intended precondition - TODO confirm.
The result here is the constant 1 and c is not examined. */
376*22dc650dSSadaf Ebrahimi 
377*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) 1
378*22dc650dSSadaf Ebrahimi 
379*22dc650dSSadaf Ebrahimi /* Returns TRUE, if the given value is not the first code unit of a UTF
380*22dc650dSSadaf Ebrahimi sequence. */
/* That is, when c lies in the range 0xdc00 to 0xdfff. */
381*22dc650dSSadaf Ebrahimi 
382*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
383*22dc650dSSadaf Ebrahimi 
/* The GETCHARxxx macros below load the character at eptr into c. Each one is a
short statement sequence that ends with its own semicolon, so it must be used
where more than one statement is acceptable (not as the unbraced body of an
if, for example). The pointer argument is parenthesized wherever it is used,
so that an expression such as "ptr + 1" is dereferenced and indexed as a
whole. The macros with TEST in their names refer to a variable called utf,
which must be in scope where they are used. */

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. On entry c holds the high surrogate; the low surrogate
is read from eptr[1]. */

#define GETUTF16(c, eptr) \
   { c = (((c & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, not advancing the pointer. This is called when
we know we are in UTF-16 mode. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
the pointer. On entry c holds the high surrogate and eptr already points at
the low surrogate. */

#define GETUTF16INC(c, eptr) \
   { c = (((c & 0x3ffu) << 10) | (*(eptr)++ & 0x3ffu)) + 0x10000u; }

/* Get the next UTF-16 character, advancing the pointer. This is called when we
know we are in UTF-16 mode. */

#define GETCHARINC(c, eptr) \
  c = *(eptr)++; \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-16 mode. */

#define GETCHARINCTEST(c, eptr) \
  c = *(eptr)++; \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);

/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer, incrementing the length. */

#define GETUTF16LEN(c, eptr, len) \
   { c = (((c & 0x3ffu) << 10) | ((eptr)[1] & 0x3ffu)) + 0x10000u; len++; }

/* Get the next UTF-16 character, not advancing the pointer, incrementing
length if there is a low surrogate. This is called when we know we are in
UTF-16 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);

/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
pointer, incrementing length if there is a low surrogate. This is called when
we do not know if we are in UTF-16 mode. */

#define GETCHARLENTEST(c, eptr, len) \
  c = *(eptr); \
  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
445*22dc650dSSadaf Ebrahimi 
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
code.

BACKCHAR, FORWARDCHAR and FORWARDCHARTEST are wrapped in do { } while (0),
the same shape as their 32-bit counterparts, so that each is a single
statement and an "else" written after a use of the macro pairs with the
caller's own "if" instead of the test inside the macro. The arguments are
parenthesized wherever they are used. None of the macros supplies a
terminating semicolon. */

#define BACKCHAR(eptr) \
  do { if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)--; } while (0)

/* Same as above, just in the other direction. FORWARDCHARTEST moves only if
eptr is below end. */
#define FORWARDCHAR(eptr) \
  do { if ((*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++; } while (0)
#define FORWARDCHARTEST(eptr,end) \
  do { if ((eptr) < (end) && (*(eptr) & 0xfc00u) == 0xdc00u) (eptr)++; } while (0)

/* Same as above, but it allows a fully customizable form: action runs once if
condition holds and eptr points at a low surrogate. The test is written as
"if (!(...)) { } else action" rather than inside a do-while wrapper: the
macro's own "else" stops a following "else" from attaching to it, and no loop
is introduced that could capture a break or continue appearing in action. */
#define ACROSSCHAR(condition, eptr, action) \
  if (!((condition) && (*(eptr) & 0xfc00u) == 0xdc00u)) { } else action
460*22dc650dSSadaf Ebrahimi 
/* Deposit a character into memory, returning the number of code units. When
the variable utf (which must be in scope) is set and c is greater than
MAX_UTF_SINGLE_CU, the work is passed to the ord2utf function; otherwise c is
stored at p and the result is 1. c and p are parenthesized where they are
compared and stored, so that an argument containing an operator of lower
precedence than ">" or "=" (such as "a | b") is treated as a unit. */

#define PUTCHAR(c, p) ((utf && (c) > MAX_UTF_SINGLE_CU)? \
  PRIV(ord2utf)(c,p) : (*(p) = (c), 1))
465*22dc650dSSadaf Ebrahimi 
466*22dc650dSSadaf Ebrahimi 
467*22dc650dSSadaf Ebrahimi /* ------------------- 32-bit support  ------------------ */
468*22dc650dSSadaf Ebrahimi 
469*22dc650dSSadaf Ebrahimi #else
470*22dc650dSSadaf Ebrahimi 
471*22dc650dSSadaf Ebrahimi /* These are trivial for the 32-bit library, since all UTF-32 characters fit
472*22dc650dSSadaf Ebrahimi into one PCRE2_UCHAR unit. */
473*22dc650dSSadaf Ebrahimi 
474*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU (0x10ffffu)
475*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) (0)
476*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) (0)
477*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (0)
478*22dc650dSSadaf Ebrahimi 
479*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, not advancing the pointer. This is called when
480*22dc650dSSadaf Ebrahimi we know we are in UTF-32 mode. */
481*22dc650dSSadaf Ebrahimi 
482*22dc650dSSadaf Ebrahimi #define GETCHAR(c, eptr) \
483*22dc650dSSadaf Ebrahimi   c = *(eptr);
484*22dc650dSSadaf Ebrahimi 
485*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
486*22dc650dSSadaf Ebrahimi pointer. */
487*22dc650dSSadaf Ebrahimi 
488*22dc650dSSadaf Ebrahimi #define GETCHARTEST(c, eptr) \
489*22dc650dSSadaf Ebrahimi   c = *(eptr);
490*22dc650dSSadaf Ebrahimi 
491*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, advancing the pointer. This is called when we
492*22dc650dSSadaf Ebrahimi know we are in UTF-32 mode. */
493*22dc650dSSadaf Ebrahimi 
494*22dc650dSSadaf Ebrahimi #define GETCHARINC(c, eptr) \
495*22dc650dSSadaf Ebrahimi   c = *((eptr)++);
496*22dc650dSSadaf Ebrahimi 
497*22dc650dSSadaf Ebrahimi /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
498*22dc650dSSadaf Ebrahimi This is called when we don't know if we are in UTF-32 mode. */
499*22dc650dSSadaf Ebrahimi 
500*22dc650dSSadaf Ebrahimi #define GETCHARINCTEST(c, eptr) \
501*22dc650dSSadaf Ebrahimi   c = *((eptr)++);
502*22dc650dSSadaf Ebrahimi 
503*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, not advancing the pointer, not incrementing
504*22dc650dSSadaf Ebrahimi length (since all UTF-32 is of length 1). This is called when we know we are in
505*22dc650dSSadaf Ebrahimi UTF-32 mode. */
506*22dc650dSSadaf Ebrahimi 
507*22dc650dSSadaf Ebrahimi #define GETCHARLEN(c, eptr, len) \
508*22dc650dSSadaf Ebrahimi   GETCHAR(c, eptr)
509*22dc650dSSadaf Ebrahimi 
510*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
511*22dc650dSSadaf Ebrahimi pointer, not incrementing the length (since all UTF-32 is of length 1).
512*22dc650dSSadaf Ebrahimi This is called when we do not know if we are in UTF-32 mode. */
513*22dc650dSSadaf Ebrahimi 
/* The len argument does not appear in the expansion, so it is neither read
nor written; the result is exactly GETCHARTEST(c, eptr), trailing ';'
included. */
514*22dc650dSSadaf Ebrahimi #define GETCHARLENTEST(c, eptr, len) \
515*22dc650dSSadaf Ebrahimi   GETCHARTEST(c, eptr)
516*22dc650dSSadaf Ebrahimi 
517*22dc650dSSadaf Ebrahimi /* If the pointer is not at the start of a character, move it back until
518*22dc650dSSadaf Ebrahimi it is. This is called only in UTF-32 mode - we don't put a test within the
519*22dc650dSSadaf Ebrahimi macro because almost all calls are already within a block of UTF-32 only
520*22dc650dSSadaf Ebrahimi code.
521*22dc650dSSadaf Ebrahimi 
522*22dc650dSSadaf Ebrahimi These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
523*22dc650dSSadaf Ebrahimi 
/* eptr is not expanded, so any side effect in the argument is discarded. The
do { } while (0) form needs a terminating ';' at the point of use. */
524*22dc650dSSadaf Ebrahimi #define BACKCHAR(eptr) do { } while (0)
525*22dc650dSSadaf Ebrahimi 
526*22dc650dSSadaf Ebrahimi /* Same as above, just in the other direction. */
527*22dc650dSSadaf Ebrahimi 
/* Neither eptr nor end is expanded, so the arguments are never evaluated and
the pointer is left where it was. */
528*22dc650dSSadaf Ebrahimi #define FORWARDCHAR(eptr) do { } while (0)
529*22dc650dSSadaf Ebrahimi #define FORWARDCHARTEST(eptr,end) do { } while (0)
530*22dc650dSSadaf Ebrahimi 
531*22dc650dSSadaf Ebrahimi /* Same as above, but it allows a fully customizable form. */
532*22dc650dSSadaf Ebrahimi 
/* None of condition, eptr or action is expanded here, so 'action' never runs
and 'condition' is never evaluated. */
533*22dc650dSSadaf Ebrahimi #define ACROSSCHAR(condition, eptr, action) do { } while (0)
534*22dc650dSSadaf Ebrahimi 
535*22dc650dSSadaf Ebrahimi /* Deposit a character into memory, returning the number of code units. */
536*22dc650dSSadaf Ebrahimi 
/* Store character c in the single code unit addressed by p and yield 1, the
number of code units written. Both parameters are parenthesized so that an
expression argument such as "buf + 1" is dereferenced as a whole; each
argument is evaluated exactly once. */
#define PUTCHAR(c, p) (*(p) = (c), 1)
538*22dc650dSSadaf Ebrahimi 
539*22dc650dSSadaf Ebrahimi #endif  /* UTF-32 character handling */
540*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
541*22dc650dSSadaf Ebrahimi 
542*22dc650dSSadaf Ebrahimi 
543*22dc650dSSadaf Ebrahimi /* Mode-dependent macros that have the same definition in all modes. */
544*22dc650dSSadaf Ebrahimi 
/* CU2BYTES and BYTES2CU convert between a count of code units and a count of
bytes, using PCRE2_CODE_UNIT_WIDTH/8 bytes per code unit; BYTES2CU truncates
when x is an integer that is not a whole number of units. PUTINC and PUT2INC
expand to an unparenthesized comma expression that invokes PUT/PUT2 and then
advances 'a' by LINK_SIZE/IMM2_SIZE: 'a' must be a modifiable lvalue, it
appears more than once in the expansion, and the macros should only be used
as a complete expression statement. */
545*22dc650dSSadaf Ebrahimi #define CU2BYTES(x)     ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
546*22dc650dSSadaf Ebrahimi #define BYTES2CU(x)     ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
547*22dc650dSSadaf Ebrahimi #define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE
548*22dc650dSSadaf Ebrahimi #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
549*22dc650dSSadaf Ebrahimi 
550*22dc650dSSadaf Ebrahimi 
551*22dc650dSSadaf Ebrahimi /* ----------------------- HIDDEN STRUCTURES ----------------------------- */
552*22dc650dSSadaf Ebrahimi 
553*22dc650dSSadaf Ebrahimi /* NOTE: All these structures *must* start with a pcre2_memctl structure. The
554*22dc650dSSadaf Ebrahimi code that uses them is simpler because it assumes this. */
555*22dc650dSSadaf Ebrahimi 
556*22dc650dSSadaf Ebrahimi /* The real general context structure. At present it holds only data for custom
557*22dc650dSSadaf Ebrahimi memory control. */
558*22dc650dSSadaf Ebrahimi 
559*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_general_context {
560*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;  /* Memory control fields; must be the first member */
561*22dc650dSSadaf Ebrahimi } pcre2_real_general_context;
562*22dc650dSSadaf Ebrahimi 
563*22dc650dSSadaf Ebrahimi /* The real compile context structure */
564*22dc650dSSadaf Ebrahimi 
565*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_compile_context {
566*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;  /* Memory control fields; must be the first member */
567*22dc650dSSadaf Ebrahimi   int (*stack_guard)(uint32_t, void *);  /* Guard callback; meaning of the uint32_t is not shown here - confirm */
568*22dc650dSSadaf Ebrahimi   void *stack_guard_data;  /* Presumably the void * handed to stack_guard - confirm */
569*22dc650dSSadaf Ebrahimi   const uint8_t *tables;  /* Character tables (cf. pcre2_real_code.tables) */
570*22dc650dSSadaf Ebrahimi   PCRE2_SIZE max_pattern_length;  /* Upper limit on pattern length; units not shown here */
571*22dc650dSSadaf Ebrahimi   PCRE2_SIZE max_pattern_compiled_length;  /* Upper limit on compiled pattern length; units not shown here */
572*22dc650dSSadaf Ebrahimi   uint16_t bsr_convention;  /* What \R matches (cf. pcre2_real_code) */
573*22dc650dSSadaf Ebrahimi   uint16_t newline_convention;  /* What is a newline? (cf. pcre2_real_code) */
574*22dc650dSSadaf Ebrahimi   uint32_t parens_nest_limit;  /* Limit on nesting of parentheses; presumably checked against compile_block.parens_depth - confirm */
575*22dc650dSSadaf Ebrahimi   uint32_t extra_options;  /* Extra options; pcre2_real_code.extra_options is taken from here */
576*22dc650dSSadaf Ebrahimi   uint32_t max_varlookbehind;  /* Limit for variable lookbehinds (cf. compile_block.max_varlookbehind) */
577*22dc650dSSadaf Ebrahimi } pcre2_real_compile_context;
578*22dc650dSSadaf Ebrahimi 
579*22dc650dSSadaf Ebrahimi /* The real match context structure. */
580*22dc650dSSadaf Ebrahimi 
581*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_match_context {
582*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;  /* Memory control fields; must be the first member */
583*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
584*22dc650dSSadaf Ebrahimi   pcre2_jit_callback jit_callback;  /* Present only when SUPPORT_JIT is defined */
585*22dc650dSSadaf Ebrahimi   void *jit_callback_data;  /* Presumably passed to jit_callback - confirm */
586*22dc650dSSadaf Ebrahimi #endif
587*22dc650dSSadaf Ebrahimi   int    (*callout)(pcre2_callout_block *, void *);  /* Callout function (cf. match_block.callout) */
588*22dc650dSSadaf Ebrahimi   void    *callout_data;  /* To pass back to callouts (cf. match_block.callout_data) */
589*22dc650dSSadaf Ebrahimi   int    (*substitute_callout)(pcre2_substitute_callout_block *, void *);  /* Callout function used for substitutions */
590*22dc650dSSadaf Ebrahimi   void    *substitute_callout_data;  /* Presumably passed to substitute_callout - confirm */
591*22dc650dSSadaf Ebrahimi   PCRE2_SIZE offset_limit;  /* Offset limit; exact semantics not shown here */
592*22dc650dSSadaf Ebrahimi   uint32_t heap_limit;  /* Heap limit; units not shown here */
593*22dc650dSSadaf Ebrahimi   uint32_t match_limit;  /* Match limit */
594*22dc650dSSadaf Ebrahimi   uint32_t depth_limit;  /* Depth limit (cf. match_block.match_limit_depth) */
595*22dc650dSSadaf Ebrahimi } pcre2_real_match_context;
596*22dc650dSSadaf Ebrahimi 
597*22dc650dSSadaf Ebrahimi /* The real convert context structure. */
598*22dc650dSSadaf Ebrahimi 
599*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_convert_context {
600*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;  /* Memory control fields; must be the first member */
601*22dc650dSSadaf Ebrahimi   uint32_t glob_separator;  /* Presumably the separator character for glob conversion - confirm */
602*22dc650dSSadaf Ebrahimi   uint32_t glob_escape;  /* Presumably the escape character for glob conversion - confirm */
603*22dc650dSSadaf Ebrahimi } pcre2_real_convert_context;
604*22dc650dSSadaf Ebrahimi 
605*22dc650dSSadaf Ebrahimi /* The real compiled code structure. The type for the blocksize field is
606*22dc650dSSadaf Ebrahimi defined specially because it is required in pcre2_serialize_decode() when
607*22dc650dSSadaf Ebrahimi copying the size from possibly unaligned memory into a variable of the same
608*22dc650dSSadaf Ebrahimi type. Use a macro rather than a typedef to avoid compiler warnings when this
609*22dc650dSSadaf Ebrahimi file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
610*22dc650dSSadaf Ebrahimi largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern
611*22dc650dSSadaf Ebrahimi have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a
612*22dc650dSSadaf Ebrahimi 16-bit field here.) */
613*22dc650dSSadaf Ebrahimi 
/* Type of pcre2_real_code.blocksize. The #undef comes first presumably so
that including this file more than once does not redefine the macro. */
614*22dc650dSSadaf Ebrahimi #undef  CODE_BLOCKSIZE_TYPE
615*22dc650dSSadaf Ebrahimi #define CODE_BLOCKSIZE_TYPE PCRE2_SIZE
616*22dc650dSSadaf Ebrahimi 
/* Largest supported lookbehind; UINT16_MAX matches the uint16_t type of
pcre2_real_code.max_lookbehind. The #undef comes first presumably so that
including this file more than once does not redefine the macro. */
617*22dc650dSSadaf Ebrahimi #undef  LOOKBEHIND_MAX
618*22dc650dSSadaf Ebrahimi #define LOOKBEHIND_MAX UINT16_MAX
619*22dc650dSSadaf Ebrahimi 
620*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_code {
621*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;            /* Memory control fields (must be first) */
622*22dc650dSSadaf Ebrahimi   const uint8_t *tables;          /* The character tables */
623*22dc650dSSadaf Ebrahimi   void    *executable_jit;        /* Pointer to JIT code */
624*22dc650dSSadaf Ebrahimi   uint8_t  start_bitmap[32];      /* Bitmap for starting code unit < 256 (32 bytes = 256 bits) */
625*22dc650dSSadaf Ebrahimi   CODE_BLOCKSIZE_TYPE blocksize;  /* Total (bytes) that was malloc-ed; type is PCRE2_SIZE */
626*22dc650dSSadaf Ebrahimi   uint32_t magic_number;          /* Paranoid and endianness check */
627*22dc650dSSadaf Ebrahimi   uint32_t compile_options;       /* Options passed to pcre2_compile() */
628*22dc650dSSadaf Ebrahimi   uint32_t overall_options;       /* Options after processing the pattern */
629*22dc650dSSadaf Ebrahimi   uint32_t extra_options;         /* Taken from compile_context */
630*22dc650dSSadaf Ebrahimi   uint32_t flags;                 /* Various state flags */
631*22dc650dSSadaf Ebrahimi   uint32_t limit_heap;            /* Limit set in the pattern */
632*22dc650dSSadaf Ebrahimi   uint32_t limit_match;           /* Limit set in the pattern */
633*22dc650dSSadaf Ebrahimi   uint32_t limit_depth;           /* Limit set in the pattern */
634*22dc650dSSadaf Ebrahimi   uint32_t first_codeunit;        /* Starting code unit */
635*22dc650dSSadaf Ebrahimi   uint32_t last_codeunit;         /* This codeunit must be seen */
636*22dc650dSSadaf Ebrahimi   uint16_t bsr_convention;        /* What \R matches */
637*22dc650dSSadaf Ebrahimi   uint16_t newline_convention;    /* What is a newline? */
638*22dc650dSSadaf Ebrahimi   uint16_t max_lookbehind;        /* Longest lookbehind (characters); at most LOOKBEHIND_MAX */
639*22dc650dSSadaf Ebrahimi   uint16_t minlength;             /* Minimum length of match */
640*22dc650dSSadaf Ebrahimi   uint16_t top_bracket;           /* Highest numbered group */
641*22dc650dSSadaf Ebrahimi   uint16_t top_backref;           /* Highest numbered back reference */
642*22dc650dSSadaf Ebrahimi   uint16_t name_entry_size;       /* Size (code units) of table entries */
643*22dc650dSSadaf Ebrahimi   uint16_t name_count;            /* Number of name entries in the table */
644*22dc650dSSadaf Ebrahimi } pcre2_real_code;
645*22dc650dSSadaf Ebrahimi 
646*22dc650dSSadaf Ebrahimi /* The real match data structure. Define ovector as large as it can ever
647*22dc650dSSadaf Ebrahimi actually be so that array bound checkers don't grumble. Memory for this
648*22dc650dSSadaf Ebrahimi structure is obtained by calling pcre2_match_data_create(), which sets the size
649*22dc650dSSadaf Ebrahimi as the offset of ovector plus a pair of elements for each capturable string, so
650*22dc650dSSadaf Ebrahimi the size varies from call to call. As the maximum number of capturing
651*22dc650dSSadaf Ebrahimi subpatterns is 65535 we must allow for 65536 strings to include the overall
652*22dc650dSSadaf Ebrahimi match. (See also the heapframe structure below.) */
653*22dc650dSSadaf Ebrahimi 
654*22dc650dSSadaf Ebrahimi struct heapframe;  /* Forward reference */
655*22dc650dSSadaf Ebrahimi 
656*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_match_data {
657*22dc650dSSadaf Ebrahimi   pcre2_memctl     memctl;           /* Memory control fields (must be first) */
658*22dc650dSSadaf Ebrahimi   const pcre2_real_code *code;       /* The pattern used for the match */
659*22dc650dSSadaf Ebrahimi   PCRE2_SPTR       subject;          /* The subject that was matched */
660*22dc650dSSadaf Ebrahimi   PCRE2_SPTR       mark;             /* Pointer to last mark */
661*22dc650dSSadaf Ebrahimi   struct heapframe *heapframes;      /* Backtracking frames heap memory */
662*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       heapframes_size;  /* Malloc-ed size */
663*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       subject_length;   /* Subject length */
664*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       leftchar;         /* Offset to leftmost code unit */
665*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       rightchar;        /* Offset to rightmost code unit */
666*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       startchar;        /* Offset to starting code unit */
667*22dc650dSSadaf Ebrahimi   uint8_t          matchedby;        /* Type of match (normal, JIT, DFA) */
668*22dc650dSSadaf Ebrahimi   uint8_t          flags;            /* Various flags */
669*22dc650dSSadaf Ebrahimi   uint16_t         oveccount;        /* Number of pairs */
670*22dc650dSSadaf Ebrahimi   int              rc;               /* The return code from the match */
671*22dc650dSSadaf Ebrahimi   PCRE2_SIZE       ovector[131072];  /* Must be last in the structure; declared as 2 * 65536 entries, allocated size varies */
672*22dc650dSSadaf Ebrahimi } pcre2_real_match_data;
673*22dc650dSSadaf Ebrahimi 
674*22dc650dSSadaf Ebrahimi 
675*22dc650dSSadaf Ebrahimi /* ----------------------- PRIVATE STRUCTURES ----------------------------- */
676*22dc650dSSadaf Ebrahimi 
677*22dc650dSSadaf Ebrahimi /* These structures are not needed for pcre2test. */
678*22dc650dSSadaf Ebrahimi 
679*22dc650dSSadaf Ebrahimi #ifndef PCRE2_PCRE2TEST
680*22dc650dSSadaf Ebrahimi 
681*22dc650dSSadaf Ebrahimi /* Structures for checking for mutual function recursion when scanning compiled
682*22dc650dSSadaf Ebrahimi or parsed code. */
683*22dc650dSSadaf Ebrahimi 
684*22dc650dSSadaf Ebrahimi typedef struct recurse_check {
685*22dc650dSSadaf Ebrahimi   struct recurse_check *prev;  /* Link to the previous entry in the chain */
686*22dc650dSSadaf Ebrahimi   PCRE2_SPTR group;  /* Presumably the group being scanned in compiled code - confirm */
687*22dc650dSSadaf Ebrahimi } recurse_check;
688*22dc650dSSadaf Ebrahimi 
689*22dc650dSSadaf Ebrahimi typedef struct parsed_recurse_check {
690*22dc650dSSadaf Ebrahimi   struct parsed_recurse_check *prev;  /* Link to the previous entry in the chain */
691*22dc650dSSadaf Ebrahimi   uint32_t *groupptr;  /* Presumably the group being scanned in the parsed pattern - confirm */
692*22dc650dSSadaf Ebrahimi } parsed_recurse_check;
693*22dc650dSSadaf Ebrahimi 
694*22dc650dSSadaf Ebrahimi /* Structure for building a cache when filling in pattern recursion offsets. */
695*22dc650dSSadaf Ebrahimi 
696*22dc650dSSadaf Ebrahimi typedef struct recurse_cache {
697*22dc650dSSadaf Ebrahimi   PCRE2_SPTR group;  /* Presumably the cached location of a group - confirm */
698*22dc650dSSadaf Ebrahimi   int groupnumber;  /* Presumably the number of that group - confirm */
699*22dc650dSSadaf Ebrahimi } recurse_cache;
700*22dc650dSSadaf Ebrahimi 
701*22dc650dSSadaf Ebrahimi /* Structure for maintaining a chain of pointers to the currently incomplete
702*22dc650dSSadaf Ebrahimi branches, for testing for left recursion while compiling. */
703*22dc650dSSadaf Ebrahimi 
704*22dc650dSSadaf Ebrahimi typedef struct branch_chain {
705*22dc650dSSadaf Ebrahimi   struct branch_chain *outer;  /* Next link in the chain of incomplete branches */
706*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *current_branch;  /* The currently incomplete branch for this link */
707*22dc650dSSadaf Ebrahimi } branch_chain;
708*22dc650dSSadaf Ebrahimi 
709*22dc650dSSadaf Ebrahimi /* Structure for building a list of named groups during the first pass of
710*22dc650dSSadaf Ebrahimi compiling. */
711*22dc650dSSadaf Ebrahimi 
712*22dc650dSSadaf Ebrahimi typedef struct named_group {
713*22dc650dSSadaf Ebrahimi   PCRE2_SPTR   name;          /* Points to the name in the pattern */
714*22dc650dSSadaf Ebrahimi   uint32_t     number;        /* Group number */
715*22dc650dSSadaf Ebrahimi   uint16_t     length;        /* Length of the name; units not shown here */
716*22dc650dSSadaf Ebrahimi   uint16_t     isdup;         /* TRUE if a duplicate (a uint16_t used as a flag) */
717*22dc650dSSadaf Ebrahimi } named_group;
718*22dc650dSSadaf Ebrahimi 
719*22dc650dSSadaf Ebrahimi /* Structure for passing "static" information around between the functions
720*22dc650dSSadaf Ebrahimi doing the compiling, so that they are thread-safe. */
721*22dc650dSSadaf Ebrahimi 
722*22dc650dSSadaf Ebrahimi typedef struct compile_block {
723*22dc650dSSadaf Ebrahimi   pcre2_real_compile_context *cx;  /* Points to the compile context */
724*22dc650dSSadaf Ebrahimi   const uint8_t *lcc;              /* Points to lower casing table */
725*22dc650dSSadaf Ebrahimi   const uint8_t *fcc;              /* Points to case-flipping table */
726*22dc650dSSadaf Ebrahimi   const uint8_t *cbits;            /* Points to character type table */
727*22dc650dSSadaf Ebrahimi   const uint8_t *ctypes;           /* Points to table of type maps */
728*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_workspace;      /* The start of working space */
729*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_code;           /* The start of the compiled code */
730*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_pattern;        /* The start of the pattern */
731*22dc650dSSadaf Ebrahimi   PCRE2_SPTR end_pattern;          /* The end of the pattern */
732*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *name_table;         /* The name/number table */
733*22dc650dSSadaf Ebrahimi   PCRE2_SIZE workspace_size;       /* Size of workspace */
734*22dc650dSSadaf Ebrahimi   PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 (10 slots; slot 0 presumably unused - confirm) */
735*22dc650dSSadaf Ebrahimi   PCRE2_SIZE erroroffset;          /* Offset of error in pattern */
736*22dc650dSSadaf Ebrahimi   uint16_t names_found;            /* Number of entries so far */
737*22dc650dSSadaf Ebrahimi   uint16_t name_entry_size;        /* Size of each entry */
738*22dc650dSSadaf Ebrahimi   uint16_t parens_depth;           /* Depth of nested parentheses */
739*22dc650dSSadaf Ebrahimi   uint16_t assert_depth;           /* Depth of nested assertions */
740*22dc650dSSadaf Ebrahimi   named_group *named_groups;       /* Points to vector in pre-compile */
741*22dc650dSSadaf Ebrahimi   uint32_t named_group_list_size;  /* Number of entries in the list */
742*22dc650dSSadaf Ebrahimi   uint32_t external_options;       /* External (initial) options */
743*22dc650dSSadaf Ebrahimi   uint32_t external_flags;         /* External flag bits to be set */
744*22dc650dSSadaf Ebrahimi   uint32_t bracount;               /* Count of capturing parentheses */
745*22dc650dSSadaf Ebrahimi   uint32_t lastcapture;            /* Last capture encountered */
746*22dc650dSSadaf Ebrahimi   uint32_t *parsed_pattern;        /* Parsed pattern buffer */
747*22dc650dSSadaf Ebrahimi   uint32_t *parsed_pattern_end;    /* Parsed pattern should not get here */
748*22dc650dSSadaf Ebrahimi   uint32_t *groupinfo;             /* Group info vector */
749*22dc650dSSadaf Ebrahimi   uint32_t top_backref;            /* Maximum back reference (uint32_t here, uint16_t in pcre2_real_code) */
750*22dc650dSSadaf Ebrahimi   uint32_t backref_map;            /* Bitmap of low back refs */
751*22dc650dSSadaf Ebrahimi   uint32_t nltype;                 /* Newline type */
752*22dc650dSSadaf Ebrahimi   uint32_t nllen;                  /* Newline string length */
753*22dc650dSSadaf Ebrahimi   uint32_t class_range_start;      /* Overall class range start */
754*22dc650dSSadaf Ebrahimi   uint32_t class_range_end;        /* Overall class range end */
755*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
756*22dc650dSSadaf Ebrahimi   uint32_t req_varyopt;            /* "After variable item" flag for reqbyte */
757*22dc650dSSadaf Ebrahimi   uint32_t max_varlookbehind;      /* Limit for variable lookbehinds (same name as the compile context field) */
758*22dc650dSSadaf Ebrahimi   int  max_lookbehind;             /* Maximum lookbehind encountered (characters); int here, uint16_t in pcre2_real_code */
759*22dc650dSSadaf Ebrahimi   BOOL had_accept;                 /* (*ACCEPT) encountered */
760*22dc650dSSadaf Ebrahimi   BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
761*22dc650dSSadaf Ebrahimi   BOOL had_recurse;                /* Had a pattern recursion or subroutine call */
762*22dc650dSSadaf Ebrahimi   BOOL dupnames;                   /* Duplicate names exist */
763*22dc650dSSadaf Ebrahimi } compile_block;
764*22dc650dSSadaf Ebrahimi 
765*22dc650dSSadaf Ebrahimi /* Structure for keeping the properties of the in-memory stack used
766*22dc650dSSadaf Ebrahimi by the JIT matcher. */
767*22dc650dSSadaf Ebrahimi 
768*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_jit_stack {
769*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;  /* Memory control fields (first member) */
770*22dc650dSSadaf Ebrahimi   void* stack;  /* Opaque pointer to the in-memory stack used by the JIT matcher */
771*22dc650dSSadaf Ebrahimi } pcre2_real_jit_stack;
772*22dc650dSSadaf Ebrahimi 
773*22dc650dSSadaf Ebrahimi /* Structure for items in a linked list that represents an explicit recursive
774*22dc650dSSadaf Ebrahimi call within the pattern when running pcre2_dfa_match(). */
775*22dc650dSSadaf Ebrahimi 
776*22dc650dSSadaf Ebrahimi typedef struct dfa_recursion_info {
777*22dc650dSSadaf Ebrahimi   struct dfa_recursion_info *prevrec;  /* Link to the previous item in the list */
778*22dc650dSSadaf Ebrahimi   PCRE2_SPTR subject_position;  /* Presumably the subject position at the recursive call - confirm */
779*22dc650dSSadaf Ebrahimi   PCRE2_SPTR last_used_ptr;  /* Presumably the latest consulted character (cf. dfa_match_block) - confirm */
780*22dc650dSSadaf Ebrahimi   uint32_t group_num;  /* Presumably the number of the group being called - confirm */
781*22dc650dSSadaf Ebrahimi } dfa_recursion_info;
782*22dc650dSSadaf Ebrahimi 
783*22dc650dSSadaf Ebrahimi /* Structure for "stack" frames that are used for remembering backtracking
784*22dc650dSSadaf Ebrahimi positions during matching. As these are used in a vector, with the ovector item
785*22dc650dSSadaf Ebrahimi being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
786*22dc650dSSadaf Ebrahimi only way to check this at compile time is to force an error by generating an
787*22dc650dSSadaf Ebrahimi array with a negative size. By putting this in a typedef (which is never used),
788*22dc650dSSadaf Ebrahimi we don't generate any code when all is well. */
789*22dc650dSSadaf Ebrahimi 
790*22dc650dSSadaf Ebrahimi typedef struct heapframe {
791*22dc650dSSadaf Ebrahimi 
792*22dc650dSSadaf Ebrahimi   /* The first set of fields are variables that have to be preserved over calls
793*22dc650dSSadaf Ebrahimi   to RRMATCH(), but which do not need to be copied to new frames. */
794*22dc650dSSadaf Ebrahimi 
795*22dc650dSSadaf Ebrahimi   PCRE2_SPTR ecode;          /* The current position in the pattern */
796*22dc650dSSadaf Ebrahimi   PCRE2_SPTR temp_sptr[2];   /* Used for short-term PCRE2_SPTR values */
797*22dc650dSSadaf Ebrahimi   PCRE2_SIZE length;         /* Used for character, string, or code lengths */
798*22dc650dSSadaf Ebrahimi   PCRE2_SIZE back_frame;     /* Amount to subtract on RRETURN */
799*22dc650dSSadaf Ebrahimi   PCRE2_SIZE temp_size;      /* Used for short-term PCRE2_SIZE values */
800*22dc650dSSadaf Ebrahimi   uint32_t rdepth;           /* Function "recursion" depth within pcre2_match() */
801*22dc650dSSadaf Ebrahimi   uint32_t group_frame_type; /* Type information for group frames */
802*22dc650dSSadaf Ebrahimi   uint32_t temp_32[4];       /* Used for short-term 32-bit or BOOL values */
803*22dc650dSSadaf Ebrahimi   uint8_t return_id;         /* Where to go on in internal "return" */
804*22dc650dSSadaf Ebrahimi   uint8_t op;                /* Processing opcode */
805*22dc650dSSadaf Ebrahimi 
806*22dc650dSSadaf Ebrahimi   /* At this point, the structure is 16-bit aligned. On most architectures
807*22dc650dSSadaf Ebrahimi   the alignment requirement for a pointer will ensure that the eptr field below
808*22dc650dSSadaf Ebrahimi   is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
809*22dc650dSSadaf Ebrahimi   that is 16-bit aligned. We must therefore ensure that what comes between here
810*22dc650dSSadaf Ebrahimi   and eptr is an odd multiple of 16 bits so as to get back into 32-bit
811*22dc650dSSadaf Ebrahimi   alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
812*22dc650dSSadaf Ebrahimi   fudges in the other cases. In the 32-bit case the padding comes first so that
813*22dc650dSSadaf Ebrahimi   the occu field itself is 32-bit aligned. Without the padding, this structure
814*22dc650dSSadaf Ebrahimi   is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
815*22dc650dSSadaf Ebrahimi 
816*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
817*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR occu[6];       /* Used for other case code units */
818*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
819*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR occu[2];       /* Used for other case code units */
820*22dc650dSSadaf Ebrahimi   uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
821*22dc650dSSadaf Ebrahimi #else
822*22dc650dSSadaf Ebrahimi   uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
823*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR occu[1];       /* Used for other case code units */
824*22dc650dSSadaf Ebrahimi #endif
825*22dc650dSSadaf Ebrahimi 
826*22dc650dSSadaf Ebrahimi   /* The rest have to be copied from the previous frame whenever a new frame
827*22dc650dSSadaf Ebrahimi   becomes current. The final field is specified as a large vector so that
828*22dc650dSSadaf Ebrahimi   runtime array bound checks don't catch references to it. However, for any
829*22dc650dSSadaf Ebrahimi   specific call to pcre2_match() the memory allocated for each frame structure
830*22dc650dSSadaf Ebrahimi   allows for exactly the right size ovector for the number of capturing
831*22dc650dSSadaf Ebrahimi   parentheses. (See also the comment for pcre2_real_match_data above.) */
832*22dc650dSSadaf Ebrahimi 
833*22dc650dSSadaf Ebrahimi   PCRE2_SPTR eptr;              /* MUST BE FIRST of the copied fields (presumably the copy starts at this member - confirm) */
834*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_match;       /* Can be adjusted by \K */
835*22dc650dSSadaf Ebrahimi   PCRE2_SPTR mark;              /* Most recent mark on the success path */
836*22dc650dSSadaf Ebrahimi   PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */
837*22dc650dSSadaf Ebrahimi   uint32_t current_recurse;     /* Group number of current (deepest) pattern recursion */
838*22dc650dSSadaf Ebrahimi   uint32_t capture_last;        /* Most recent capture */
839*22dc650dSSadaf Ebrahimi   PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
840*22dc650dSSadaf Ebrahimi   PCRE2_SIZE offset_top;        /* Offset after highest capture */
841*22dc650dSSadaf Ebrahimi   PCRE2_SIZE ovector[131072];   /* Must be last in the structure */
842*22dc650dSSadaf Ebrahimi } heapframe;
843*22dc650dSSadaf Ebrahimi 
844*22dc650dSSadaf Ebrahimi /* This typedef is a check that the size of the heapframe structure is a
845*22dc650dSSadaf Ebrahimi multiple of PCRE2_SIZE. See various comments above. */
846*22dc650dSSadaf Ebrahimi 
/* The array bound is +1 when sizeof(heapframe) is an exact multiple of
sizeof(PCRE2_SIZE) and -1 otherwise; a negative bound is a compile-time
error. The typedef name itself is never used. */
847*22dc650dSSadaf Ebrahimi typedef char check_heapframe_size[
848*22dc650dSSadaf Ebrahimi   ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
849*22dc650dSSadaf Ebrahimi 
850*22dc650dSSadaf Ebrahimi /* Structure for computing the alignment of heapframe. */
851*22dc650dSSadaf Ebrahimi 
852*22dc650dSSadaf Ebrahimi typedef struct heapframe_align {
853*22dc650dSSadaf Ebrahimi   char unalign;    /* Completely unalign the current offset (one byte, so the next member needs padding) */
854*22dc650dSSadaf Ebrahimi   heapframe frame; /* Offset is its alignment; read via offsetof in HEAPFRAME_ALIGNMENT */
855*22dc650dSSadaf Ebrahimi } heapframe_align;
856*22dc650dSSadaf Ebrahimi 
857*22dc650dSSadaf Ebrahimi /* This define is the minimum alignment required for a heapframe, in bytes. */
858*22dc650dSSadaf Ebrahimi 
/* Uses the standard offsetof macro from <stddef.h>, so the value is an
integer constant expression of type size_t. */
859*22dc650dSSadaf Ebrahimi #define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
860*22dc650dSSadaf Ebrahimi 
861*22dc650dSSadaf Ebrahimi /* Structure for passing "static" information around between the functions
862*22dc650dSSadaf Ebrahimi doing traditional NFA matching (pcre2_match() and friends). */
863*22dc650dSSadaf Ebrahimi 
864*22dc650dSSadaf Ebrahimi typedef struct match_block {
865*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;            /* For general use */
866*22dc650dSSadaf Ebrahimi   uint32_t heap_limit;            /* Heap limit for this match; units not shown here */
867*22dc650dSSadaf Ebrahimi   uint32_t match_limit;           /* Match limit; presumably compared with match_call_count - confirm */
868*22dc650dSSadaf Ebrahimi   uint32_t match_limit_depth;     /* Depth limit for this match */
869*22dc650dSSadaf Ebrahimi   uint32_t match_call_count;      /* Number of times a new frame is created */
870*22dc650dSSadaf Ebrahimi   BOOL hitend;                    /* Hit the end of the subject at some point */
871*22dc650dSSadaf Ebrahimi   BOOL hasthen;                   /* Pattern contains (*THEN) */
872*22dc650dSSadaf Ebrahimi   BOOL allowemptypartial;         /* Allow empty hard partial */
873*22dc650dSSadaf Ebrahimi   const uint8_t *lcc;             /* Points to lower casing table */
874*22dc650dSSadaf Ebrahimi   const uint8_t *fcc;             /* Points to case-flipping table */
875*22dc650dSSadaf Ebrahimi   const uint8_t *ctypes;          /* Points to table of type maps */
876*22dc650dSSadaf Ebrahimi   PCRE2_SIZE start_offset;        /* The start offset value */
877*22dc650dSSadaf Ebrahimi   PCRE2_SIZE end_offset_top;      /* Highwater mark at end of match */
878*22dc650dSSadaf Ebrahimi   uint16_t partial;               /* PARTIAL options */
879*22dc650dSSadaf Ebrahimi   uint16_t bsr_convention;        /* \R interpretation */
880*22dc650dSSadaf Ebrahimi   uint16_t name_count;            /* Number of names in name table */
881*22dc650dSSadaf Ebrahimi   uint16_t name_entry_size;       /* Size of entry in names table */
882*22dc650dSSadaf Ebrahimi   PCRE2_SPTR name_table;          /* Table of group names */
883*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_code;          /* For use in pattern recursion */
884*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_subject;       /* Start of the subject string */
885*22dc650dSSadaf Ebrahimi   PCRE2_SPTR check_subject;       /* Where UTF-checked from */
886*22dc650dSSadaf Ebrahimi   PCRE2_SPTR end_subject;         /* Usable end of the subject string */
887*22dc650dSSadaf Ebrahimi   PCRE2_SPTR true_end_subject;    /* Actual end of the subject string */
888*22dc650dSSadaf Ebrahimi   PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
889*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
890*22dc650dSSadaf Ebrahimi   PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
891*22dc650dSSadaf Ebrahimi   PCRE2_SPTR mark;                /* Mark pointer to pass back on success */
892*22dc650dSSadaf Ebrahimi   PCRE2_SPTR nomatch_mark;        /* Mark pointer to pass back on failure */
893*22dc650dSSadaf Ebrahimi   PCRE2_SPTR verb_ecode_ptr;      /* For passing back info */
894*22dc650dSSadaf Ebrahimi   PCRE2_SPTR verb_skip_ptr;       /* For passing back a (*SKIP) name */
895*22dc650dSSadaf Ebrahimi   uint32_t verb_current_recurse;  /* Current recursion group when (*VERB) happens */
896*22dc650dSSadaf Ebrahimi   uint32_t moptions;              /* Match options */
897*22dc650dSSadaf Ebrahimi   uint32_t poptions;              /* Pattern options */
898*22dc650dSSadaf Ebrahimi   uint32_t skip_arg_count;        /* For counting SKIP_ARGs */
899*22dc650dSSadaf Ebrahimi   uint32_t ignore_skip_arg;       /* For re-run when SKIP arg name not found */
900*22dc650dSSadaf Ebrahimi   uint32_t nltype;                /* Newline type */
901*22dc650dSSadaf Ebrahimi   uint32_t nllen;                 /* Newline string length */
902*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR nl[4];              /* Newline string when fixed */
903*22dc650dSSadaf Ebrahimi   pcre2_callout_block *cb;        /* Points to a callout block */
904*22dc650dSSadaf Ebrahimi   void  *callout_data;            /* To pass back to callouts */
905*22dc650dSSadaf Ebrahimi   int (*callout)(pcre2_callout_block *,void *);  /* Callout function or NULL */
906*22dc650dSSadaf Ebrahimi } match_block;
907*22dc650dSSadaf Ebrahimi 
908*22dc650dSSadaf Ebrahimi /* A similar structure is used for the same purpose by the DFA matching
909*22dc650dSSadaf Ebrahimi functions. */
910*22dc650dSSadaf Ebrahimi 
911*22dc650dSSadaf Ebrahimi typedef struct dfa_match_block {
912*22dc650dSSadaf Ebrahimi   pcre2_memctl memctl;            /* For general use */
913*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_code;          /* Start of the compiled pattern */
914*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_subject ;      /* Start of the subject string */
915*22dc650dSSadaf Ebrahimi   PCRE2_SPTR end_subject;         /* End of subject string */
916*22dc650dSSadaf Ebrahimi   PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
917*22dc650dSSadaf Ebrahimi   PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
918*22dc650dSSadaf Ebrahimi   const uint8_t *tables;          /* Character tables */
919*22dc650dSSadaf Ebrahimi   PCRE2_SIZE start_offset;        /* The start offset value */
920*22dc650dSSadaf Ebrahimi   uint32_t heap_limit;            /* Heap limit for this match; units not shown here */
921*22dc650dSSadaf Ebrahimi   PCRE2_SIZE heap_used;           /* Heap used so far; presumably checked against heap_limit - confirm */
922*22dc650dSSadaf Ebrahimi   uint32_t match_limit;           /* Match limit; presumably compared with match_call_count - confirm */
923*22dc650dSSadaf Ebrahimi   uint32_t match_limit_depth;     /* Depth limit for this match */
924*22dc650dSSadaf Ebrahimi   uint32_t match_call_count;      /* Number of calls of internal function */
925*22dc650dSSadaf Ebrahimi   uint32_t moptions;              /* Match options */
926*22dc650dSSadaf Ebrahimi   uint32_t poptions;              /* Pattern options */
927*22dc650dSSadaf Ebrahimi   uint32_t nltype;                /* Newline type */
928*22dc650dSSadaf Ebrahimi   uint32_t nllen;                 /* Newline string length */
929*22dc650dSSadaf Ebrahimi   BOOL allowemptypartial;         /* Allow empty hard partial */
930*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR nl[4];              /* Newline string when fixed */
931*22dc650dSSadaf Ebrahimi   uint16_t bsr_convention;        /* \R interpretation */
932*22dc650dSSadaf Ebrahimi   pcre2_callout_block *cb;        /* Points to a callout block */
933*22dc650dSSadaf Ebrahimi   void *callout_data;             /* To pass back to callouts */
934*22dc650dSSadaf Ebrahimi   int (*callout)(pcre2_callout_block *,void *);  /* Callout function or NULL */
935*22dc650dSSadaf Ebrahimi   dfa_recursion_info *recursive;  /* Linked list of pattern recursion data */
936*22dc650dSSadaf Ebrahimi } dfa_match_block;
937*22dc650dSSadaf Ebrahimi 
938*22dc650dSSadaf Ebrahimi #endif  /* PCRE2_PCRE2TEST */
939*22dc650dSSadaf Ebrahimi 
940*22dc650dSSadaf Ebrahimi /* End of pcre2_intmodedep.h */
941