1*22dc650dSSadaf Ebrahimi /************************************************* 2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions * 3*22dc650dSSadaf Ebrahimi *************************************************/ 4*22dc650dSSadaf Ebrahimi 5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax 6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language. 7*22dc650dSSadaf Ebrahimi 8*22dc650dSSadaf Ebrahimi Written by Philip Hazel 9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge 10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016-2024 University of Cambridge 11*22dc650dSSadaf Ebrahimi 12*22dc650dSSadaf Ebrahimi ----------------------------------------------------------------------------- 13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without 14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met: 15*22dc650dSSadaf Ebrahimi 16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice, 17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer. 18*22dc650dSSadaf Ebrahimi 19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright 20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the 21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution. 22*22dc650dSSadaf Ebrahimi 23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its 24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from 25*22dc650dSSadaf Ebrahimi this software without specific prior written permission. 26*22dc650dSSadaf Ebrahimi 27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE. 38*22dc650dSSadaf Ebrahimi ----------------------------------------------------------------------------- 39*22dc650dSSadaf Ebrahimi */ 40*22dc650dSSadaf Ebrahimi 41*22dc650dSSadaf Ebrahimi 42*22dc650dSSadaf Ebrahimi /* This module contains mode-dependent macro and structure definitions. The 43*22dc650dSSadaf Ebrahimi file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined. 44*22dc650dSSadaf Ebrahimi These mode-dependent items are kept in a separate file so that they can also be 45*22dc650dSSadaf Ebrahimi #included multiple times for different code unit widths by pcre2test in order 46*22dc650dSSadaf Ebrahimi to have access to the hidden structures at all supported widths. 47*22dc650dSSadaf Ebrahimi 48*22dc650dSSadaf Ebrahimi Some of the mode-dependent macros are required at different widths for 49*22dc650dSSadaf Ebrahimi different parts of the pcre2test code (in particular, the included 50*22dc650dSSadaf Ebrahimi pcre_printint.c file). We undefine them here so that they can be re-defined for 51*22dc650dSSadaf Ebrahimi multiple inclusions. Not all of these are used in pcre2test, but it's easier 52*22dc650dSSadaf Ebrahimi just to undefine them all. */ 53*22dc650dSSadaf Ebrahimi 54*22dc650dSSadaf Ebrahimi #undef ACROSSCHAR 55*22dc650dSSadaf Ebrahimi #undef BACKCHAR 56*22dc650dSSadaf Ebrahimi #undef BYTES2CU 57*22dc650dSSadaf Ebrahimi #undef CHMAX_255 58*22dc650dSSadaf Ebrahimi #undef CU2BYTES 59*22dc650dSSadaf Ebrahimi #undef FORWARDCHAR 60*22dc650dSSadaf Ebrahimi #undef FORWARDCHARTEST 61*22dc650dSSadaf Ebrahimi #undef GET 62*22dc650dSSadaf Ebrahimi #undef GET2 63*22dc650dSSadaf Ebrahimi #undef GETCHAR 64*22dc650dSSadaf Ebrahimi #undef GETCHARINC 65*22dc650dSSadaf Ebrahimi #undef GETCHARINCTEST 66*22dc650dSSadaf Ebrahimi #undef GETCHARLEN 67*22dc650dSSadaf Ebrahimi #undef GETCHARLENTEST 68*22dc650dSSadaf Ebrahimi #undef GETCHARTEST 69*22dc650dSSadaf Ebrahimi #undef GET_EXTRALEN 70*22dc650dSSadaf Ebrahimi #undef HAS_EXTRALEN 71*22dc650dSSadaf Ebrahimi #undef IMM2_SIZE 72*22dc650dSSadaf Ebrahimi #undef MAX_255 73*22dc650dSSadaf Ebrahimi #undef MAX_MARK 74*22dc650dSSadaf Ebrahimi #undef MAX_PATTERN_SIZE 75*22dc650dSSadaf Ebrahimi #undef MAX_UTF_SINGLE_CU 76*22dc650dSSadaf Ebrahimi #undef NOT_FIRSTCU 77*22dc650dSSadaf Ebrahimi #undef PUT 78*22dc650dSSadaf Ebrahimi #undef PUT2 79*22dc650dSSadaf Ebrahimi #undef PUT2INC 80*22dc650dSSadaf Ebrahimi #undef PUTCHAR 81*22dc650dSSadaf Ebrahimi #undef PUTINC 82*22dc650dSSadaf Ebrahimi #undef TABLE_GET 83*22dc650dSSadaf Ebrahimi 84*22dc650dSSadaf Ebrahimi 85*22dc650dSSadaf Ebrahimi 86*22dc650dSSadaf Ebrahimi /* -------------------------- MACROS ----------------------------- */ 87*22dc650dSSadaf Ebrahimi 88*22dc650dSSadaf Ebrahimi /* PCRE keeps offsets in its compiled code as at least 16-bit quantities 89*22dc650dSSadaf Ebrahimi (always stored in big-endian order in 8-bit mode) by default. These are used, 90*22dc650dSSadaf Ebrahimi for example, to link from the start of a subpattern to its alternatives and its 91*22dc650dSSadaf Ebrahimi end. The use of 16 bits per offset limits the size of an 8-bit compiled regex 92*22dc650dSSadaf Ebrahimi to around 64K, which is big enough for almost everybody. However, I received a 93*22dc650dSSadaf Ebrahimi request for an even bigger limit. For this reason, and also to make the code 94*22dc650dSSadaf Ebrahimi easier to maintain, the storing and loading of offsets from the compiled code 95*22dc650dSSadaf Ebrahimi unit string is now handled by the macros that are defined here. 96*22dc650dSSadaf Ebrahimi 97*22dc650dSSadaf Ebrahimi The macros are controlled by the value of LINK_SIZE. This defaults to 2, but 98*22dc650dSSadaf Ebrahimi values of 3 or 4 are also supported. */ 99*22dc650dSSadaf Ebrahimi 100*22dc650dSSadaf Ebrahimi /* ------------------- 8-bit support ------------------ */ 101*22dc650dSSadaf Ebrahimi 102*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 103*22dc650dSSadaf Ebrahimi 104*22dc650dSSadaf Ebrahimi #if LINK_SIZE == 2 105*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 106*22dc650dSSadaf Ebrahimi (a[n] = (PCRE2_UCHAR)((d) >> 8)), \ 107*22dc650dSSadaf Ebrahimi (a[(n)+1] = (PCRE2_UCHAR)((d) & 255)) 108*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 109*22dc650dSSadaf Ebrahimi (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) 110*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 16) 111*22dc650dSSadaf Ebrahimi 112*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 3 113*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 114*22dc650dSSadaf Ebrahimi (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ 115*22dc650dSSadaf Ebrahimi (a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \ 116*22dc650dSSadaf Ebrahimi (a[(n)+2] = (PCRE2_UCHAR)((d) & 255)) 117*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 118*22dc650dSSadaf Ebrahimi (unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) 119*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 24) 120*22dc650dSSadaf Ebrahimi 121*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 4 122*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 123*22dc650dSSadaf Ebrahimi (a[n] = (PCRE2_UCHAR)((d) >> 24)), \ 124*22dc650dSSadaf Ebrahimi (a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \ 125*22dc650dSSadaf Ebrahimi (a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \ 126*22dc650dSSadaf Ebrahimi (a[(n)+3] = (PCRE2_UCHAR)((d) & 255)) 127*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 128*22dc650dSSadaf Ebrahimi (unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) 129*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ 130*22dc650dSSadaf Ebrahimi 131*22dc650dSSadaf Ebrahimi #else 132*22dc650dSSadaf Ebrahimi #error LINK_SIZE must be 2, 3, or 4 133*22dc650dSSadaf Ebrahimi #endif 134*22dc650dSSadaf Ebrahimi 135*22dc650dSSadaf Ebrahimi 136*22dc650dSSadaf Ebrahimi /* ------------------- 16-bit support ------------------ */ 137*22dc650dSSadaf Ebrahimi 138*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16 139*22dc650dSSadaf Ebrahimi 140*22dc650dSSadaf Ebrahimi #if LINK_SIZE == 2 141*22dc650dSSadaf Ebrahimi #undef LINK_SIZE 142*22dc650dSSadaf Ebrahimi #define LINK_SIZE 1 143*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 144*22dc650dSSadaf Ebrahimi (a[n] = (PCRE2_UCHAR)(d)) 145*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 146*22dc650dSSadaf Ebrahimi (a[n]) 147*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 16) 148*22dc650dSSadaf Ebrahimi 149*22dc650dSSadaf Ebrahimi #elif LINK_SIZE == 3 || LINK_SIZE == 4 150*22dc650dSSadaf Ebrahimi #undef LINK_SIZE 151*22dc650dSSadaf Ebrahimi #define LINK_SIZE 2 152*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 153*22dc650dSSadaf Ebrahimi (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ 154*22dc650dSSadaf Ebrahimi (a[(n)+1] = (PCRE2_UCHAR)((d) & 65535)) 155*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 156*22dc650dSSadaf Ebrahimi (unsigned int)(((a)[n] << 16) | (a)[(n)+1]) 157*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ 158*22dc650dSSadaf Ebrahimi 159*22dc650dSSadaf Ebrahimi #else 160*22dc650dSSadaf Ebrahimi #error LINK_SIZE must be 2, 3, or 4 161*22dc650dSSadaf Ebrahimi #endif 162*22dc650dSSadaf Ebrahimi 163*22dc650dSSadaf Ebrahimi 164*22dc650dSSadaf Ebrahimi /* ------------------- 32-bit support ------------------ */ 165*22dc650dSSadaf Ebrahimi 166*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 32 167*22dc650dSSadaf Ebrahimi #undef LINK_SIZE 168*22dc650dSSadaf Ebrahimi #define LINK_SIZE 1 169*22dc650dSSadaf Ebrahimi #define PUT(a,n,d) \ 170*22dc650dSSadaf Ebrahimi (a[n] = (d)) 171*22dc650dSSadaf Ebrahimi #define GET(a,n) \ 172*22dc650dSSadaf Ebrahimi (a[n]) 173*22dc650dSSadaf Ebrahimi #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ 174*22dc650dSSadaf Ebrahimi 175*22dc650dSSadaf Ebrahimi #else 176*22dc650dSSadaf Ebrahimi #error Unsupported compiling mode 177*22dc650dSSadaf Ebrahimi #endif 178*22dc650dSSadaf Ebrahimi 179*22dc650dSSadaf Ebrahimi 180*22dc650dSSadaf Ebrahimi /* --------------- Other mode-specific macros ----------------- */ 181*22dc650dSSadaf Ebrahimi 182*22dc650dSSadaf Ebrahimi /* PCRE uses some other (at least) 16-bit quantities that do not change when 183*22dc650dSSadaf Ebrahimi the size of offsets changes. There are used for repeat counts and for other 184*22dc650dSSadaf Ebrahimi things such as capturing parenthesis numbers in back references. 185*22dc650dSSadaf Ebrahimi 186*22dc650dSSadaf Ebrahimi Define the number of code units required to hold a 16-bit count/offset, and 187*22dc650dSSadaf Ebrahimi macros to load and store such a value. For reasons that I do not understand, 188*22dc650dSSadaf Ebrahimi the expression in the 8-bit GET2 macro is treated by gcc as a signed 189*22dc650dSSadaf Ebrahimi expression, even when a is declared as unsigned. It seems that any kind of 190*22dc650dSSadaf Ebrahimi arithmetic results in a signed value. Hence the cast. */ 191*22dc650dSSadaf Ebrahimi 192*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 193*22dc650dSSadaf Ebrahimi #define IMM2_SIZE 2 194*22dc650dSSadaf Ebrahimi #define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) 195*22dc650dSSadaf Ebrahimi #define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255 196*22dc650dSSadaf Ebrahimi 197*22dc650dSSadaf Ebrahimi #else /* Code units are 16 or 32 bits */ 198*22dc650dSSadaf Ebrahimi #define IMM2_SIZE 1 199*22dc650dSSadaf Ebrahimi #define GET2(a,n) a[n] 200*22dc650dSSadaf Ebrahimi #define PUT2(a,n,d) a[n] = d 201*22dc650dSSadaf Ebrahimi #endif 202*22dc650dSSadaf Ebrahimi 203*22dc650dSSadaf Ebrahimi /* Other macros that are different for 8-bit mode. The MAX_255 macro checks 204*22dc650dSSadaf Ebrahimi whether its argument, which is assumed to be one code unit, is less than 256. 205*22dc650dSSadaf Ebrahimi The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK 206*22dc650dSSadaf Ebrahimi name must fit in one code unit; currently it is set to 255 or 65535. The 207*22dc650dSSadaf Ebrahimi TABLE_GET macro is used to access elements of tables containing exactly 256 208*22dc650dSSadaf Ebrahimi items. Its argument is a code unit. When code points can be greater than 255, a 209*22dc650dSSadaf Ebrahimi check is needed before accessing these tables. */ 210*22dc650dSSadaf Ebrahimi 211*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 212*22dc650dSSadaf Ebrahimi #define MAX_255(c) TRUE 213*22dc650dSSadaf Ebrahimi #define MAX_MARK ((1u << 8) - 1) 214*22dc650dSSadaf Ebrahimi #define TABLE_GET(c, table, default) ((table)[c]) 215*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE 216*22dc650dSSadaf Ebrahimi #define SUPPORT_WIDE_CHARS 217*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) ((c) <= 255u) 218*22dc650dSSadaf Ebrahimi #else 219*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) TRUE 220*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */ 221*22dc650dSSadaf Ebrahimi 222*22dc650dSSadaf Ebrahimi #else /* Code units are 16 or 32 bits */ 223*22dc650dSSadaf Ebrahimi #define CHMAX_255(c) ((c) <= 255u) 224*22dc650dSSadaf Ebrahimi #define MAX_255(c) ((c) <= 255u) 225*22dc650dSSadaf Ebrahimi #define MAX_MARK ((1u << 16) - 1) 226*22dc650dSSadaf Ebrahimi #define SUPPORT_WIDE_CHARS 227*22dc650dSSadaf Ebrahimi #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) 228*22dc650dSSadaf Ebrahimi #endif 229*22dc650dSSadaf Ebrahimi 230*22dc650dSSadaf Ebrahimi 231*22dc650dSSadaf Ebrahimi /* ----------------- Character-handling macros ----------------- */ 232*22dc650dSSadaf Ebrahimi 233*22dc650dSSadaf Ebrahimi /* There is a proposed future special "UTF-21" mode, in which only the lowest 234*22dc650dSSadaf Ebrahimi 21 bits of a 32-bit character are interpreted as UTF, with the remaining 11 235*22dc650dSSadaf Ebrahimi high-order bits available to the application for other uses. In preparation for 236*22dc650dSSadaf Ebrahimi the future implementation of this mode, there are macros that load a data item 237*22dc650dSSadaf Ebrahimi and, if in this special mode, mask it to 21 bits. These macros all have names 238*22dc650dSSadaf Ebrahimi starting with UCHAR21. In all other modes, including the normal 32-bit 239*22dc650dSSadaf Ebrahimi library, the macros all have the same simple definitions. When the new mode is 240*22dc650dSSadaf Ebrahimi implemented, it is expected that these definitions will be varied appropriately 241*22dc650dSSadaf Ebrahimi using #ifdef when compiling the library that supports the special mode. */ 242*22dc650dSSadaf Ebrahimi 243*22dc650dSSadaf Ebrahimi #define UCHAR21(eptr) (*(eptr)) 244*22dc650dSSadaf Ebrahimi #define UCHAR21TEST(eptr) (*(eptr)) 245*22dc650dSSadaf Ebrahimi #define UCHAR21INC(eptr) (*(eptr)++) 246*22dc650dSSadaf Ebrahimi #define UCHAR21INCTEST(eptr) (*(eptr)++) 247*22dc650dSSadaf Ebrahimi 248*22dc650dSSadaf Ebrahimi /* When UTF encoding is being used, a character is no longer just a single 249*22dc650dSSadaf Ebrahimi byte in 8-bit mode or a single short in 16-bit mode. The macros for character 250*22dc650dSSadaf Ebrahimi handling generate simple sequences when used in the basic mode, and more 251*22dc650dSSadaf Ebrahimi complicated ones for UTF characters. GETCHARLENTEST and other macros are not 252*22dc650dSSadaf Ebrahimi used when UTF is not supported. To make sure they can never even appear when 253*22dc650dSSadaf Ebrahimi UTF support is omitted, we don't even define them. */ 254*22dc650dSSadaf Ebrahimi 255*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE 256*22dc650dSSadaf Ebrahimi 257*22dc650dSSadaf Ebrahimi /* #define MAX_UTF_SINGLE_CU */ 258*22dc650dSSadaf Ebrahimi /* #define HAS_EXTRALEN(c) */ 259*22dc650dSSadaf Ebrahimi /* #define GET_EXTRALEN(c) */ 260*22dc650dSSadaf Ebrahimi /* #define NOT_FIRSTCU(c) */ 261*22dc650dSSadaf Ebrahimi #define GETCHAR(c, eptr) c = *eptr; 262*22dc650dSSadaf Ebrahimi #define GETCHARTEST(c, eptr) c = *eptr; 263*22dc650dSSadaf Ebrahimi #define GETCHARINC(c, eptr) c = *eptr++; 264*22dc650dSSadaf Ebrahimi #define GETCHARINCTEST(c, eptr) c = *eptr++; 265*22dc650dSSadaf Ebrahimi #define GETCHARLEN(c, eptr, len) c = *eptr; 266*22dc650dSSadaf Ebrahimi #define PUTCHAR(c, p) (*p = c, 1) 267*22dc650dSSadaf Ebrahimi /* #define GETCHARLENTEST(c, eptr, len) */ 268*22dc650dSSadaf Ebrahimi /* #define BACKCHAR(eptr) */ 269*22dc650dSSadaf Ebrahimi /* #define FORWARDCHAR(eptr) */ 270*22dc650dSSadaf Ebrahimi /* #define FORWARCCHARTEST(eptr,end) */ 271*22dc650dSSadaf Ebrahimi /* #define ACROSSCHAR(condition, eptr, action) */ 272*22dc650dSSadaf Ebrahimi 273*22dc650dSSadaf Ebrahimi #else /* SUPPORT_UNICODE */ 274*22dc650dSSadaf Ebrahimi 275*22dc650dSSadaf Ebrahimi /* ------------------- 8-bit support ------------------ */ 276*22dc650dSSadaf Ebrahimi 277*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 278*22dc650dSSadaf Ebrahimi #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ 279*22dc650dSSadaf Ebrahimi 280*22dc650dSSadaf Ebrahimi /* The largest UTF code point that can be encoded as a single code unit. */ 281*22dc650dSSadaf Ebrahimi 282*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU 127 283*22dc650dSSadaf Ebrahimi 284*22dc650dSSadaf Ebrahimi /* Tests whether the code point needs extra characters to decode. */ 285*22dc650dSSadaf Ebrahimi 286*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c) 287*22dc650dSSadaf Ebrahimi 288*22dc650dSSadaf Ebrahimi /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. 289*22dc650dSSadaf Ebrahimi Otherwise it has an undefined behaviour. */ 290*22dc650dSSadaf Ebrahimi 291*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu]) 292*22dc650dSSadaf Ebrahimi 293*22dc650dSSadaf Ebrahimi /* Returns TRUE, if the given value is not the first code unit of a UTF 294*22dc650dSSadaf Ebrahimi sequence. */ 295*22dc650dSSadaf Ebrahimi 296*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u) 297*22dc650dSSadaf Ebrahimi 298*22dc650dSSadaf Ebrahimi /* Get the next UTF-8 character, not advancing the pointer. This is called when 299*22dc650dSSadaf Ebrahimi we know we are in UTF-8 mode. */ 300*22dc650dSSadaf Ebrahimi 301*22dc650dSSadaf Ebrahimi #define GETCHAR(c, eptr) \ 302*22dc650dSSadaf Ebrahimi c = *eptr; \ 303*22dc650dSSadaf Ebrahimi if (c >= 0xc0u) GETUTF8(c, eptr); 304*22dc650dSSadaf Ebrahimi 305*22dc650dSSadaf Ebrahimi /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the 306*22dc650dSSadaf Ebrahimi pointer. */ 307*22dc650dSSadaf Ebrahimi 308*22dc650dSSadaf Ebrahimi #define GETCHARTEST(c, eptr) \ 309*22dc650dSSadaf Ebrahimi c = *eptr; \ 310*22dc650dSSadaf Ebrahimi if (utf && c >= 0xc0u) GETUTF8(c, eptr); 311*22dc650dSSadaf Ebrahimi 312*22dc650dSSadaf Ebrahimi /* Get the next UTF-8 character, advancing the pointer. This is called when we 313*22dc650dSSadaf Ebrahimi know we are in UTF-8 mode. */ 314*22dc650dSSadaf Ebrahimi 315*22dc650dSSadaf Ebrahimi #define GETCHARINC(c, eptr) \ 316*22dc650dSSadaf Ebrahimi c = *eptr++; \ 317*22dc650dSSadaf Ebrahimi if (c >= 0xc0u) GETUTF8INC(c, eptr); 318*22dc650dSSadaf Ebrahimi 319*22dc650dSSadaf Ebrahimi /* Get the next character, testing for UTF-8 mode, and advancing the pointer. 320*22dc650dSSadaf Ebrahimi This is called when we don't know if we are in UTF-8 mode. */ 321*22dc650dSSadaf Ebrahimi 322*22dc650dSSadaf Ebrahimi #define GETCHARINCTEST(c, eptr) \ 323*22dc650dSSadaf Ebrahimi c = *eptr++; \ 324*22dc650dSSadaf Ebrahimi if (utf && c >= 0xc0u) GETUTF8INC(c, eptr); 325*22dc650dSSadaf Ebrahimi 326*22dc650dSSadaf Ebrahimi /* Get the next UTF-8 character, not advancing the pointer, incrementing length 327*22dc650dSSadaf Ebrahimi if there are extra bytes. This is called when we know we are in UTF-8 mode. */ 328*22dc650dSSadaf Ebrahimi 329*22dc650dSSadaf Ebrahimi #define GETCHARLEN(c, eptr, len) \ 330*22dc650dSSadaf Ebrahimi c = *eptr; \ 331*22dc650dSSadaf Ebrahimi if (c >= 0xc0u) GETUTF8LEN(c, eptr, len); 332*22dc650dSSadaf Ebrahimi 333*22dc650dSSadaf Ebrahimi /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the 334*22dc650dSSadaf Ebrahimi pointer, incrementing length if there are extra bytes. This is called when we 335*22dc650dSSadaf Ebrahimi do not know if we are in UTF-8 mode. */ 336*22dc650dSSadaf Ebrahimi 337*22dc650dSSadaf Ebrahimi #define GETCHARLENTEST(c, eptr, len) \ 338*22dc650dSSadaf Ebrahimi c = *eptr; \ 339*22dc650dSSadaf Ebrahimi if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len); 340*22dc650dSSadaf Ebrahimi 341*22dc650dSSadaf Ebrahimi /* If the pointer is not at the start of a character, move it back until 342*22dc650dSSadaf Ebrahimi it is. This is called only in UTF-8 mode - we don't put a test within the macro 343*22dc650dSSadaf Ebrahimi because almost all calls are already within a block of UTF-8 only code. */ 344*22dc650dSSadaf Ebrahimi 345*22dc650dSSadaf Ebrahimi #define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr-- 346*22dc650dSSadaf Ebrahimi 347*22dc650dSSadaf Ebrahimi /* Same as above, just in the other direction. */ 348*22dc650dSSadaf Ebrahimi #define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++ 349*22dc650dSSadaf Ebrahimi #define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++ 350*22dc650dSSadaf Ebrahimi 351*22dc650dSSadaf Ebrahimi /* Same as above, but it allows a fully customizable form. */ 352*22dc650dSSadaf Ebrahimi #define ACROSSCHAR(condition, eptr, action) \ 353*22dc650dSSadaf Ebrahimi while((condition) && ((*eptr) & 0xc0u) == 0x80u) action 354*22dc650dSSadaf Ebrahimi 355*22dc650dSSadaf Ebrahimi /* Deposit a character into memory, returning the number of code units. */ 356*22dc650dSSadaf Ebrahimi 357*22dc650dSSadaf Ebrahimi #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ 358*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(c,p) : (*p = c, 1)) 359*22dc650dSSadaf Ebrahimi 360*22dc650dSSadaf Ebrahimi 361*22dc650dSSadaf Ebrahimi /* ------------------- 16-bit support ------------------ */ 362*22dc650dSSadaf Ebrahimi 363*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16 364*22dc650dSSadaf Ebrahimi #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ 365*22dc650dSSadaf Ebrahimi 366*22dc650dSSadaf Ebrahimi /* The largest UTF code point that can be encoded as a single code unit. */ 367*22dc650dSSadaf Ebrahimi 368*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU 65535 369*22dc650dSSadaf Ebrahimi 370*22dc650dSSadaf Ebrahimi /* Tests whether the code point needs extra characters to decode. */ 371*22dc650dSSadaf Ebrahimi 372*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u) 373*22dc650dSSadaf Ebrahimi 374*22dc650dSSadaf Ebrahimi /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. 375*22dc650dSSadaf Ebrahimi Otherwise it has an undefined behaviour. */ 376*22dc650dSSadaf Ebrahimi 377*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) 1 378*22dc650dSSadaf Ebrahimi 379*22dc650dSSadaf Ebrahimi /* Returns TRUE, if the given value is not the first code unit of a UTF 380*22dc650dSSadaf Ebrahimi sequence. */ 381*22dc650dSSadaf Ebrahimi 382*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u) 383*22dc650dSSadaf Ebrahimi 384*22dc650dSSadaf Ebrahimi /* Base macro to pick up the low surrogate of a UTF-16 character, not 385*22dc650dSSadaf Ebrahimi advancing the pointer. */ 386*22dc650dSSadaf Ebrahimi 387*22dc650dSSadaf Ebrahimi #define GETUTF16(c, eptr) \ 388*22dc650dSSadaf Ebrahimi { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; } 389*22dc650dSSadaf Ebrahimi 390*22dc650dSSadaf Ebrahimi /* Get the next UTF-16 character, not advancing the pointer. This is called when 391*22dc650dSSadaf Ebrahimi we know we are in UTF-16 mode. */ 392*22dc650dSSadaf Ebrahimi 393*22dc650dSSadaf Ebrahimi #define GETCHAR(c, eptr) \ 394*22dc650dSSadaf Ebrahimi c = *eptr; \ 395*22dc650dSSadaf Ebrahimi if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); 396*22dc650dSSadaf Ebrahimi 397*22dc650dSSadaf Ebrahimi /* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the 398*22dc650dSSadaf Ebrahimi pointer. */ 399*22dc650dSSadaf Ebrahimi 400*22dc650dSSadaf Ebrahimi #define GETCHARTEST(c, eptr) \ 401*22dc650dSSadaf Ebrahimi c = *eptr; \ 402*22dc650dSSadaf Ebrahimi if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); 403*22dc650dSSadaf Ebrahimi 404*22dc650dSSadaf Ebrahimi /* Base macro to pick up the low surrogate of a UTF-16 character, advancing 405*22dc650dSSadaf Ebrahimi the pointer. */ 406*22dc650dSSadaf Ebrahimi 407*22dc650dSSadaf Ebrahimi #define GETUTF16INC(c, eptr) \ 408*22dc650dSSadaf Ebrahimi { c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; } 409*22dc650dSSadaf Ebrahimi 410*22dc650dSSadaf Ebrahimi /* Get the next UTF-16 character, advancing the pointer. This is called when we 411*22dc650dSSadaf Ebrahimi know we are in UTF-16 mode. */ 412*22dc650dSSadaf Ebrahimi 413*22dc650dSSadaf Ebrahimi #define GETCHARINC(c, eptr) \ 414*22dc650dSSadaf Ebrahimi c = *eptr++; \ 415*22dc650dSSadaf Ebrahimi if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); 416*22dc650dSSadaf Ebrahimi 417*22dc650dSSadaf Ebrahimi /* Get the next character, testing for UTF-16 mode, and advancing the pointer. 418*22dc650dSSadaf Ebrahimi This is called when we don't know if we are in UTF-16 mode. */ 419*22dc650dSSadaf Ebrahimi 420*22dc650dSSadaf Ebrahimi #define GETCHARINCTEST(c, eptr) \ 421*22dc650dSSadaf Ebrahimi c = *eptr++; \ 422*22dc650dSSadaf Ebrahimi if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); 423*22dc650dSSadaf Ebrahimi 424*22dc650dSSadaf Ebrahimi /* Base macro to pick up the low surrogate of a UTF-16 character, not 425*22dc650dSSadaf Ebrahimi advancing the pointer, incrementing the length. */ 426*22dc650dSSadaf Ebrahimi 427*22dc650dSSadaf Ebrahimi #define GETUTF16LEN(c, eptr, len) \ 428*22dc650dSSadaf Ebrahimi { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; } 429*22dc650dSSadaf Ebrahimi 430*22dc650dSSadaf Ebrahimi /* Get the next UTF-16 character, not advancing the pointer, incrementing 431*22dc650dSSadaf Ebrahimi length if there is a low surrogate. This is called when we know we are in 432*22dc650dSSadaf Ebrahimi UTF-16 mode. */ 433*22dc650dSSadaf Ebrahimi 434*22dc650dSSadaf Ebrahimi #define GETCHARLEN(c, eptr, len) \ 435*22dc650dSSadaf Ebrahimi c = *eptr; \ 436*22dc650dSSadaf Ebrahimi if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); 437*22dc650dSSadaf Ebrahimi 438*22dc650dSSadaf Ebrahimi /* Get the next UTF-816character, testing for UTF-16 mode, not advancing the 439*22dc650dSSadaf Ebrahimi pointer, incrementing length if there is a low surrogate. This is called when 440*22dc650dSSadaf Ebrahimi we do not know if we are in UTF-16 mode. */ 441*22dc650dSSadaf Ebrahimi 442*22dc650dSSadaf Ebrahimi #define GETCHARLENTEST(c, eptr, len) \ 443*22dc650dSSadaf Ebrahimi c = *eptr; \ 444*22dc650dSSadaf Ebrahimi if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); 445*22dc650dSSadaf Ebrahimi 446*22dc650dSSadaf Ebrahimi /* If the pointer is not at the start of a character, move it back until 447*22dc650dSSadaf Ebrahimi it is. This is called only in UTF-16 mode - we don't put a test within the 448*22dc650dSSadaf Ebrahimi macro because almost all calls are already within a block of UTF-16 only 449*22dc650dSSadaf Ebrahimi code. */ 450*22dc650dSSadaf Ebrahimi 451*22dc650dSSadaf Ebrahimi #define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr-- 452*22dc650dSSadaf Ebrahimi 453*22dc650dSSadaf Ebrahimi /* Same as above, just in the other direction. */ 454*22dc650dSSadaf Ebrahimi #define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++ 455*22dc650dSSadaf Ebrahimi #define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++ 456*22dc650dSSadaf Ebrahimi 457*22dc650dSSadaf Ebrahimi /* Same as above, but it allows a fully customizable form. */ 458*22dc650dSSadaf Ebrahimi #define ACROSSCHAR(condition, eptr, action) \ 459*22dc650dSSadaf Ebrahimi if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action 460*22dc650dSSadaf Ebrahimi 461*22dc650dSSadaf Ebrahimi /* Deposit a character into memory, returning the number of code units. */ 462*22dc650dSSadaf Ebrahimi 463*22dc650dSSadaf Ebrahimi #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ 464*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(c,p) : (*p = c, 1)) 465*22dc650dSSadaf Ebrahimi 466*22dc650dSSadaf Ebrahimi 467*22dc650dSSadaf Ebrahimi /* ------------------- 32-bit support ------------------ */ 468*22dc650dSSadaf Ebrahimi 469*22dc650dSSadaf Ebrahimi #else 470*22dc650dSSadaf Ebrahimi 471*22dc650dSSadaf Ebrahimi /* These are trivial for the 32-bit library, since all UTF-32 characters fit 472*22dc650dSSadaf Ebrahimi into one PCRE2_UCHAR unit. */ 473*22dc650dSSadaf Ebrahimi 474*22dc650dSSadaf Ebrahimi #define MAX_UTF_SINGLE_CU (0x10ffffu) 475*22dc650dSSadaf Ebrahimi #define HAS_EXTRALEN(c) (0) 476*22dc650dSSadaf Ebrahimi #define GET_EXTRALEN(c) (0) 477*22dc650dSSadaf Ebrahimi #define NOT_FIRSTCU(c) (0) 478*22dc650dSSadaf Ebrahimi 479*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, not advancing the pointer. This is called when 480*22dc650dSSadaf Ebrahimi we know we are in UTF-32 mode. */ 481*22dc650dSSadaf Ebrahimi 482*22dc650dSSadaf Ebrahimi #define GETCHAR(c, eptr) \ 483*22dc650dSSadaf Ebrahimi c = *(eptr); 484*22dc650dSSadaf Ebrahimi 485*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the 486*22dc650dSSadaf Ebrahimi pointer. */ 487*22dc650dSSadaf Ebrahimi 488*22dc650dSSadaf Ebrahimi #define GETCHARTEST(c, eptr) \ 489*22dc650dSSadaf Ebrahimi c = *(eptr); 490*22dc650dSSadaf Ebrahimi 491*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, advancing the pointer. This is called when we 492*22dc650dSSadaf Ebrahimi know we are in UTF-32 mode. */ 493*22dc650dSSadaf Ebrahimi 494*22dc650dSSadaf Ebrahimi #define GETCHARINC(c, eptr) \ 495*22dc650dSSadaf Ebrahimi c = *((eptr)++); 496*22dc650dSSadaf Ebrahimi 497*22dc650dSSadaf Ebrahimi /* Get the next character, testing for UTF-32 mode, and advancing the pointer. 498*22dc650dSSadaf Ebrahimi This is called when we don't know if we are in UTF-32 mode. */ 499*22dc650dSSadaf Ebrahimi 500*22dc650dSSadaf Ebrahimi #define GETCHARINCTEST(c, eptr) \ 501*22dc650dSSadaf Ebrahimi c = *((eptr)++); 502*22dc650dSSadaf Ebrahimi 503*22dc650dSSadaf Ebrahimi /* Get the next UTF-32 character, not advancing the pointer, not incrementing 504*22dc650dSSadaf Ebrahimi length (since all UTF-32 is of length 1). This is called when we know we are in 505*22dc650dSSadaf Ebrahimi UTF-32 mode. */ 506*22dc650dSSadaf Ebrahimi 507*22dc650dSSadaf Ebrahimi #define GETCHARLEN(c, eptr, len) \ 508*22dc650dSSadaf Ebrahimi GETCHAR(c, eptr) 509*22dc650dSSadaf Ebrahimi 510*22dc650dSSadaf Ebrahimi /* Get the next UTF-32character, testing for UTF-32 mode, not advancing the 511*22dc650dSSadaf Ebrahimi pointer, not incrementing the length (since all UTF-32 is of length 1). 512*22dc650dSSadaf Ebrahimi This is called when we do not know if we are in UTF-32 mode. */ 513*22dc650dSSadaf Ebrahimi 514*22dc650dSSadaf Ebrahimi #define GETCHARLENTEST(c, eptr, len) \ 515*22dc650dSSadaf Ebrahimi GETCHARTEST(c, eptr) 516*22dc650dSSadaf Ebrahimi 517*22dc650dSSadaf Ebrahimi /* If the pointer is not at the start of a character, move it back until 518*22dc650dSSadaf Ebrahimi it is. This is called only in UTF-32 mode - we don't put a test within the 519*22dc650dSSadaf Ebrahimi macro because almost all calls are already within a block of UTF-32 only 520*22dc650dSSadaf Ebrahimi code. 521*22dc650dSSadaf Ebrahimi 522*22dc650dSSadaf Ebrahimi These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */ 523*22dc650dSSadaf Ebrahimi 524*22dc650dSSadaf Ebrahimi #define BACKCHAR(eptr) do { } while (0) 525*22dc650dSSadaf Ebrahimi 526*22dc650dSSadaf Ebrahimi /* Same as above, just in the other direction. */ 527*22dc650dSSadaf Ebrahimi 528*22dc650dSSadaf Ebrahimi #define FORWARDCHAR(eptr) do { } while (0) 529*22dc650dSSadaf Ebrahimi #define FORWARDCHARTEST(eptr,end) do { } while (0) 530*22dc650dSSadaf Ebrahimi 531*22dc650dSSadaf Ebrahimi /* Same as above, but it allows a fully customizable form. */ 532*22dc650dSSadaf Ebrahimi 533*22dc650dSSadaf Ebrahimi #define ACROSSCHAR(condition, eptr, action) do { } while (0) 534*22dc650dSSadaf Ebrahimi 535*22dc650dSSadaf Ebrahimi /* Deposit a character into memory, returning the number of code units. */ 536*22dc650dSSadaf Ebrahimi 537*22dc650dSSadaf Ebrahimi #define PUTCHAR(c, p) (*p = c, 1) 538*22dc650dSSadaf Ebrahimi 539*22dc650dSSadaf Ebrahimi #endif /* UTF-32 character handling */ 540*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */ 541*22dc650dSSadaf Ebrahimi 542*22dc650dSSadaf Ebrahimi 543*22dc650dSSadaf Ebrahimi /* Mode-dependent macros that have the same definition in all modes. */ 544*22dc650dSSadaf Ebrahimi 545*22dc650dSSadaf Ebrahimi #define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8))) 546*22dc650dSSadaf Ebrahimi #define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8))) 547*22dc650dSSadaf Ebrahimi #define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE 548*22dc650dSSadaf Ebrahimi #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE 549*22dc650dSSadaf Ebrahimi 550*22dc650dSSadaf Ebrahimi 551*22dc650dSSadaf Ebrahimi /* ----------------------- HIDDEN STRUCTURES ----------------------------- */ 552*22dc650dSSadaf Ebrahimi 553*22dc650dSSadaf Ebrahimi /* NOTE: All these structures *must* start with a pcre2_memctl structure. The 554*22dc650dSSadaf Ebrahimi code that uses them is simpler because it assumes this. */ 555*22dc650dSSadaf Ebrahimi 556*22dc650dSSadaf Ebrahimi /* The real general context structure. At present it holds only data for custom 557*22dc650dSSadaf Ebrahimi memory control. */ 558*22dc650dSSadaf Ebrahimi 559*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_general_context { 560*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; 561*22dc650dSSadaf Ebrahimi } pcre2_real_general_context; 562*22dc650dSSadaf Ebrahimi 563*22dc650dSSadaf Ebrahimi /* The real compile context structure */ 564*22dc650dSSadaf Ebrahimi 565*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_compile_context { 566*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; 567*22dc650dSSadaf Ebrahimi int (*stack_guard)(uint32_t, void *); 568*22dc650dSSadaf Ebrahimi void *stack_guard_data; 569*22dc650dSSadaf Ebrahimi const uint8_t *tables; 570*22dc650dSSadaf Ebrahimi PCRE2_SIZE max_pattern_length; 571*22dc650dSSadaf Ebrahimi PCRE2_SIZE max_pattern_compiled_length; 572*22dc650dSSadaf Ebrahimi uint16_t bsr_convention; 573*22dc650dSSadaf Ebrahimi uint16_t newline_convention; 574*22dc650dSSadaf Ebrahimi uint32_t parens_nest_limit; 575*22dc650dSSadaf Ebrahimi uint32_t extra_options; 576*22dc650dSSadaf Ebrahimi uint32_t max_varlookbehind; 577*22dc650dSSadaf Ebrahimi } pcre2_real_compile_context; 578*22dc650dSSadaf Ebrahimi 579*22dc650dSSadaf Ebrahimi /* The real match context structure. */ 580*22dc650dSSadaf Ebrahimi 581*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_match_context { 582*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; 583*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT 584*22dc650dSSadaf Ebrahimi pcre2_jit_callback jit_callback; 585*22dc650dSSadaf Ebrahimi void *jit_callback_data; 586*22dc650dSSadaf Ebrahimi #endif 587*22dc650dSSadaf Ebrahimi int (*callout)(pcre2_callout_block *, void *); 588*22dc650dSSadaf Ebrahimi void *callout_data; 589*22dc650dSSadaf Ebrahimi int (*substitute_callout)(pcre2_substitute_callout_block *, void *); 590*22dc650dSSadaf Ebrahimi void *substitute_callout_data; 591*22dc650dSSadaf Ebrahimi PCRE2_SIZE offset_limit; 592*22dc650dSSadaf Ebrahimi uint32_t heap_limit; 593*22dc650dSSadaf Ebrahimi uint32_t match_limit; 594*22dc650dSSadaf Ebrahimi uint32_t depth_limit; 595*22dc650dSSadaf Ebrahimi } pcre2_real_match_context; 596*22dc650dSSadaf Ebrahimi 597*22dc650dSSadaf Ebrahimi /* The real convert context structure. */ 598*22dc650dSSadaf Ebrahimi 599*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_convert_context { 600*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; 601*22dc650dSSadaf Ebrahimi uint32_t glob_separator; 602*22dc650dSSadaf Ebrahimi uint32_t glob_escape; 603*22dc650dSSadaf Ebrahimi } pcre2_real_convert_context; 604*22dc650dSSadaf Ebrahimi 605*22dc650dSSadaf Ebrahimi /* The real compiled code structure. The type for the blocksize field is 606*22dc650dSSadaf Ebrahimi defined specially because it is required in pcre2_serialize_decode() when 607*22dc650dSSadaf Ebrahimi copying the size from possibly unaligned memory into a variable of the same 608*22dc650dSSadaf Ebrahimi type. Use a macro rather than a typedef to avoid compiler warnings when this 609*22dc650dSSadaf Ebrahimi file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the 610*22dc650dSSadaf Ebrahimi largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern 611*22dc650dSSadaf Ebrahimi have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a 612*22dc650dSSadaf Ebrahimi 16-bit field here.) */ 613*22dc650dSSadaf Ebrahimi 614*22dc650dSSadaf Ebrahimi #undef CODE_BLOCKSIZE_TYPE 615*22dc650dSSadaf Ebrahimi #define CODE_BLOCKSIZE_TYPE PCRE2_SIZE 616*22dc650dSSadaf Ebrahimi 617*22dc650dSSadaf Ebrahimi #undef LOOKBEHIND_MAX 618*22dc650dSSadaf Ebrahimi #define LOOKBEHIND_MAX UINT16_MAX 619*22dc650dSSadaf Ebrahimi 620*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_code { 621*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; /* Memory control fields */ 622*22dc650dSSadaf Ebrahimi const uint8_t *tables; /* The character tables */ 623*22dc650dSSadaf Ebrahimi void *executable_jit; /* Pointer to JIT code */ 624*22dc650dSSadaf Ebrahimi uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ 625*22dc650dSSadaf Ebrahimi CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */ 626*22dc650dSSadaf Ebrahimi uint32_t magic_number; /* Paranoid and endianness check */ 627*22dc650dSSadaf Ebrahimi uint32_t compile_options; /* Options passed to pcre2_compile() */ 628*22dc650dSSadaf Ebrahimi uint32_t overall_options; /* Options after processing the pattern */ 629*22dc650dSSadaf Ebrahimi uint32_t extra_options; /* Taken from compile_context */ 630*22dc650dSSadaf Ebrahimi uint32_t flags; /* Various state flags */ 631*22dc650dSSadaf Ebrahimi uint32_t limit_heap; /* Limit set in the pattern */ 632*22dc650dSSadaf Ebrahimi uint32_t limit_match; /* Limit set in the pattern */ 633*22dc650dSSadaf Ebrahimi uint32_t limit_depth; /* Limit set in the pattern */ 634*22dc650dSSadaf Ebrahimi uint32_t first_codeunit; /* Starting code unit */ 635*22dc650dSSadaf Ebrahimi uint32_t last_codeunit; /* This codeunit must be seen */ 636*22dc650dSSadaf Ebrahimi uint16_t bsr_convention; /* What \R matches */ 637*22dc650dSSadaf Ebrahimi uint16_t newline_convention; /* What is a newline? */ 638*22dc650dSSadaf Ebrahimi uint16_t max_lookbehind; /* Longest lookbehind (characters) */ 639*22dc650dSSadaf Ebrahimi uint16_t minlength; /* Minimum length of match */ 640*22dc650dSSadaf Ebrahimi uint16_t top_bracket; /* Highest numbered group */ 641*22dc650dSSadaf Ebrahimi uint16_t top_backref; /* Highest numbered back reference */ 642*22dc650dSSadaf Ebrahimi uint16_t name_entry_size; /* Size (code units) of table entries */ 643*22dc650dSSadaf Ebrahimi uint16_t name_count; /* Number of name entries in the table */ 644*22dc650dSSadaf Ebrahimi } pcre2_real_code; 645*22dc650dSSadaf Ebrahimi 646*22dc650dSSadaf Ebrahimi /* The real match data structure. Define ovector as large as it can ever 647*22dc650dSSadaf Ebrahimi actually be so that array bound checkers don't grumble. Memory for this 648*22dc650dSSadaf Ebrahimi structure is obtained by calling pcre2_match_data_create(), which sets the size 649*22dc650dSSadaf Ebrahimi as the offset of ovector plus a pair of elements for each capturable string, so 650*22dc650dSSadaf Ebrahimi the size varies from call to call. As the maximum number of capturing 651*22dc650dSSadaf Ebrahimi subpatterns is 65535 we must allow for 65536 strings to include the overall 652*22dc650dSSadaf Ebrahimi match. (See also the heapframe structure below.) */ 653*22dc650dSSadaf Ebrahimi 654*22dc650dSSadaf Ebrahimi struct heapframe; /* Forward reference */ 655*22dc650dSSadaf Ebrahimi 656*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_match_data { 657*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; /* Memory control fields */ 658*22dc650dSSadaf Ebrahimi const pcre2_real_code *code; /* The pattern used for the match */ 659*22dc650dSSadaf Ebrahimi PCRE2_SPTR subject; /* The subject that was matched */ 660*22dc650dSSadaf Ebrahimi PCRE2_SPTR mark; /* Pointer to last mark */ 661*22dc650dSSadaf Ebrahimi struct heapframe *heapframes; /* Backtracking frames heap memory */ 662*22dc650dSSadaf Ebrahimi PCRE2_SIZE heapframes_size; /* Malloc-ed size */ 663*22dc650dSSadaf Ebrahimi PCRE2_SIZE subject_length; /* Subject length */ 664*22dc650dSSadaf Ebrahimi PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ 665*22dc650dSSadaf Ebrahimi PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ 666*22dc650dSSadaf Ebrahimi PCRE2_SIZE startchar; /* Offset to starting code unit */ 667*22dc650dSSadaf Ebrahimi uint8_t matchedby; /* Type of match (normal, JIT, DFA) */ 668*22dc650dSSadaf Ebrahimi uint8_t flags; /* Various flags */ 669*22dc650dSSadaf Ebrahimi uint16_t oveccount; /* Number of pairs */ 670*22dc650dSSadaf Ebrahimi int rc; /* The return code from the match */ 671*22dc650dSSadaf Ebrahimi PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ 672*22dc650dSSadaf Ebrahimi } pcre2_real_match_data; 673*22dc650dSSadaf Ebrahimi 674*22dc650dSSadaf Ebrahimi 675*22dc650dSSadaf Ebrahimi /* ----------------------- PRIVATE STRUCTURES ----------------------------- */ 676*22dc650dSSadaf Ebrahimi 677*22dc650dSSadaf Ebrahimi /* These structures are not needed for pcre2test. */ 678*22dc650dSSadaf Ebrahimi 679*22dc650dSSadaf Ebrahimi #ifndef PCRE2_PCRE2TEST 680*22dc650dSSadaf Ebrahimi 681*22dc650dSSadaf Ebrahimi /* Structures for checking for mutual function recursion when scanning compiled 682*22dc650dSSadaf Ebrahimi or parsed code. */ 683*22dc650dSSadaf Ebrahimi 684*22dc650dSSadaf Ebrahimi typedef struct recurse_check { 685*22dc650dSSadaf Ebrahimi struct recurse_check *prev; 686*22dc650dSSadaf Ebrahimi PCRE2_SPTR group; 687*22dc650dSSadaf Ebrahimi } recurse_check; 688*22dc650dSSadaf Ebrahimi 689*22dc650dSSadaf Ebrahimi typedef struct parsed_recurse_check { 690*22dc650dSSadaf Ebrahimi struct parsed_recurse_check *prev; 691*22dc650dSSadaf Ebrahimi uint32_t *groupptr; 692*22dc650dSSadaf Ebrahimi } parsed_recurse_check; 693*22dc650dSSadaf Ebrahimi 694*22dc650dSSadaf Ebrahimi /* Structure for building a cache when filling in pattern recursion offsets. */ 695*22dc650dSSadaf Ebrahimi 696*22dc650dSSadaf Ebrahimi typedef struct recurse_cache { 697*22dc650dSSadaf Ebrahimi PCRE2_SPTR group; 698*22dc650dSSadaf Ebrahimi int groupnumber; 699*22dc650dSSadaf Ebrahimi } recurse_cache; 700*22dc650dSSadaf Ebrahimi 701*22dc650dSSadaf Ebrahimi /* Structure for maintaining a chain of pointers to the currently incomplete 702*22dc650dSSadaf Ebrahimi branches, for testing for left recursion while compiling. */ 703*22dc650dSSadaf Ebrahimi 704*22dc650dSSadaf Ebrahimi typedef struct branch_chain { 705*22dc650dSSadaf Ebrahimi struct branch_chain *outer; 706*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *current_branch; 707*22dc650dSSadaf Ebrahimi } branch_chain; 708*22dc650dSSadaf Ebrahimi 709*22dc650dSSadaf Ebrahimi /* Structure for building a list of named groups during the first pass of 710*22dc650dSSadaf Ebrahimi compiling. */ 711*22dc650dSSadaf Ebrahimi 712*22dc650dSSadaf Ebrahimi typedef struct named_group { 713*22dc650dSSadaf Ebrahimi PCRE2_SPTR name; /* Points to the name in the pattern */ 714*22dc650dSSadaf Ebrahimi uint32_t number; /* Group number */ 715*22dc650dSSadaf Ebrahimi uint16_t length; /* Length of the name */ 716*22dc650dSSadaf Ebrahimi uint16_t isdup; /* TRUE if a duplicate */ 717*22dc650dSSadaf Ebrahimi } named_group; 718*22dc650dSSadaf Ebrahimi 719*22dc650dSSadaf Ebrahimi /* Structure for passing "static" information around between the functions 720*22dc650dSSadaf Ebrahimi doing the compiling, so that they are thread-safe. */ 721*22dc650dSSadaf Ebrahimi 722*22dc650dSSadaf Ebrahimi typedef struct compile_block { 723*22dc650dSSadaf Ebrahimi pcre2_real_compile_context *cx; /* Points to the compile context */ 724*22dc650dSSadaf Ebrahimi const uint8_t *lcc; /* Points to lower casing table */ 725*22dc650dSSadaf Ebrahimi const uint8_t *fcc; /* Points to case-flipping table */ 726*22dc650dSSadaf Ebrahimi const uint8_t *cbits; /* Points to character type table */ 727*22dc650dSSadaf Ebrahimi const uint8_t *ctypes; /* Points to table of type maps */ 728*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_workspace; /* The start of working space */ 729*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_code; /* The start of the compiled code */ 730*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_pattern; /* The start of the pattern */ 731*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_pattern; /* The end of the pattern */ 732*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *name_table; /* The name/number table */ 733*22dc650dSSadaf Ebrahimi PCRE2_SIZE workspace_size; /* Size of workspace */ 734*22dc650dSSadaf Ebrahimi PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ 735*22dc650dSSadaf Ebrahimi PCRE2_SIZE erroroffset; /* Offset of error in pattern */ 736*22dc650dSSadaf Ebrahimi uint16_t names_found; /* Number of entries so far */ 737*22dc650dSSadaf Ebrahimi uint16_t name_entry_size; /* Size of each entry */ 738*22dc650dSSadaf Ebrahimi uint16_t parens_depth; /* Depth of nested parentheses */ 739*22dc650dSSadaf Ebrahimi uint16_t assert_depth; /* Depth of nested assertions */ 740*22dc650dSSadaf Ebrahimi named_group *named_groups; /* Points to vector in pre-compile */ 741*22dc650dSSadaf Ebrahimi uint32_t named_group_list_size; /* Number of entries in the list */ 742*22dc650dSSadaf Ebrahimi uint32_t external_options; /* External (initial) options */ 743*22dc650dSSadaf Ebrahimi uint32_t external_flags; /* External flag bits to be set */ 744*22dc650dSSadaf Ebrahimi uint32_t bracount; /* Count of capturing parentheses */ 745*22dc650dSSadaf Ebrahimi uint32_t lastcapture; /* Last capture encountered */ 746*22dc650dSSadaf Ebrahimi uint32_t *parsed_pattern; /* Parsed pattern buffer */ 747*22dc650dSSadaf Ebrahimi uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */ 748*22dc650dSSadaf Ebrahimi uint32_t *groupinfo; /* Group info vector */ 749*22dc650dSSadaf Ebrahimi uint32_t top_backref; /* Maximum back reference */ 750*22dc650dSSadaf Ebrahimi uint32_t backref_map; /* Bitmap of low back refs */ 751*22dc650dSSadaf Ebrahimi uint32_t nltype; /* Newline type */ 752*22dc650dSSadaf Ebrahimi uint32_t nllen; /* Newline string length */ 753*22dc650dSSadaf Ebrahimi uint32_t class_range_start; /* Overall class range start */ 754*22dc650dSSadaf Ebrahimi uint32_t class_range_end; /* Overall class range end */ 755*22dc650dSSadaf Ebrahimi PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ 756*22dc650dSSadaf Ebrahimi uint32_t req_varyopt; /* "After variable item" flag for reqbyte */ 757*22dc650dSSadaf Ebrahimi uint32_t max_varlookbehind; /* Limit for variable lookbehinds */ 758*22dc650dSSadaf Ebrahimi int max_lookbehind; /* Maximum lookbehind encountered (characters) */ 759*22dc650dSSadaf Ebrahimi BOOL had_accept; /* (*ACCEPT) encountered */ 760*22dc650dSSadaf Ebrahimi BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ 761*22dc650dSSadaf Ebrahimi BOOL had_recurse; /* Had a pattern recursion or subroutine call */ 762*22dc650dSSadaf Ebrahimi BOOL dupnames; /* Duplicate names exist */ 763*22dc650dSSadaf Ebrahimi } compile_block; 764*22dc650dSSadaf Ebrahimi 765*22dc650dSSadaf Ebrahimi /* Structure for keeping the properties of the in-memory stack used 766*22dc650dSSadaf Ebrahimi by the JIT matcher. */ 767*22dc650dSSadaf Ebrahimi 768*22dc650dSSadaf Ebrahimi typedef struct pcre2_real_jit_stack { 769*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; 770*22dc650dSSadaf Ebrahimi void* stack; 771*22dc650dSSadaf Ebrahimi } pcre2_real_jit_stack; 772*22dc650dSSadaf Ebrahimi 773*22dc650dSSadaf Ebrahimi /* Structure for items in a linked list that represents an explicit recursive 774*22dc650dSSadaf Ebrahimi call within the pattern when running pcre2_dfa_match(). */ 775*22dc650dSSadaf Ebrahimi 776*22dc650dSSadaf Ebrahimi typedef struct dfa_recursion_info { 777*22dc650dSSadaf Ebrahimi struct dfa_recursion_info *prevrec; 778*22dc650dSSadaf Ebrahimi PCRE2_SPTR subject_position; 779*22dc650dSSadaf Ebrahimi PCRE2_SPTR last_used_ptr; 780*22dc650dSSadaf Ebrahimi uint32_t group_num; 781*22dc650dSSadaf Ebrahimi } dfa_recursion_info; 782*22dc650dSSadaf Ebrahimi 783*22dc650dSSadaf Ebrahimi /* Structure for "stack" frames that are used for remembering backtracking 784*22dc650dSSadaf Ebrahimi positions during matching. As these are used in a vector, with the ovector item 785*22dc650dSSadaf Ebrahimi being extended, the size of the structure must be a multiple of PCRE2_SIZE. The 786*22dc650dSSadaf Ebrahimi only way to check this at compile time is to force an error by generating an 787*22dc650dSSadaf Ebrahimi array with a negative size. By putting this in a typedef (which is never used), 788*22dc650dSSadaf Ebrahimi we don't generate any code when all is well. */ 789*22dc650dSSadaf Ebrahimi 790*22dc650dSSadaf Ebrahimi typedef struct heapframe { 791*22dc650dSSadaf Ebrahimi 792*22dc650dSSadaf Ebrahimi /* The first set of fields are variables that have to be preserved over calls 793*22dc650dSSadaf Ebrahimi to RRMATCH(), but which do not need to be copied to new frames. */ 794*22dc650dSSadaf Ebrahimi 795*22dc650dSSadaf Ebrahimi PCRE2_SPTR ecode; /* The current position in the pattern */ 796*22dc650dSSadaf Ebrahimi PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ 797*22dc650dSSadaf Ebrahimi PCRE2_SIZE length; /* Used for character, string, or code lengths */ 798*22dc650dSSadaf Ebrahimi PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ 799*22dc650dSSadaf Ebrahimi PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ 800*22dc650dSSadaf Ebrahimi uint32_t rdepth; /* Function "recursion" depth within pcre2_match() */ 801*22dc650dSSadaf Ebrahimi uint32_t group_frame_type; /* Type information for group frames */ 802*22dc650dSSadaf Ebrahimi uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */ 803*22dc650dSSadaf Ebrahimi uint8_t return_id; /* Where to go on in internal "return" */ 804*22dc650dSSadaf Ebrahimi uint8_t op; /* Processing opcode */ 805*22dc650dSSadaf Ebrahimi 806*22dc650dSSadaf Ebrahimi /* At this point, the structure is 16-bit aligned. On most architectures 807*22dc650dSSadaf Ebrahimi the alignment requirement for a pointer will ensure that the eptr field below 808*22dc650dSSadaf Ebrahimi is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer 809*22dc650dSSadaf Ebrahimi that is 16-bit aligned. We must therefore ensure that what comes between here 810*22dc650dSSadaf Ebrahimi and eptr is an odd multiple of 16 bits so as to get back into 32-bit 811*22dc650dSSadaf Ebrahimi alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs 812*22dc650dSSadaf Ebrahimi fudges in the other cases. In the 32-bit case the padding comes first so that 813*22dc650dSSadaf Ebrahimi the occu field itself is 32-bit aligned. Without the padding, this structure 814*22dc650dSSadaf Ebrahimi is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */ 815*22dc650dSSadaf Ebrahimi 816*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8 817*22dc650dSSadaf Ebrahimi PCRE2_UCHAR occu[6]; /* Used for other case code units */ 818*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16 819*22dc650dSSadaf Ebrahimi PCRE2_UCHAR occu[2]; /* Used for other case code units */ 820*22dc650dSSadaf Ebrahimi uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ 821*22dc650dSSadaf Ebrahimi #else 822*22dc650dSSadaf Ebrahimi uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ 823*22dc650dSSadaf Ebrahimi PCRE2_UCHAR occu[1]; /* Used for other case code units */ 824*22dc650dSSadaf Ebrahimi #endif 825*22dc650dSSadaf Ebrahimi 826*22dc650dSSadaf Ebrahimi /* The rest have to be copied from the previous frame whenever a new frame 827*22dc650dSSadaf Ebrahimi becomes current. The final field is specified as a large vector so that 828*22dc650dSSadaf Ebrahimi runtime array bound checks don't catch references to it. However, for any 829*22dc650dSSadaf Ebrahimi specific call to pcre2_match() the memory allocated for each frame structure 830*22dc650dSSadaf Ebrahimi allows for exactly the right size ovector for the number of capturing 831*22dc650dSSadaf Ebrahimi parentheses. (See also the comment for pcre2_real_match_data above.) */ 832*22dc650dSSadaf Ebrahimi 833*22dc650dSSadaf Ebrahimi PCRE2_SPTR eptr; /* MUST BE FIRST */ 834*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_match; /* Can be adjusted by \K */ 835*22dc650dSSadaf Ebrahimi PCRE2_SPTR mark; /* Most recent mark on the success path */ 836*22dc650dSSadaf Ebrahimi PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */ 837*22dc650dSSadaf Ebrahimi uint32_t current_recurse; /* Group number of current (deepest) pattern recursion */ 838*22dc650dSSadaf Ebrahimi uint32_t capture_last; /* Most recent capture */ 839*22dc650dSSadaf Ebrahimi PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */ 840*22dc650dSSadaf Ebrahimi PCRE2_SIZE offset_top; /* Offset after highest capture */ 841*22dc650dSSadaf Ebrahimi PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ 842*22dc650dSSadaf Ebrahimi } heapframe; 843*22dc650dSSadaf Ebrahimi 844*22dc650dSSadaf Ebrahimi /* This typedef is a check that the size of the heapframe structure is a 845*22dc650dSSadaf Ebrahimi multiple of PCRE2_SIZE. See various comments above. */ 846*22dc650dSSadaf Ebrahimi 847*22dc650dSSadaf Ebrahimi typedef char check_heapframe_size[ 848*22dc650dSSadaf Ebrahimi ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; 849*22dc650dSSadaf Ebrahimi 850*22dc650dSSadaf Ebrahimi /* Structure for computing the alignment of heapframe. */ 851*22dc650dSSadaf Ebrahimi 852*22dc650dSSadaf Ebrahimi typedef struct heapframe_align { 853*22dc650dSSadaf Ebrahimi char unalign; /* Completely unalign the current offset */ 854*22dc650dSSadaf Ebrahimi heapframe frame; /* Offset is its alignment */ 855*22dc650dSSadaf Ebrahimi } heapframe_align; 856*22dc650dSSadaf Ebrahimi 857*22dc650dSSadaf Ebrahimi /* This define is the minimum alignment required for a heapframe, in bytes. */ 858*22dc650dSSadaf Ebrahimi 859*22dc650dSSadaf Ebrahimi #define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame) 860*22dc650dSSadaf Ebrahimi 861*22dc650dSSadaf Ebrahimi /* Structure for passing "static" information around between the functions 862*22dc650dSSadaf Ebrahimi doing traditional NFA matching (pcre2_match() and friends). */ 863*22dc650dSSadaf Ebrahimi 864*22dc650dSSadaf Ebrahimi typedef struct match_block { 865*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; /* For general use */ 866*22dc650dSSadaf Ebrahimi uint32_t heap_limit; /* As it says */ 867*22dc650dSSadaf Ebrahimi uint32_t match_limit; /* As it says */ 868*22dc650dSSadaf Ebrahimi uint32_t match_limit_depth; /* As it says */ 869*22dc650dSSadaf Ebrahimi uint32_t match_call_count; /* Number of times a new frame is created */ 870*22dc650dSSadaf Ebrahimi BOOL hitend; /* Hit the end of the subject at some point */ 871*22dc650dSSadaf Ebrahimi BOOL hasthen; /* Pattern contains (*THEN) */ 872*22dc650dSSadaf Ebrahimi BOOL allowemptypartial; /* Allow empty hard partial */ 873*22dc650dSSadaf Ebrahimi const uint8_t *lcc; /* Points to lower casing table */ 874*22dc650dSSadaf Ebrahimi const uint8_t *fcc; /* Points to case-flipping table */ 875*22dc650dSSadaf Ebrahimi const uint8_t *ctypes; /* Points to table of type maps */ 876*22dc650dSSadaf Ebrahimi PCRE2_SIZE start_offset; /* The start offset value */ 877*22dc650dSSadaf Ebrahimi PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */ 878*22dc650dSSadaf Ebrahimi uint16_t partial; /* PARTIAL options */ 879*22dc650dSSadaf Ebrahimi uint16_t bsr_convention; /* \R interpretation */ 880*22dc650dSSadaf Ebrahimi uint16_t name_count; /* Number of names in name table */ 881*22dc650dSSadaf Ebrahimi uint16_t name_entry_size; /* Size of entry in names table */ 882*22dc650dSSadaf Ebrahimi PCRE2_SPTR name_table; /* Table of group names */ 883*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_code; /* For use in pattern recursion */ 884*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject; /* Start of the subject string */ 885*22dc650dSSadaf Ebrahimi PCRE2_SPTR check_subject; /* Where UTF-checked from */ 886*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject; /* Usable end of the subject string */ 887*22dc650dSSadaf Ebrahimi PCRE2_SPTR true_end_subject; /* Actual end of the subject string */ 888*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_match_ptr; /* Subject position at end match */ 889*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ 890*22dc650dSSadaf Ebrahimi PCRE2_SPTR last_used_ptr; /* Latest consulted character */ 891*22dc650dSSadaf Ebrahimi PCRE2_SPTR mark; /* Mark pointer to pass back on success */ 892*22dc650dSSadaf Ebrahimi PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ 893*22dc650dSSadaf Ebrahimi PCRE2_SPTR verb_ecode_ptr; /* For passing back info */ 894*22dc650dSSadaf Ebrahimi PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */ 895*22dc650dSSadaf Ebrahimi uint32_t verb_current_recurse; /* Current recursion group when (*VERB) happens */ 896*22dc650dSSadaf Ebrahimi uint32_t moptions; /* Match options */ 897*22dc650dSSadaf Ebrahimi uint32_t poptions; /* Pattern options */ 898*22dc650dSSadaf Ebrahimi uint32_t skip_arg_count; /* For counting SKIP_ARGs */ 899*22dc650dSSadaf Ebrahimi uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */ 900*22dc650dSSadaf Ebrahimi uint32_t nltype; /* Newline type */ 901*22dc650dSSadaf Ebrahimi uint32_t nllen; /* Newline string length */ 902*22dc650dSSadaf Ebrahimi PCRE2_UCHAR nl[4]; /* Newline string when fixed */ 903*22dc650dSSadaf Ebrahimi pcre2_callout_block *cb; /* Points to a callout block */ 904*22dc650dSSadaf Ebrahimi void *callout_data; /* To pass back to callouts */ 905*22dc650dSSadaf Ebrahimi int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ 906*22dc650dSSadaf Ebrahimi } match_block; 907*22dc650dSSadaf Ebrahimi 908*22dc650dSSadaf Ebrahimi /* A similar structure is used for the same purpose by the DFA matching 909*22dc650dSSadaf Ebrahimi functions. */ 910*22dc650dSSadaf Ebrahimi 911*22dc650dSSadaf Ebrahimi typedef struct dfa_match_block { 912*22dc650dSSadaf Ebrahimi pcre2_memctl memctl; /* For general use */ 913*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_code; /* Start of the compiled pattern */ 914*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_subject ; /* Start of the subject string */ 915*22dc650dSSadaf Ebrahimi PCRE2_SPTR end_subject; /* End of subject string */ 916*22dc650dSSadaf Ebrahimi PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ 917*22dc650dSSadaf Ebrahimi PCRE2_SPTR last_used_ptr; /* Latest consulted character */ 918*22dc650dSSadaf Ebrahimi const uint8_t *tables; /* Character tables */ 919*22dc650dSSadaf Ebrahimi PCRE2_SIZE start_offset; /* The start offset value */ 920*22dc650dSSadaf Ebrahimi uint32_t heap_limit; /* As it says */ 921*22dc650dSSadaf Ebrahimi PCRE2_SIZE heap_used; /* As it says */ 922*22dc650dSSadaf Ebrahimi uint32_t match_limit; /* As it says */ 923*22dc650dSSadaf Ebrahimi uint32_t match_limit_depth; /* As it says */ 924*22dc650dSSadaf Ebrahimi uint32_t match_call_count; /* Number of calls of internal function */ 925*22dc650dSSadaf Ebrahimi uint32_t moptions; /* Match options */ 926*22dc650dSSadaf Ebrahimi uint32_t poptions; /* Pattern options */ 927*22dc650dSSadaf Ebrahimi uint32_t nltype; /* Newline type */ 928*22dc650dSSadaf Ebrahimi uint32_t nllen; /* Newline string length */ 929*22dc650dSSadaf Ebrahimi BOOL allowemptypartial; /* Allow empty hard partial */ 930*22dc650dSSadaf Ebrahimi PCRE2_UCHAR nl[4]; /* Newline string when fixed */ 931*22dc650dSSadaf Ebrahimi uint16_t bsr_convention; /* \R interpretation */ 932*22dc650dSSadaf Ebrahimi pcre2_callout_block *cb; /* Points to a callout block */ 933*22dc650dSSadaf Ebrahimi void *callout_data; /* To pass back to callouts */ 934*22dc650dSSadaf Ebrahimi int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ 935*22dc650dSSadaf Ebrahimi dfa_recursion_info *recursive; /* Linked list of pattern recursion data */ 936*22dc650dSSadaf Ebrahimi } dfa_match_block; 937*22dc650dSSadaf Ebrahimi 938*22dc650dSSadaf Ebrahimi #endif /* PCRE2_PCRE2TEST */ 939*22dc650dSSadaf Ebrahimi 940*22dc650dSSadaf Ebrahimi /* End of pcre2_intmodedep.h */ 941